@ls-stack/agent-eval 0.27.1 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -531,10 +531,13 @@ function recordOpIfActive(scope, op) {
531
531
  if (top) top.ops.push(op);
532
532
  }
533
533
  function toAssertionFailure$1(message, error = void 0) {
534
- return error?.stack ? {
534
+ const name = error?.name;
535
+ const stack = error?.stack ? stripTerminalControlCodes$1(error.stack) : void 0;
536
+ return {
537
+ ...name !== void 0 ? { name } : {},
535
538
  message,
536
- stack: stripTerminalControlCodes$1(error.stack)
537
- } : { message };
539
+ ...stack !== void 0 ? { stack } : {}
540
+ };
538
541
  }
539
542
  /**
540
543
  * Record or replace an output value for the current case scope.
@@ -802,7 +805,8 @@ function repoFile(path, mimeType) {
802
805
  }
803
806
  //#endregion
804
807
  //#region ../sdk/src/cacheSerialization.ts
805
- const serializedCacheValueMarker = "__agentEvalsCacheSerialization";
808
+ const serializedCacheValueMarker = "__aecs";
809
+ const legacySerializedCacheValueMarker = "__agentEvalsCacheSerialization";
806
810
  const jsonSafeCacheValueVersion = "json-safe-v1";
807
811
  const packedNumberArrayMinLength = 128;
808
812
  const compressedStringMinBytes = 16 * 1024;
@@ -812,7 +816,7 @@ function isRecordLike$3(value) {
812
816
  return typeof value === "object" && value !== null && !Array.isArray(value);
813
817
  }
814
818
  function isJsonSafeSerializedCacheValue(value) {
815
- return isRecordLike$3(value) && value[serializedCacheValueMarker] === jsonSafeCacheValueVersion && typeof value.type === "string";
819
+ return isRecordLike$3(value) && serializationMarkerValue(value) === jsonSafeCacheValueVersion && typeof value.type === "string";
816
820
  }
817
821
  function jsonSafeValue(type, value) {
818
822
  return value === void 0 ? {
@@ -825,32 +829,39 @@ function jsonSafeValue(type, value) {
825
829
  };
826
830
  }
827
831
  function hasSerializationMarkerKey(value) {
828
- return Object.hasOwn(value, serializedCacheValueMarker);
832
+ return Object.hasOwn(value, serializedCacheValueMarker) || Object.hasOwn(value, legacySerializedCacheValueMarker);
833
+ }
834
+ function serializationMarkerValue(value) {
835
+ return value[serializedCacheValueMarker] ?? value[legacySerializedCacheValueMarker];
829
836
  }
830
837
  /**
831
838
  * Serialize one cached value while keeping plain JSON as plain JSON.
832
839
  *
833
- * Rich runtime values use small tagged wrappers.
840
+ * Rich runtime values use small tagged wrappers. Undefined values are omitted
841
+ * by default; pass `preserveUndefined: true` to round-trip them explicitly.
834
842
  */
835
- async function serializeCacheValue(value) {
836
- return serializeJsonSafeValue(value, /* @__PURE__ */ new WeakSet(), 0);
843
+ async function serializeCacheValue(value, options = void 0) {
844
+ return serializeJsonSafeValue(value, /* @__PURE__ */ new WeakSet(), 0, normalizeCacheSerializationOptions(options));
837
845
  }
838
846
  /** Revive one cached value, while preserving legacy JSON-round-tripped data. */
839
847
  function deserializeCacheValue(value) {
840
848
  return deserializeJsonSafeValue(value);
841
849
  }
842
850
  /** Clone one value through the same serialization path used for cache data. */
843
- async function cloneCacheValue(value) {
844
- return deserializeCacheValue(await serializeCacheValue(value));
851
+ async function cloneCacheValue(value, options = void 0) {
852
+ return deserializeCacheValue(await serializeCacheValue(value, options));
853
+ }
854
+ function normalizeCacheSerializationOptions(options) {
855
+ return { preserveUndefined: options?.preserveUndefined === true };
845
856
  }
846
- async function serializeJsonSafeValue(value, refs, depth) {
847
- if (value === void 0) return jsonSafeValue("Undefined");
857
+ async function serializeJsonSafeValue(value, refs, depth, config) {
858
+ if (value === void 0) return config.preserveUndefined ? jsonSafeValue("Undefined") : void 0;
848
859
  if (typeof value === "bigint") return jsonSafeValue("BigInt", value.toString());
849
860
  if (typeof value === "number") return serializeNumber(value);
850
861
  if (typeof value === "string") return serializeString(value, depth);
851
862
  if (value instanceof Date) return jsonSafeValue("Date", value.toISOString());
852
- if (value instanceof Map) return serializeMap(value, refs, depth);
853
- if (value instanceof Set) return serializeSet(value, refs, depth);
863
+ if (value instanceof Map) return serializeMap(value, refs, depth, config);
864
+ if (value instanceof Set) return serializeSet(value, refs, depth, config);
854
865
  if (value instanceof RegExp) return jsonSafeValue("RegExp", {
855
866
  flags: value.flags,
856
867
  source: value.source
@@ -869,7 +880,7 @@ async function serializeJsonSafeValue(value, refs, depth) {
869
880
  type: value.type
870
881
  });
871
882
  if (value instanceof ArrayBuffer) return jsonSafeValue("ArrayBuffer", bytesToBase64(new Uint8Array(value)));
872
- if (value instanceof Error) return serializeError(value, refs, depth);
883
+ if (value instanceof Error) return serializeError(value, refs, depth, config);
873
884
  if (!value || typeof value !== "object") return value;
874
885
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
875
886
  refs.add(value);
@@ -882,12 +893,18 @@ async function serializeJsonSafeValue(value, refs, depth) {
882
893
  }
883
894
  }
884
895
  const items = [];
885
- for (const item of value) items.push(await serializeJsonSafeValue(item, refs, depth + 1));
896
+ for (const item of value) {
897
+ const serializedItem = await serializeJsonSafeValue(item, refs, depth + 1, config);
898
+ if (serializedItem !== void 0) items.push(serializedItem);
899
+ }
886
900
  refs.delete(value);
887
901
  return compressNestedJsonValue(items, depth) ?? items;
888
902
  }
889
903
  const entries = [];
890
- for (const [key, entryValue] of Object.entries(value)) entries.push([key, await serializeJsonSafeValue(entryValue, refs, depth + 1)]);
904
+ for (const [key, entryValue] of Object.entries(value)) {
905
+ const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
906
+ if (serializedEntryValue !== void 0) entries.push([key, serializedEntryValue]);
907
+ }
891
908
  refs.delete(value);
892
909
  const serialized = hasSerializationMarkerKey(value) ? jsonSafeValue("Object", entries) : Object.fromEntries(entries);
893
910
  return compressNestedJsonValue(serialized, depth) ?? serialized;
@@ -957,32 +974,40 @@ function compressNestedJsonValue(value, depth) {
957
974
  function compressionIsWorthIt(value, rawSize) {
958
975
  return Buffer$1.byteLength(JSON.stringify(value)) < rawSize * maxCompressedSizeRatio;
959
976
  }
960
- async function serializeMap(value, refs, depth) {
977
+ async function serializeMap(value, refs, depth, config) {
961
978
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
962
979
  refs.add(value);
963
980
  const entries = [];
964
- for (const [key, entryValue] of value.entries()) entries.push([await serializeJsonSafeValue(key, refs, depth + 1), await serializeJsonSafeValue(entryValue, refs, depth + 1)]);
981
+ for (const [key, entryValue] of value.entries()) {
982
+ const serializedKey = await serializeJsonSafeValue(key, refs, depth + 1, config);
983
+ const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
984
+ if (serializedKey !== void 0 && serializedEntryValue !== void 0) entries.push([serializedKey, serializedEntryValue]);
985
+ }
965
986
  refs.delete(value);
966
987
  return jsonSafeValue("Map", entries);
967
988
  }
968
- async function serializeSet(value, refs, depth) {
989
+ async function serializeSet(value, refs, depth, config) {
969
990
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
970
991
  refs.add(value);
971
992
  const items = [];
972
- for (const item of value.values()) items.push(await serializeJsonSafeValue(item, refs, depth + 1));
993
+ for (const item of value.values()) {
994
+ const serializedItem = await serializeJsonSafeValue(item, refs, depth + 1, config);
995
+ if (serializedItem !== void 0) items.push(serializedItem);
996
+ }
973
997
  refs.delete(value);
974
998
  return jsonSafeValue("Set", items);
975
999
  }
976
- async function serializeError(value, refs, depth) {
1000
+ async function serializeError(value, refs, depth, config) {
977
1001
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
978
1002
  refs.add(value);
979
1003
  const props = [];
980
1004
  for (const [key, entryValue] of Object.entries(value)) {
981
1005
  if (key === "cause") continue;
982
- props.push([key, await serializeJsonSafeValue(entryValue, refs, depth + 1)]);
1006
+ const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
1007
+ if (serializedEntryValue !== void 0) props.push([key, serializedEntryValue]);
983
1008
  }
984
1009
  const serialized = jsonSafeValue("Error", {
985
- cause: "cause" in value ? await serializeJsonSafeValue(value.cause, refs, depth + 1) : void 0,
1010
+ cause: "cause" in value ? await serializeJsonSafeValue(value.cause, refs, depth + 1, config) : void 0,
986
1011
  message: value.message,
987
1012
  name: value.name,
988
1013
  props,
@@ -1123,33 +1148,36 @@ function deserializeError(value) {
1123
1148
  });
1124
1149
  return error;
1125
1150
  }
1126
- async function serializeRecordValues(record) {
1151
+ async function serializeRecordValues(record, config) {
1127
1152
  const entries = [];
1128
- for (const [key, value] of Object.entries(record)) entries.push([key, await serializeCacheValue(value)]);
1153
+ for (const [key, value] of Object.entries(record)) {
1154
+ const serializedValue = await serializeCacheValue(value, config);
1155
+ if (serializedValue !== void 0) entries.push([key, serializedValue]);
1156
+ }
1129
1157
  return Object.fromEntries(entries);
1130
1158
  }
1131
1159
  function deserializeRecordValues(record) {
1132
1160
  return Object.fromEntries(Object.entries(record).map(([key, value]) => [key, deserializeCacheValue(value)]));
1133
1161
  }
1134
- async function serializeCacheRecordingOp(op) {
1162
+ async function serializeCacheRecordingOp(op, config) {
1135
1163
  switch (op.kind) {
1136
1164
  case "setOutput":
1137
1165
  case "appendOutput": return {
1138
1166
  ...op,
1139
- value: await serializeCacheValue(op.value)
1167
+ value: await serializeCacheValue(op.value, config)
1140
1168
  };
1141
1169
  case "mergeOutput": return {
1142
1170
  ...op,
1143
- patch: await serializeRecordValues(op.patch)
1171
+ patch: await serializeRecordValues(op.patch, config)
1144
1172
  };
1145
1173
  case "incrementOutput": return op;
1146
1174
  case "checkpoint": return {
1147
1175
  ...op,
1148
- data: await serializeCacheValue(op.data)
1176
+ data: await serializeCacheValue(op.data, config)
1149
1177
  };
1150
1178
  case "subSpan": return {
1151
1179
  ...op,
1152
- span: await serializeCacheSpan(op.span)
1180
+ span: await serializeCacheSpan(op.span, config)
1153
1181
  };
1154
1182
  }
1155
1183
  }
@@ -1175,11 +1203,11 @@ function deserializeCacheRecordingOp(op) {
1175
1203
  };
1176
1204
  }
1177
1205
  }
1178
- async function serializeCacheSpan(span) {
1206
+ async function serializeCacheSpan(span, config) {
1179
1207
  return {
1180
1208
  ...span,
1181
- attributes: span.attributes === void 0 ? void 0 : await serializeRecordValues(span.attributes),
1182
- children: await Promise.all(span.children.map(serializeCacheSpan))
1209
+ attributes: span.attributes === void 0 ? void 0 : await serializeRecordValues(span.attributes, config),
1210
+ children: await Promise.all(span.children.map((child) => serializeCacheSpan(child, config)))
1183
1211
  };
1184
1212
  }
1185
1213
  function deserializeCacheSpan(span) {
@@ -1189,13 +1217,19 @@ function deserializeCacheSpan(span) {
1189
1217
  children: span.children.map(deserializeCacheSpan)
1190
1218
  };
1191
1219
  }
1192
- /** Serialize all rich values captured in a cache recording before persistence. */
1193
- async function serializeCacheRecording(recording) {
1220
+ /**
1221
+ * Serialize all rich values captured in a cache recording before persistence.
1222
+ *
1223
+ * Undefined values are omitted by default; pass `preserveUndefined: true` to
1224
+ * retain the legacy explicit undefined wrappers in the recording payload.
1225
+ */
1226
+ async function serializeCacheRecording(recording, options = void 0) {
1227
+ const config = normalizeCacheSerializationOptions(options);
1194
1228
  return {
1195
1229
  ...recording,
1196
- returnValue: await serializeCacheValue(recording.returnValue),
1197
- finalAttributes: await serializeRecordValues(recording.finalAttributes),
1198
- ops: await Promise.all(recording.ops.map(serializeCacheRecordingOp))
1230
+ returnValue: await serializeCacheValue(recording.returnValue, config),
1231
+ finalAttributes: await serializeRecordValues(recording.finalAttributes, config),
1232
+ ops: await Promise.all(recording.ops.map((op) => serializeCacheRecordingOp(op, config)))
1199
1233
  };
1200
1234
  }
1201
1235
  /** Revive all rich values captured in a cache recording after lookup. */
@@ -1587,7 +1621,9 @@ function createTraceCache(generateSpanId) {
1587
1621
  key: info.key
1588
1622
  }, { serializeFileBytes: info.serializeFileBytes === true });
1589
1623
  const activeSpan = scope.activeSpanStack.at(-1);
1590
- if (cacheCtx.mode === "use") {
1624
+ const canRead = cacheCtx.mode === "use" && cacheCtx.read !== false;
1625
+ const canStore = cacheCtx.mode !== "bypass" && cacheCtx.store !== false;
1626
+ if (canRead) {
1591
1627
  const hit = await cacheCtx.adapter.lookup(namespace, keyHash);
1592
1628
  if (hit) {
1593
1629
  const storedAt = hit.storedAt;
@@ -1610,14 +1646,24 @@ function createTraceCache(generateSpanId) {
1610
1646
  name: info.name,
1611
1647
  namespace,
1612
1648
  key: keyHash,
1613
- status: "miss"
1649
+ status: "miss",
1650
+ ...canStore ? {} : { stored: false }
1614
1651
  });
1615
- } else if (cacheCtx.mode === "refresh") recordCacheRef(scope, activeSpan, {
1652
+ } else if (cacheCtx.mode === "use" && canStore) recordCacheRef(scope, activeSpan, {
1616
1653
  type: "value",
1617
1654
  name: info.name,
1618
1655
  namespace,
1619
1656
  key: keyHash,
1620
- status: "refresh"
1657
+ status: "miss",
1658
+ read: false
1659
+ });
1660
+ else if (cacheCtx.mode === "refresh") recordCacheRef(scope, activeSpan, {
1661
+ type: "value",
1662
+ name: info.name,
1663
+ namespace,
1664
+ key: keyHash,
1665
+ status: "refresh",
1666
+ ...canStore ? {} : { stored: false }
1621
1667
  });
1622
1668
  else recordCacheRef(scope, activeSpan, {
1623
1669
  type: "value",
@@ -1640,7 +1686,7 @@ function createTraceCache(generateSpanId) {
1640
1686
  scope.recordingStack.pop();
1641
1687
  }
1642
1688
  appendSubSpanOps(scope, frame);
1643
- if (cacheCtx.mode !== "bypass") {
1689
+ if (canStore) {
1644
1690
  const finalAttributes = diffNonCacheAttributes(beforeAttributes, await snapshotNonCacheAttributes(activeSpan));
1645
1691
  const recording = {
1646
1692
  returnValue: bodyResult,
@@ -1654,13 +1700,11 @@ function createTraceCache(generateSpanId) {
1654
1700
  operationType: "value",
1655
1701
  operationName: info.name,
1656
1702
  storedAt: new Date(getRealDateNowMs()).toISOString(),
1657
- codeFingerprint: cacheCtx.codeFingerprint,
1658
1703
  recording: await serializeCacheRecording(recording)
1659
1704
  }, {
1660
1705
  rawKey: info.key,
1661
1706
  operationType: "value",
1662
- operationName: info.name,
1663
- codeFingerprint: cacheCtx.codeFingerprint
1707
+ operationName: info.name
1664
1708
  });
1665
1709
  }
1666
1710
  return bodyResult;
@@ -2031,11 +2075,13 @@ async function traceSpanInternal(info, fn) {
2031
2075
  namespace,
2032
2076
  key: cacheOpts.key
2033
2077
  }, { serializeFileBytes: cacheOpts.serializeFileBytes === true });
2078
+ const canRead = ctx.mode === "use" && ctx.read !== false;
2079
+ const canStore = ctx.mode !== "bypass" && ctx.store !== false;
2034
2080
  mergeSpanAttributes(spanRecord, {
2035
2081
  "cache.key": keyHash,
2036
2082
  "cache.namespace": namespace
2037
2083
  });
2038
- if (ctx.mode === "use") {
2084
+ if (canRead) {
2039
2085
  const hit = await ctx.adapter.lookup(namespace, keyHash);
2040
2086
  if (hit) {
2041
2087
  const storedAt = hit.storedAt;
@@ -2050,8 +2096,18 @@ async function traceSpanInternal(info, fn) {
2050
2096
  spanRecord.endedAt = addElapsedMsToTimestamp(spanRecord.startedAt, getRealDateNowMs() - realStartedAt);
2051
2097
  return recording.returnValue;
2052
2098
  }
2053
- mergeSpanAttributes(spanRecord, { "cache.status": "miss" });
2054
- } else if (ctx.mode === "refresh") mergeSpanAttributes(spanRecord, { "cache.status": "refresh" });
2099
+ mergeSpanAttributes(spanRecord, {
2100
+ "cache.status": "miss",
2101
+ ...canStore ? {} : { "cache.stored": false }
2102
+ });
2103
+ } else if (ctx.mode === "use" && canStore) mergeSpanAttributes(spanRecord, {
2104
+ "cache.status": "miss",
2105
+ "cache.read": false
2106
+ });
2107
+ else if (ctx.mode === "refresh") mergeSpanAttributes(spanRecord, {
2108
+ "cache.status": "refresh",
2109
+ ...canStore ? {} : { "cache.stored": false }
2110
+ });
2055
2111
  else mergeSpanAttributes(spanRecord, { "cache.status": "bypass" });
2056
2112
  const frame = {
2057
2113
  baseSpanIndex: scope.spans.length,
@@ -2067,7 +2123,7 @@ async function traceSpanInternal(info, fn) {
2067
2123
  }
2068
2124
  appendSubSpanOps(scope, frame);
2069
2125
  finishSpanWithoutThrownError(spanRecord, realStartedAt);
2070
- if (ctx.mode !== "bypass") {
2126
+ if (canStore) {
2071
2127
  const recording = {
2072
2128
  returnValue: bodyResult,
2073
2129
  finalAttributes: stripCacheAttributes(spanRecord.attributes),
@@ -2087,14 +2143,12 @@ async function traceSpanInternal(info, fn) {
2087
2143
  spanName: info.name,
2088
2144
  spanKind: info.kind,
2089
2145
  storedAt: new Date(getRealDateNowMs()).toISOString(),
2090
- codeFingerprint: ctx.codeFingerprint,
2091
2146
  recording: await serializeCacheRecording(recording)
2092
2147
  };
2093
2148
  await ctx.adapter.write(entry, {
2094
2149
  rawKey: cacheOpts.key,
2095
2150
  operationType: "span",
2096
- operationName: info.name,
2097
- codeFingerprint: ctx.codeFingerprint
2151
+ operationName: info.name
2098
2152
  });
2099
2153
  }
2100
2154
  return bodyResult;
@@ -2287,6 +2341,7 @@ const columnDefSchema = z.object({
2287
2341
  passThreshold: z.number().optional(),
2288
2342
  maxStars: z.number().int().min(2).optional(),
2289
2343
  hideInTable: z.boolean().optional(),
2344
+ hideIfNoValue: z.boolean().optional(),
2290
2345
  align: z.enum([
2291
2346
  "left",
2292
2347
  "center",
@@ -2430,6 +2485,10 @@ const traceCacheRefSchema = z.object({
2430
2485
  namespace: z.string(),
2431
2486
  key: z.string(),
2432
2487
  status: cacheStatusSchema,
2488
+ /** Whether this ref attempted to read from cache. Defaults to true. */
2489
+ read: z.boolean().optional(),
2490
+ /** Whether this ref wrote a persisted cache entry. Defaults to true for misses/refreshes. */
2491
+ stored: z.boolean().optional(),
2433
2492
  storedAt: z.string().optional(),
2434
2493
  age: z.number().optional()
2435
2494
  });
@@ -2442,7 +2501,6 @@ const cacheListItemSchema = z.object({
2442
2501
  spanName: z.string().optional(),
2443
2502
  spanKind: traceSpanKindSchema.optional(),
2444
2503
  storedAt: z.string(),
2445
- codeFingerprint: z.string(),
2446
2504
  sizeBytes: z.number()
2447
2505
  });
2448
2506
  /** Zod schema for `SerializedCacheSpan`, defined lazily for recursion. */
@@ -2524,7 +2582,6 @@ const cacheEntrySchema = z.object({
2524
2582
  spanName: z.string().optional(),
2525
2583
  spanKind: traceSpanKindSchema.optional(),
2526
2584
  storedAt: z.string(),
2527
- codeFingerprint: z.string(),
2528
2585
  recording: cacheRecordingSchema
2529
2586
  });
2530
2587
  /** Debug-only raw key metadata stored outside the reusable cache entry. */
@@ -2535,7 +2592,6 @@ const cacheDebugKeyEntrySchema = z.object({
2535
2592
  operationType: cacheOperationTypeSchema,
2536
2593
  operationName: z.string(),
2537
2594
  storedAt: z.string(),
2538
- codeFingerprint: z.string(),
2539
2595
  rawKey: z.unknown()
2540
2596
  });
2541
2597
  /** Cache lookup response with optional debug-only raw key data. */
@@ -2627,6 +2683,11 @@ const evalChartTooltipExtraSchema = z.discriminatedUnion("source", [z.object({
2627
2683
  const evalChartConfigSchema = z.object({
2628
2684
  /** Optional heading shown above the chart frame in the UI. */
2629
2685
  heading: z.string().optional(),
2686
+ /**
2687
+ * Hide this chart in the UI when none of its metrics has a numeric value in
2688
+ * the rendered history window.
2689
+ */
2690
+ hideIfNoValue: z.boolean().optional(),
2630
2691
  type: evalChartTypeSchema,
2631
2692
  /** At least one series must be declared. */
2632
2693
  metrics: z.array(evalChartMetricSchema).min(1),
@@ -2667,17 +2728,31 @@ const evalStatAggregateSchema = z.enum([
2667
2728
  "sum",
2668
2729
  "last"
2669
2730
  ]);
2731
+ const hideIfNoValueShape = {
2732
+ /**
2733
+ * Hide this stat in the UI when the current run has no displayable value.
2734
+ * Missing values, `null`, and empty strings count as no value; `0` remains
2735
+ * visible.
2736
+ */
2737
+ hideIfNoValue: z.boolean().optional() };
2670
2738
  /**
2671
2739
  * One entry in the EvalCard stats row. Built-in kinds use latest run totals;
2672
2740
  * `column` aggregates a score or numeric output column across the latest run.
2673
2741
  */
2674
2742
  const evalStatItemSchema = z.discriminatedUnion("kind", [
2675
- z.object({ kind: z.literal("cases") }),
2743
+ z.object({
2744
+ kind: z.literal("cases"),
2745
+ ...hideIfNoValueShape
2746
+ }),
2676
2747
  z.object({
2677
2748
  kind: z.literal("passRate"),
2678
- accent: z.boolean().optional()
2749
+ accent: z.boolean().optional(),
2750
+ ...hideIfNoValueShape
2751
+ }),
2752
+ z.object({
2753
+ kind: z.literal("duration"),
2754
+ ...hideIfNoValueShape
2679
2755
  }),
2680
- z.object({ kind: z.literal("duration") }),
2681
2756
  z.object({
2682
2757
  kind: z.literal("column"),
2683
2758
  key: z.string(),
@@ -2686,7 +2761,8 @@ const evalStatItemSchema = z.discriminatedUnion("kind", [
2686
2761
  format: columnFormatSchema.optional(),
2687
2762
  /** Number presentation options applied when `format: 'number'`. */
2688
2763
  numberFormat: numberDisplayOptionsSchema.optional(),
2689
- accent: z.boolean().optional()
2764
+ accent: z.boolean().optional(),
2765
+ ...hideIfNoValueShape
2690
2766
  })
2691
2767
  ]);
2692
2768
  /** Ordered list of stats rendered in the EvalCard stats row. */
@@ -2767,6 +2843,12 @@ const caseRowSchema = z.object({
2767
2843
  });
2768
2844
  /** Structured assertion failure metadata captured for one case run. */
2769
2845
  const assertionFailureSchema = z.object({
2846
+ /**
2847
+ * Error class or category label rendered alongside the message (e.g.
2848
+ * `EvalAssertionError`, `OutputsSchemaError`). Optional for legacy entries
2849
+ * and synthetic failures without an originating Error.
2850
+ */
2851
+ name: z.string().optional(),
2770
2852
  /** Human-readable assertion failure message shown in the UI and artifacts. */
2771
2853
  message: z.string(),
2772
2854
  /** Stack trace captured from the originating error when available. */
@@ -2915,6 +2997,25 @@ const defaultConfigKeySchema = z.enum([
2915
2997
  ]);
2916
2998
  /** Removal config for built-in eval-level outputs and UI metadata. */
2917
2999
  const removeDefaultConfigSchema = z.union([z.literal(true), z.array(defaultConfigKeySchema)]);
3000
+ const evalDeriveValueFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a derive output function" });
3001
+ /** Schema for keyed or object-returning trace-derived output config. */
3002
+ const evalDeriveConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a deriveFromTracing function" }), z.record(z.string().min(1), evalDeriveValueFnSchema)]);
3003
+ /** Schema for UI overrides on derived or scored columns. */
3004
+ const evalColumnOverrideSchema = z.object({
3005
+ label: z.string().optional(),
3006
+ format: columnFormatSchema.optional(),
3007
+ numberFormat: numberDisplayOptionsSchema.optional(),
3008
+ hideInTable: z.boolean().optional(),
3009
+ hideIfNoValue: z.boolean().optional(),
3010
+ align: z.enum([
3011
+ "left",
3012
+ "center",
3013
+ "right"
3014
+ ]).optional(),
3015
+ maxStars: z.number().int().min(2).optional()
3016
+ });
3017
+ /** Schema for column override maps keyed by output or score field name. */
3018
+ const evalColumnsSchema = z.record(z.string(), evalColumnOverrideSchema);
2918
3019
  /** Render formats supported by an LLM-call metric in the UI. */
2919
3020
  const llmCallMetricFormatSchema = z.enum([
2920
3021
  "string",
@@ -2992,18 +3093,9 @@ const apiCallMetricSchema = z.object({
2992
3093
  placements: z.array(apiCallMetricPlacementSchema).nonempty().optional()
2993
3094
  });
2994
3095
  /**
2995
- * Schema for one model/provider pricing entry used to derive LLM-call costs
2996
- * from token counts.
3096
+ * Schema for pricing rates used to derive LLM-call costs from token counts.
2997
3097
  */
2998
- const llmCallPricingSchema = z.object({
2999
- /** Exact model name read from the configured `attributes.model` path. */
3000
- model: z.string().min(1),
3001
- /**
3002
- * Optional provider discriminator read from `attributes.provider`. When set,
3003
- * the entry only applies to calls from that provider; provider-specific
3004
- * entries take precedence over generic entries for the same model.
3005
- */
3006
- provider: z.string().min(1).optional(),
3098
+ const llmCallPricingRateSchema = z.object({
3007
3099
  /** USD per one million non-cached input tokens. */
3008
3100
  inputUsdPerMillion: z.number().nonnegative().optional(),
3009
3101
  /** USD per one million output tokens. */
@@ -3017,6 +3109,23 @@ const llmCallPricingSchema = z.object({
3017
3109
  /** USD per one million reasoning tokens when reported separately. */
3018
3110
  reasoningUsdPerMillion: z.number().nonnegative().optional()
3019
3111
  });
3112
+ /**
3113
+ * Schema for one model's pricing config. The object key is the exact model
3114
+ * name. Use `providers` when a model has provider-specific rates in addition
3115
+ * to, or instead of, generic model rates.
3116
+ */
3117
+ const llmCallPricingSchema = llmCallPricingRateSchema.extend({
3118
+ /**
3119
+ * Optional provider discriminator read from `attributes.provider`. When set,
3120
+ * the top-level entry only applies to calls from that provider.
3121
+ */
3122
+ provider: z.string().min(1).optional(),
3123
+ /**
3124
+ * Provider-specific pricing for the model. Provider entries take precedence
3125
+ * over generic rates for the same model.
3126
+ */
3127
+ providers: z.record(z.string().min(1), llmCallPricingRateSchema).optional()
3128
+ });
3020
3129
  /** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
3021
3130
  const llmCallsConfigSchema = z.object({
3022
3131
  /** Span kinds treated as LLM calls. Defaults to `['llm']`. */
@@ -3053,10 +3162,10 @@ const llmCallsConfigSchema = z.object({
3053
3162
  */
3054
3163
  derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
3055
3164
  /**
3056
- * Model/provider pricing registry used to calculate LLM-call costs from
3057
- * token counts. Built-in LLM cost fields are only derived from this registry.
3165
+ * Model-keyed pricing registry used to calculate LLM-call costs from token
3166
+ * counts. Built-in LLM cost fields are only derived from this registry.
3058
3167
  */
3059
- pricing: z.array(llmCallPricingSchema).optional(),
3168
+ pricing: z.record(z.string().min(1), llmCallPricingSchema).optional(),
3060
3169
  /** Custom user-defined metrics surfaced on each LLM call. */
3061
3170
  metrics: z.array(llmCallMetricSchema).optional()
3062
3171
  });
@@ -3172,6 +3281,33 @@ function resolveApiCallMetric(metric) {
3172
3281
  placements: metric.placements ? [...metric.placements] : ["body"]
3173
3282
  };
3174
3283
  }
3284
+ function hasPricingRates(pricing) {
3285
+ return pricing.inputUsdPerMillion !== void 0 || pricing.outputUsdPerMillion !== void 0 || pricing.cachedInputUsdPerMillion !== void 0 || pricing.cacheCreationInputUsdPerMillion !== void 0 || pricing.cacheCreationInput1hUsdPerMillion !== void 0 || pricing.reasoningUsdPerMillion !== void 0;
3286
+ }
3287
+ function copyPricingRates(pricing) {
3288
+ return {
3289
+ inputUsdPerMillion: pricing.inputUsdPerMillion,
3290
+ outputUsdPerMillion: pricing.outputUsdPerMillion,
3291
+ cachedInputUsdPerMillion: pricing.cachedInputUsdPerMillion,
3292
+ cacheCreationInputUsdPerMillion: pricing.cacheCreationInputUsdPerMillion,
3293
+ cacheCreationInput1hUsdPerMillion: pricing.cacheCreationInput1hUsdPerMillion,
3294
+ reasoningUsdPerMillion: pricing.reasoningUsdPerMillion
3295
+ };
3296
+ }
3297
+ function resolveLlmCallPricingEntries(model, pricing) {
3298
+ const entries = [];
3299
+ if (hasPricingRates(pricing)) entries.push({
3300
+ model,
3301
+ provider: pricing.provider,
3302
+ ...copyPricingRates(pricing)
3303
+ });
3304
+ for (const [provider, providerPricing] of Object.entries(pricing.providers ?? {})) entries.push({
3305
+ model,
3306
+ provider,
3307
+ ...copyPricingRates(providerPricing)
3308
+ });
3309
+ return entries;
3310
+ }
3175
3311
  /**
3176
3312
  * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
3177
3313
  * by the UI to derive the LLM calls tab.
@@ -3182,7 +3318,7 @@ function resolveApiCallMetric(metric) {
3182
3318
  * - Missing `metrics[].format` defaults to `'string'`.
3183
3319
  * - Missing `metrics[].placements` defaults to `['body']`.
3184
3320
  * - Missing `pricing` defaults to an empty registry; built-in costs are only
3185
- * derived from configured pricing and token counts.
3321
+ * derived from configured model-keyed pricing and token counts.
3186
3322
  */
3187
3323
  function resolveLlmCallsConfig(input) {
3188
3324
  return {
@@ -3193,16 +3329,7 @@ function resolveLlmCallsConfig(input) {
3193
3329
  },
3194
3330
  derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
3195
3331
  metrics: (input?.metrics ?? []).map(resolveLlmCallMetric),
3196
- pricing: (input?.pricing ?? []).map((p) => ({
3197
- model: p.model,
3198
- provider: p.provider,
3199
- inputUsdPerMillion: p.inputUsdPerMillion,
3200
- outputUsdPerMillion: p.outputUsdPerMillion,
3201
- cachedInputUsdPerMillion: p.cachedInputUsdPerMillion,
3202
- cacheCreationInputUsdPerMillion: p.cacheCreationInputUsdPerMillion,
3203
- cacheCreationInput1hUsdPerMillion: p.cacheCreationInput1hUsdPerMillion,
3204
- reasoningUsdPerMillion: p.reasoningUsdPerMillion
3205
- }))
3332
+ pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing))
3206
3333
  };
3207
3334
  }
3208
3335
  /**
@@ -3236,6 +3363,9 @@ const agentEvalsConfigSchema = z.object({
3236
3363
  staleAfterDays: z.number().optional(),
3237
3364
  allowCliRunAll: z.boolean().optional(),
3238
3365
  traceDisplay: traceDisplayInputConfigSchema.optional(),
3366
+ columns: evalColumnsSchema.optional(),
3367
+ deriveFromTracing: evalDeriveConfigSchema.optional(),
3368
+ stats: evalStatsConfigSchema.optional(),
3239
3369
  llmCalls: llmCallsConfigSchema.optional(),
3240
3370
  removeDefaultConfig: removeDefaultConfigSchema.optional(),
3241
3371
  apiCalls: apiCallsConfigSchema.optional(),
@@ -3888,6 +4018,11 @@ function readNumber(attributes, key) {
3888
4018
  const value = attributes[key];
3889
4019
  return typeof value === "number" && Number.isFinite(value) ? value : void 0;
3890
4020
  }
4021
+ function readBoolean(attributes, key) {
4022
+ if (!isRecord$2(attributes)) return void 0;
4023
+ const value = attributes[key];
4024
+ return typeof value === "boolean" ? value : void 0;
4025
+ }
3891
4026
  function readArray(attributes, key) {
3892
4027
  if (!isRecord$2(attributes)) return [];
3893
4028
  const value = attributes[key];
@@ -3916,12 +4051,14 @@ function extractCacheEntries(spans, caseCacheRefs) {
3916
4051
  const namespace = readString(span.attributes, "cache.namespace");
3917
4052
  if (key !== void 0 && namespace !== void 0) {
3918
4053
  const isHit = status === "hit";
4054
+ const stored = isHit ? true : readBoolean(span.attributes, "cache.stored") !== false;
3919
4055
  entries.push({
3920
4056
  id: span.id,
3921
4057
  source: "span",
3922
4058
  origin: "span",
3923
- action: isHit ? "hit" : "added",
4059
+ action: isHit ? "hit" : stored ? "added" : "notStored",
3924
4060
  status,
4061
+ stored,
3925
4062
  name: span.name,
3926
4063
  namespace,
3927
4064
  key,
@@ -3938,12 +4075,14 @@ function extractCacheEntries(spans, caseCacheRefs) {
3938
4075
  const ref = parsed.data;
3939
4076
  if (ref.status === "bypass") continue;
3940
4077
  const isHit = ref.status === "hit";
4078
+ const stored = isHit ? true : ref.stored !== false;
3941
4079
  entries.push({
3942
4080
  id: `${span.id}:value:${String(index)}`,
3943
4081
  source: "value",
3944
4082
  origin: "span",
3945
- action: isHit ? "hit" : "added",
4083
+ action: isHit ? "hit" : stored ? "added" : "notStored",
3946
4084
  status: ref.status,
4085
+ stored,
3947
4086
  name: ref.name,
3948
4087
  namespace: ref.namespace,
3949
4088
  key: ref.key,
@@ -3956,12 +4095,14 @@ function extractCacheEntries(spans, caseCacheRefs) {
3956
4095
  for (const [index, ref] of caseCacheRefs.entries()) {
3957
4096
  if (ref.status === "bypass") continue;
3958
4097
  const isHit = ref.status === "hit";
4098
+ const stored = isHit ? true : ref.stored !== false;
3959
4099
  entries.push({
3960
4100
  id: `case:value:${String(index)}`,
3961
4101
  source: "value",
3962
4102
  origin: "caseRoot",
3963
- action: isHit ? "hit" : "added",
4103
+ action: isHit ? "hit" : stored ? "added" : "notStored",
3964
4104
  status: ref.status,
4105
+ stored,
3965
4106
  name: ref.name,
3966
4107
  namespace: ref.namespace,
3967
4108
  key: ref.key,
@@ -4033,7 +4174,8 @@ const updateManualScoreRequestSchema = z.object({ value: z.number().min(0).max(1
4033
4174
  //#endregion
4034
4175
  //#region ../runner/src/cacheStore.ts
4035
4176
  const defaultMaxEntriesPerNamespace = 100;
4036
- const cacheSerializationMarker = "__agentEvalsCacheSerialization";
4177
+ const cacheSerializationMarker = "__aecs";
4178
+ const legacyCacheSerializationMarker = "__agentEvalsCacheSerialization";
4037
4179
  const supportedCacheSerializationVersion = "json-safe-v1";
4038
4180
  /**
4039
4181
  * Create a filesystem-backed cache adapter rooted at `<workspaceRoot>/<dir>`.
@@ -4118,7 +4260,6 @@ function createFsCacheStore(options) {
4118
4260
  spanName: entry.spanName,
4119
4261
  spanKind: entry.spanKind,
4120
4262
  storedAt: entry.storedAt,
4121
- codeFingerprint: entry.codeFingerprint,
4122
4263
  sizeBytes: Buffer.byteLength(JSON.stringify(entry), "utf8")
4123
4264
  });
4124
4265
  }
@@ -4247,7 +4388,7 @@ async function readCacheFilePath(filePath) {
4247
4388
  function usesSupportedCacheSerialization(value) {
4248
4389
  if (Array.isArray(value)) return value.every(usesSupportedCacheSerialization);
4249
4390
  if (!isRecordLike(value)) return true;
4250
- if (Object.hasOwn(value, cacheSerializationMarker) && value[cacheSerializationMarker] !== supportedCacheSerializationVersion) return false;
4391
+ for (const marker of [cacheSerializationMarker, legacyCacheSerializationMarker]) if (Object.hasOwn(value, marker) && value[marker] !== supportedCacheSerializationVersion) return false;
4251
4392
  return Object.values(value).every(usesSupportedCacheSerialization);
4252
4393
  }
4253
4394
  async function writeOrRemoveCacheFile(cacheDir, cacheFile) {
@@ -4291,7 +4432,6 @@ async function writeDebugKeyEntry(params) {
4291
4432
  operationType: debugKey.operationType,
4292
4433
  operationName: debugKey.operationName,
4293
4434
  storedAt: entry.storedAt,
4294
- codeFingerprint: debugKey.codeFingerprint,
4295
4435
  rawKey: debugKey.rawKey
4296
4436
  };
4297
4437
  await writeDebugKeyFile(debugDir, {
@@ -4507,6 +4647,7 @@ function getScoreOverride(def) {
4507
4647
  format: def.format,
4508
4648
  numberFormat: def.numberFormat,
4509
4649
  hideInTable: def.hideInTable,
4650
+ hideIfNoValue: def.hideIfNoValue,
4510
4651
  align: def.align,
4511
4652
  maxStars: def.maxStars
4512
4653
  };
@@ -4519,6 +4660,7 @@ function mergeOverrides(base, override) {
4519
4660
  format: override.format ?? base.format,
4520
4661
  numberFormat: override.numberFormat ?? base.numberFormat,
4521
4662
  hideInTable: override.hideInTable ?? base.hideInTable,
4663
+ hideIfNoValue: override.hideIfNoValue ?? base.hideIfNoValue,
4522
4664
  align: override.align ?? base.align,
4523
4665
  maxStars: override.maxStars ?? base.maxStars
4524
4666
  };
@@ -4633,6 +4775,7 @@ function createColumnDef(params) {
4633
4775
  if (override?.numberFormat !== void 0) def.numberFormat = override.numberFormat;
4634
4776
  if (override?.maxStars !== void 0) def.maxStars = override.maxStars;
4635
4777
  if (override?.hideInTable !== void 0) def.hideInTable = override.hideInTable;
4778
+ if (override?.hideIfNoValue !== void 0) def.hideIfNoValue = override.hideIfNoValue;
4636
4779
  if (override?.align !== void 0) def.align = override.align;
4637
4780
  if (!isScore) return def;
4638
4781
  def.isScore = true;
@@ -4717,60 +4860,70 @@ const DEFAULT_COLUMNS = {
4717
4860
  label: "API Calls",
4718
4861
  format: "number",
4719
4862
  numberFormat: countNumberFormat,
4720
- align: "right"
4863
+ align: "right",
4864
+ hideIfNoValue: true
4721
4865
  },
4722
4866
  costUsd: {
4723
4867
  label: "Cost",
4724
4868
  format: "number",
4725
4869
  numberFormat: costNumberFormat,
4726
- align: "right"
4870
+ align: "right",
4871
+ hideIfNoValue: true
4727
4872
  },
4728
4873
  llmTurns: {
4729
4874
  label: "LLM Turns",
4730
4875
  format: "number",
4731
4876
  numberFormat: countNumberFormat,
4732
- align: "right"
4877
+ align: "right",
4878
+ hideIfNoValue: true
4733
4879
  },
4734
4880
  inputTokens: {
4735
4881
  label: "Input Tokens",
4736
4882
  format: "number",
4737
4883
  numberFormat: tokenNumberFormat,
4738
- align: "right"
4884
+ align: "right",
4885
+ hideIfNoValue: true
4739
4886
  },
4740
4887
  outputTokens: {
4741
4888
  label: "Output Tokens",
4742
4889
  format: "number",
4743
4890
  numberFormat: tokenNumberFormat,
4744
- align: "right"
4891
+ align: "right",
4892
+ hideIfNoValue: true
4745
4893
  },
4746
4894
  totalTokens: {
4747
4895
  label: "Total Tokens",
4748
4896
  format: "number",
4749
4897
  numberFormat: tokenNumberFormat,
4750
- align: "right"
4898
+ align: "right",
4899
+ hideIfNoValue: true
4751
4900
  },
4752
4901
  cachedInputTokens: {
4753
4902
  label: "Cached Input Tokens",
4754
4903
  format: "number",
4755
4904
  numberFormat: tokenNumberFormat,
4756
- align: "right"
4905
+ align: "right",
4906
+ hideIfNoValue: true
4757
4907
  },
4758
4908
  cacheCreationInputTokens: {
4759
4909
  label: "Cache Write Tokens",
4760
4910
  format: "number",
4761
4911
  numberFormat: tokenNumberFormat,
4762
- align: "right"
4912
+ align: "right",
4913
+ hideIfNoValue: true
4763
4914
  },
4764
4915
  reasoningTokens: {
4765
4916
  label: "Reasoning Tokens",
4766
4917
  format: "number",
4767
4918
  numberFormat: tokenNumberFormat,
4768
- align: "right"
4919
+ align: "right",
4920
+ hideIfNoValue: true
4769
4921
  },
4770
4922
  llmDurationMs: {
4771
4923
  label: "LLM Duration",
4772
4924
  format: "duration",
4773
- align: "right"
4925
+ align: "right",
4926
+ hideIfNoValue: true
4774
4927
  }
4775
4928
  };
4776
4929
  function resolveRemovedKeys(globalRemove, evalRemove) {
@@ -4783,9 +4936,16 @@ function getActiveDefaultConfigKeys(params) {
4783
4936
  }
4784
4937
  function mergeDefaultColumns(params) {
4785
4938
  const activeKeys = getActiveDefaultConfigKeys(params);
4786
- if (activeKeys.length === 0) return params.columns;
4939
+ if (activeKeys.length === 0) {
4940
+ const merged = {
4941
+ ...params.globalColumns,
4942
+ ...params.columns
4943
+ };
4944
+ return Object.keys(merged).length > 0 ? merged : void 0;
4945
+ }
4787
4946
  return {
4788
4947
  ...Object.fromEntries(activeKeys.map((key) => [key, DEFAULT_COLUMNS[key]])),
4948
+ ...params.globalColumns,
4789
4949
  ...params.columns
4790
4950
  };
4791
4951
  }
@@ -4797,30 +4957,38 @@ function appendDefaultStats(params) {
4797
4957
  key: "apiCalls",
4798
4958
  label: "API Calls",
4799
4959
  aggregate: "avg",
4800
- numberFormat: countNumberFormat
4960
+ numberFormat: countNumberFormat,
4961
+ hideIfNoValue: true
4801
4962
  });
4802
4963
  if (activeKeys.has("costUsd")) defaults.push({
4803
4964
  kind: "column",
4804
4965
  key: "costUsd",
4805
4966
  label: "LLM Cost",
4806
4967
  aggregate: "avg",
4807
- numberFormat: costNumberFormat
4968
+ numberFormat: costNumberFormat,
4969
+ hideIfNoValue: true
4808
4970
  });
4809
4971
  if (activeKeys.has("totalTokens")) defaults.push({
4810
4972
  kind: "column",
4811
4973
  key: "totalTokens",
4812
4974
  label: "Tokens",
4813
4975
  aggregate: "avg",
4814
- numberFormat: tokenNumberFormat
4976
+ numberFormat: tokenNumberFormat,
4977
+ hideIfNoValue: true
4815
4978
  });
4816
4979
  if (activeKeys.has("llmTurns")) defaults.push({
4817
4980
  kind: "column",
4818
4981
  key: "llmTurns",
4819
4982
  label: "LLM Turns",
4820
4983
  aggregate: "avg",
4821
- numberFormat: countNumberFormat
4984
+ numberFormat: countNumberFormat,
4985
+ hideIfNoValue: true
4822
4986
  });
4823
- const merged = [...params.stats ?? [], ...defaults];
4987
+ const merged = [
4988
+ ...params.globalStats ?? [],
4989
+ ...params.stats ?? [],
4990
+ ...defaults
4991
+ ];
4824
4992
  return merged.length > 0 ? merged : void 0;
4825
4993
  }
4826
4994
  function appendDefaultCharts(params) {
@@ -4828,6 +4996,7 @@ function appendDefaultCharts(params) {
4828
4996
  const defaults = [];
4829
4997
  if (activeKeys.has("costUsd")) defaults.push({
4830
4998
  heading: "LLM Cost",
4999
+ hideIfNoValue: true,
4831
5000
  type: "area",
4832
5001
  metrics: [{
4833
5002
  source: "column",
@@ -4869,6 +5038,7 @@ function appendDefaultCharts(params) {
4869
5038
  ].filter((metric) => metric !== null);
4870
5039
  if (tokenMetrics.length > 0) defaults.push({
4871
5040
  heading: "LLM Tokens",
5041
+ hideIfNoValue: true,
4872
5042
  type: "bar",
4873
5043
  metrics: tokenMetrics,
4874
5044
  tooltipExtras: activeKeys.has("totalTokens") ? [{
@@ -4885,11 +5055,13 @@ function resolveEvalDefaultConfig(params) {
4885
5055
  const evalRemove = params.evalDef.removeDefaultConfig;
4886
5056
  return {
4887
5057
  columns: mergeDefaultColumns({
5058
+ globalColumns: params.globalColumns,
4888
5059
  columns: params.evalDef.columns,
4889
5060
  globalRemove: params.globalRemove,
4890
5061
  evalRemove
4891
5062
  }),
4892
5063
  stats: appendDefaultStats({
5064
+ globalStats: params.globalStats,
4893
5065
  stats: params.evalDef.stats,
4894
5066
  globalRemove: params.globalRemove,
4895
5067
  evalRemove
@@ -5227,6 +5399,65 @@ function isFile(value) {
5227
5399
  return value instanceof File;
5228
5400
  }
5229
5401
  //#endregion
5402
+ //#region ../runner/src/traceDisplay.ts
5403
+ function isRecord$1(value) {
5404
+ return typeof value === "object" && value !== null;
5405
+ }
5406
+ function mergeNestedAttribute(value, path, attributeValue) {
5407
+ const root = value === void 0 ? {} : { ...value };
5408
+ const parts = path.split(".");
5409
+ let current = root;
5410
+ for (const [index, part] of parts.entries()) {
5411
+ if (index === parts.length - 1) {
5412
+ current[part] = attributeValue;
5413
+ continue;
5414
+ }
5415
+ const nextValue = current[part];
5416
+ const nextRecord = isRecord$1(nextValue) ? { ...nextValue } : {};
5417
+ current[part] = nextRecord;
5418
+ current = nextRecord;
5419
+ }
5420
+ return root;
5421
+ }
5422
+ function resolveTracePresentation(spans, globalTraceDisplay, evalTraceDisplay) {
5423
+ const merged = /* @__PURE__ */ new Map();
5424
+ for (const attribute of globalTraceDisplay?.attributes ?? []) merged.set(attribute.key ?? attribute.path, attribute);
5425
+ for (const attribute of evalTraceDisplay?.attributes ?? []) merged.set(attribute.key ?? attribute.path, attribute);
5426
+ const resolvedAttributes = [];
5427
+ const transformedTrace = spans.map((span) => ({
5428
+ ...span,
5429
+ attributes: span.attributes === void 0 ? void 0 : { ...span.attributes }
5430
+ }));
5431
+ for (const attribute of merged.values()) {
5432
+ const resolvedPath = attribute.transform ? `__display.${attribute.key ?? attribute.path}` : attribute.path;
5433
+ resolvedAttributes.push({
5434
+ key: attribute.key,
5435
+ path: resolvedPath,
5436
+ label: attribute.label,
5437
+ format: attribute.format,
5438
+ numberFormat: attribute.numberFormat,
5439
+ placements: attribute.placements,
5440
+ scope: attribute.scope,
5441
+ mode: attribute.mode
5442
+ });
5443
+ if (!attribute.transform) continue;
5444
+ for (const span of transformedTrace) {
5445
+ const sourceValue = getNestedAttribute(span.attributes, attribute.path);
5446
+ if (sourceValue === void 0) continue;
5447
+ const transformedValue = attribute.transform({
5448
+ value: sourceValue,
5449
+ span
5450
+ });
5451
+ if (transformedValue === void 0) continue;
5452
+ span.attributes = mergeNestedAttribute(span.attributes, resolvedPath, transformedValue);
5453
+ }
5454
+ }
5455
+ return {
5456
+ trace: transformedTrace,
5457
+ traceDisplay: { attributes: resolvedAttributes }
5458
+ };
5459
+ }
5460
+ //#endregion
5230
5461
  //#region ../runner/src/runMaintenance.ts
5231
5462
  async function persistRunState(runState) {
5232
5463
  await writeFile(join(runState.runDir, "summary.json"), JSON.stringify(runState.summary, null, 2));
@@ -5551,65 +5782,6 @@ function stripTerminalControlCodes(value) {
5551
5782
  return stripVTControlCharacters(value).replaceAll(orphanedAnsiSgrPattern, "");
5552
5783
  }
5553
5784
  //#endregion
5554
- //#region ../runner/src/traceDisplay.ts
5555
- function isRecord$1(value) {
5556
- return typeof value === "object" && value !== null;
5557
- }
5558
- function mergeNestedAttribute(value, path, attributeValue) {
5559
- const root = value === void 0 ? {} : { ...value };
5560
- const parts = path.split(".");
5561
- let current = root;
5562
- for (const [index, part] of parts.entries()) {
5563
- if (index === parts.length - 1) {
5564
- current[part] = attributeValue;
5565
- continue;
5566
- }
5567
- const nextValue = current[part];
5568
- const nextRecord = isRecord$1(nextValue) ? { ...nextValue } : {};
5569
- current[part] = nextRecord;
5570
- current = nextRecord;
5571
- }
5572
- return root;
5573
- }
5574
- function resolveTracePresentation(spans, globalTraceDisplay, evalTraceDisplay) {
5575
- const merged = /* @__PURE__ */ new Map();
5576
- for (const attribute of globalTraceDisplay?.attributes ?? []) merged.set(attribute.key ?? attribute.path, attribute);
5577
- for (const attribute of evalTraceDisplay?.attributes ?? []) merged.set(attribute.key ?? attribute.path, attribute);
5578
- const resolvedAttributes = [];
5579
- const transformedTrace = spans.map((span) => ({
5580
- ...span,
5581
- attributes: span.attributes === void 0 ? void 0 : { ...span.attributes }
5582
- }));
5583
- for (const attribute of merged.values()) {
5584
- const resolvedPath = attribute.transform ? `__display.${attribute.key ?? attribute.path}` : attribute.path;
5585
- resolvedAttributes.push({
5586
- key: attribute.key,
5587
- path: resolvedPath,
5588
- label: attribute.label,
5589
- format: attribute.format,
5590
- numberFormat: attribute.numberFormat,
5591
- placements: attribute.placements,
5592
- scope: attribute.scope,
5593
- mode: attribute.mode
5594
- });
5595
- if (!attribute.transform) continue;
5596
- for (const span of transformedTrace) {
5597
- const sourceValue = getNestedAttribute(span.attributes, attribute.path);
5598
- if (sourceValue === void 0) continue;
5599
- const transformedValue = attribute.transform({
5600
- value: sourceValue,
5601
- span
5602
- });
5603
- if (transformedValue === void 0) continue;
5604
- span.attributes = mergeNestedAttribute(span.attributes, resolvedPath, transformedValue);
5605
- }
5606
- }
5607
- return {
5608
- trace: transformedTrace,
5609
- traceDisplay: { attributes: resolvedAttributes }
5610
- };
5611
- }
5612
- //#endregion
5613
5785
  //#region ../runner/src/runExecution.ts
5614
5786
  function filterEvalCases(cases, caseIds) {
5615
5787
  if (!caseIds || caseIds.length === 0) return cases;
@@ -5639,8 +5811,54 @@ function buildScopedEvalIdPrefix(params) {
5639
5811
  async function callWithUnknownResult(fn, args) {
5640
5812
  return await Reflect.apply(fn, void 0, args);
5641
5813
  }
5814
+ async function callUnknownFunction(fn, args) {
5815
+ if (typeof fn !== "function") throw new Error("Expected a function");
5816
+ return await Reflect.apply(fn, void 0, args);
5817
+ }
5818
+ function assignDerivedOutputs(params) {
5819
+ for (const [key, value] of Object.entries(params.derived)) {
5820
+ if (key in params.outputs) continue;
5821
+ params.outputs[key] = value;
5822
+ }
5823
+ }
5824
+ async function resolveDeriveFromTracingConfig(params) {
5825
+ const ctx = {
5826
+ trace: params.traceTree,
5827
+ input: params.evalCase.input,
5828
+ case: params.evalCase
5829
+ };
5830
+ if (typeof params.deriveFromTracing === "function") {
5831
+ const derived = await callUnknownFunction(params.deriveFromTracing, [ctx]);
5832
+ if (!isRecord(derived)) throw new Error("deriveFromTracing must return an object");
5833
+ return derived;
5834
+ }
5835
+ const derived = {};
5836
+ for (const [key, compute] of Object.entries(params.deriveFromTracing)) {
5837
+ const value = await callUnknownFunction(compute, [ctx]);
5838
+ if (value !== void 0) derived[key] = value;
5839
+ }
5840
+ return derived;
5841
+ }
5842
+ async function runDeriveFromTracingConfig(params) {
5843
+ if (params.deriveFromTracing === void 0) return;
5844
+ const { deriveFromTracing } = params;
5845
+ try {
5846
+ const derived = await runInExistingEvalScope(params.scope, "derive", async () => await resolveDeriveFromTracingConfig({
5847
+ deriveFromTracing,
5848
+ traceTree: params.traceTree,
5849
+ evalCase: params.evalCase
5850
+ }));
5851
+ assignDerivedOutputs({
5852
+ outputs: params.scope.outputs,
5853
+ derived
5854
+ });
5855
+ } catch (e) {
5856
+ const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
5857
+ params.scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
5858
+ }
5859
+ }
5642
5860
  async function runCase(params) {
5643
- const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, codeFingerprint, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
5861
+ const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, globalColumns, globalDeriveFromTracing, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
5644
5862
  const scopedIdPrefix = buildScopedEvalIdPrefix({
5645
5863
  evalId,
5646
5864
  evalFilePath,
@@ -5672,7 +5890,8 @@ async function runCase(params) {
5672
5890
  adapter: cacheAdapter,
5673
5891
  mode: cacheMode,
5674
5892
  evalId,
5675
- codeFingerprint
5893
+ read: evalDef.cache?.read,
5894
+ store: evalDef.cache?.store
5676
5895
  } : void 0,
5677
5896
  startTime: evalDef.startTime,
5678
5897
  freezeTime: evalDef.freezeTime
@@ -5685,22 +5904,19 @@ async function runCase(params) {
5685
5904
  const traceTree = buildTraceTree(spansWithDerivedAttributes, scope.checkpoints);
5686
5905
  const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
5687
5906
  if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
5688
- if (!nonAssertError && evalDef.deriveFromTracing) {
5689
- const { deriveFromTracing } = evalDef;
5690
- try {
5691
- const derived = await runInExistingEvalScope(scope, "derive", async () => {
5692
- return await callWithUnknownResult(deriveFromTracing, [{
5693
- trace: traceTree,
5694
- input: evalCase.input,
5695
- case: evalCase
5696
- }]);
5697
- });
5698
- if (!isRecord(derived)) throw new Error("deriveFromTracing must return an object");
5699
- for (const [key, value] of Object.entries(derived)) if (!(key in scope.outputs)) scope.outputs[key] = value;
5700
- } catch (e) {
5701
- const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
5702
- scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
5703
- }
5907
+ if (!nonAssertError) {
5908
+ await runDeriveFromTracingConfig({
5909
+ deriveFromTracing: globalDeriveFromTracing,
5910
+ scope,
5911
+ traceTree,
5912
+ evalCase
5913
+ });
5914
+ await runDeriveFromTracingConfig({
5915
+ deriveFromTracing: evalDef.deriveFromTracing,
5916
+ scope,
5917
+ traceTree,
5918
+ evalCase
5919
+ });
5704
5920
  }
5705
5921
  if (!nonAssertError) addDefaultOutputs({
5706
5922
  outputs: scope.outputs,
@@ -5717,7 +5933,7 @@ async function runCase(params) {
5717
5933
  ...scope.outputs,
5718
5934
  ...parsedOutputs.data
5719
5935
  };
5720
- else scope.assertionFailures.push(toAssertionFailure(formatOutputsSchemaError(parsedOutputs.error)));
5936
+ else scope.assertionFailures.push(toAssertionFailure(formatOutputsSchemaError(parsedOutputs.error), void 0, "OutputsSchemaError"));
5721
5937
  }
5722
5938
  const scoreResults = /* @__PURE__ */ new Map();
5723
5939
  const scoringTraces = {};
@@ -5740,7 +5956,8 @@ async function runCase(params) {
5740
5956
  adapter: cacheAdapter,
5741
5957
  mode: cacheMode,
5742
5958
  evalId: `${evalId}__score__${key}`,
5743
- codeFingerprint
5959
+ read: evalDef.cache?.read,
5960
+ store: evalDef.cache?.store
5744
5961
  } : void 0,
5745
5962
  startTime: scoreStartTime,
5746
5963
  freezeTime: evalDef.freezeTime
@@ -5795,6 +6012,7 @@ async function runCase(params) {
5795
6012
  const { trace: displayTrace, traceDisplay } = resolveTracePresentation(spansWithDerivedAttributes, globalTraceDisplay, evalDef.traceDisplay);
5796
6013
  const columns = {};
5797
6014
  const columnOverrides = mergeDefaultColumns({
6015
+ globalColumns,
5798
6016
  columns: evalDef.columns,
5799
6017
  globalRemove: globalRemoveDefaultConfig,
5800
6018
  evalRemove: evalDef.removeDefaultConfig
@@ -5858,14 +6076,17 @@ function formatOutputsSchemaError(error) {
5858
6076
  const issueLines = error.issues.map((issue) => {
5859
6077
  return `${issue.path.length > 0 ? issue.path.join(".") : "<root>"}: ${issue.message}`;
5860
6078
  });
5861
- if (issueLines.length === 0) return "outputsSchema validation failed";
5862
- return `outputsSchema validation failed:\n${issueLines.join("\n")}`;
6079
+ if (issueLines.length === 0) return "outputs did not match the configured schema";
6080
+ return issueLines.join("\n");
5863
6081
  }
5864
- function toAssertionFailure(message, error = void 0) {
5865
- return error?.stack ? {
6082
+ function toAssertionFailure(message, error = void 0, nameOverride = void 0) {
6083
+ const name = nameOverride ?? error?.name;
6084
+ const stack = error?.stack ? stripTerminalControlCodes(error.stack) : void 0;
6085
+ return {
6086
+ ...name !== void 0 ? { name } : {},
5866
6087
  message,
5867
- stack: stripTerminalControlCodes(error.stack)
5868
- } : { message };
6088
+ ...stack !== void 0 ? { stack } : {}
6089
+ };
5869
6090
  }
5870
6091
  //#endregion
5871
6092
  //#region ../runner/src/runQueue.ts
@@ -6095,15 +6316,15 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6095
6316
  const apiCallsConfig = resolveApiCallsConfig(config.apiCalls);
6096
6317
  for (const evalMeta of targetEvals) {
6097
6318
  const evalFilePath = evalMeta.sourceFilePath;
6098
- let codeFingerprint = "";
6319
+ let sourceFingerprint = "";
6099
6320
  try {
6100
- codeFingerprint = getSourceFingerprint(await readFile(evalFilePath, "utf-8"));
6321
+ sourceFingerprint = getSourceFingerprint(await readFile(evalFilePath, "utf-8"));
6101
6322
  } catch {
6102
- codeFingerprint = "";
6323
+ sourceFingerprint = "";
6103
6324
  }
6104
- if (codeFingerprint.length > 0) {
6105
- runState.manifest.evalSourceFingerprints[evalMeta.key] = codeFingerprint;
6106
- evalMeta.sourceFingerprint = codeFingerprint;
6325
+ if (sourceFingerprint.length > 0) {
6326
+ runState.manifest.evalSourceFingerprints[evalMeta.key] = sourceFingerprint;
6327
+ evalMeta.sourceFingerprint = sourceFingerprint;
6107
6328
  } else {
6108
6329
  delete runState.manifest.evalSourceFingerprints[evalMeta.key];
6109
6330
  evalMeta.sourceFingerprint = null;
@@ -6112,7 +6333,7 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6112
6333
  const registry = getEvalRegistry();
6113
6334
  await runWithModuleIsolation(moduleIsolation, async () => {
6114
6335
  await runInEvalRuntimeScope("env", async () => {
6115
- await loadEvalModule(evalFilePath, codeFingerprint);
6336
+ await loadEvalModule(evalFilePath, sourceFingerprint);
6116
6337
  });
6117
6338
  });
6118
6339
  const entry = registry.get(evalMeta.id);
@@ -6136,6 +6357,8 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6136
6357
  runState.summary.totalCases += cases.length;
6137
6358
  const defaultConfig = resolveEvalDefaultConfig({
6138
6359
  evalDef,
6360
+ globalColumns: config.columns,
6361
+ globalStats: config.stats,
6139
6362
  globalRemove: config.removeDefaultConfig
6140
6363
  });
6141
6364
  const declaredColumnDefs = buildDeclaredColumnDefs(defaultConfig.columns, evalDef.scores, evalDef.manualScores);
@@ -6181,6 +6404,8 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6181
6404
  evalKey: evalMeta.key,
6182
6405
  evalCase,
6183
6406
  globalTraceDisplay,
6407
+ globalColumns: config.columns,
6408
+ globalDeriveFromTracing: config.deriveFromTracing,
6184
6409
  llmCallsConfig,
6185
6410
  apiCallsConfig,
6186
6411
  globalRemoveDefaultConfig: config.removeDefaultConfig,
@@ -6188,7 +6413,6 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6188
6413
  startTime,
6189
6414
  cacheAdapter: bufferedCacheStore ?? (cacheEnabled ? cacheStore : null),
6190
6415
  cacheMode,
6191
- codeFingerprint,
6192
6416
  moduleIsolation,
6193
6417
  evalFilePath,
6194
6418
  evalFileRelativePath: evalMeta.filePath,
@@ -6343,4 +6567,4 @@ function toLastRunStatus(status) {
6343
6567
  return status === "pending" ? null : status;
6344
6568
  }
6345
6569
  //#endregion
6346
- export { llmCallsConfigSchema as $, traceSpanKindSchema as $t, extractApiCalls as A, getCurrentScope as An, evalChartTypeSchema as At, runSummarySchema as B, setEvalOutput as Bn, cacheRecordingOpSchema as Bt, validateCharts as C, evalExpect as Cn, evalChartAggregateSchema as Ct, sseEnvelopeSchema as D, configureEvalRunLogs as Dn, evalChartConfigSchema as Dt, updateManualScoreRequestSchema as E, appendToEvalOutput as En, evalChartColorSchema as Et, getEvalDisplayStatus as F, mergeEvalOutput as Fn, cacheEntryWithDebugKeySchema as Ft, apiCallMetricPlacementSchema as G, traceCacheRefSchema as Gt, DEFAULT_LLM_CALLS_CONFIG as H, startEvalBackgroundJob as Hn, cacheStatusSchema as Ht, deriveScopedSummaryFromCases as I, nextEvalId as In, cacheFileSchema as It, defaultConfigKeySchema as J, traceAttributeDisplayPlacementSchema as Jt, apiCallMetricSchema as K, traceAttributeDisplayFormatSchema as Kt, deriveStatusFromCaseRows as L, runInEvalRuntimeScope as Ln, cacheListItemSchema as Lt, applyDerivedCallAttributes as M, getEvalStartTime as Mn, cacheDebugKeyEntrySchema as Mt, getNestedAttribute as N, incrementEvalOutput as Nn, cacheDebugKeyFileSchema as Nt, extractCacheEntries as O, evalAssert as On, evalChartMetricSchema as Ot, getEvalTitle as P, isInEvalScope as Pn, cacheEntrySchema as Pt, llmCallPricingSchema as Q, traceSpanErrorSchema as Qt, deriveStatusFromChildStatuses as R, runInEvalScope as Rn, cacheModeSchema as Rt, normalizeScoreDef as S, repoFile as Sn, scoreTraceSchema as St, createRunRequestSchema as T, advanceEvalTime as Tn, evalChartBuiltinMetricSchema as Tt, agentEvalsConfigSchema as U, defineEval as Un, serializedCacheSpanSchema as Ut, DEFAULT_API_CALLS_CONFIG as V, setScopeCacheContext as Vn, cacheRecordingSchema as Vt, apiCallMetricFormatSchema as W, getEvalRegistry as Wn, spanCacheOptionsSchema as Wt, llmCallMetricPlacementSchema as X, traceDisplayConfigSchema as Xt, llmCallMetricFormatSchema as Y, traceAttributeDisplaySchema as Yt, llmCallMetricSchema as Z, traceDisplayInputConfigSchema as Zt, loadEvalModule as _, hashCacheKeySync as _n, evalSummarySchema as _t, getLastRunStatuses as a, columnKindSchema as an, buildCaseKey as at, loadConfig as b, serializeCacheRecording as bn, runLogLocationSchema as bt, loadPersistedRunSnapshots as c, numberDisplayOptionsSchema as cn, getCaseRowEvalKey as ct, persistRunState as d, z$1 as dn, caseRowSchema as dt, traceSpanSchema as en, removeDefaultConfigSchema as et, recomputeEvalStatusesInRuns as f, buildTraceTree as fn, discoveryIssueSchema as ft, deriveEvalFreshness as g, hashCacheKey as gn, evalStatsConfigSchema as gt, resolveArtifactPath as h, evalTracer as hn, evalStatItemSchema as ht, generateRunId as i, columnFormatSchema as in, trialSelectionModeSchema as it, extractLlmCalls as j, getEvalCaseInput as jn, evalChartsConfigSchema as jt, extractCacheHits as k, evalLog as kn, evalChartTooltipExtraSchema as kt, nextShortIdFromSnapshots as l, repoFileRefSchema as ln, assertionFailureSchema as lt, runTouchesEval as m, evalSpan as mn, evalStatAggregateSchema as mt, getTargetEvalKeys as n, cellValueSchema as nn, resolveLlmCallsConfig as nt, getLatestRunInfos as o, fileRefSchema as on, buildEvalKey as ot, recomputePersistedCaseStatus as p, captureEvalSpanError as pn, evalFreshnessStatusSchema as pt, apiCallsConfigSchema as q, traceAttributeDisplayInputSchema as qt, getTargetEvals as r, columnDefSchema as rn, runLogsConfigSchema as rt, loadPersistedRunSnapshot as s, jsonCellSchema as sn, getCaseRowCaseKey as st, executeRun as t, traceSpanWarningSchema as tn, resolveApiCallsConfig as tt, persistCaseDetail as u, runArtifactRefSchema as un, caseDetailSchema as ut, parseEvalDiscovery as v, deserializeCacheRecording as vn, runLogEntrySchema as vt, createFsCacheStore as w, EvalAssertionError as wn, evalChartAxisSchema as wt, buildDeclaredColumnDefs as x, serializeCacheValue as xn, runLogPhaseSchema as xt, resolveEvalDefaultConfig as y, deserializeCacheValue as yn, runLogLevelSchema as yt, runManifestSchema as z, runInExistingEvalScope as zn, cacheOperationTypeSchema as zt };
6570
+ export { llmCallMetricFormatSchema as $, traceAttributeDisplayPlacementSchema as $t, extractCacheHits as A, advanceEvalTime as An, evalChartBuiltinMetricSchema as At, runManifestSchema as B, mergeEvalOutput as Bn, cacheEntryWithDebugKeySchema as Bt, normalizeScoreDef as C, deserializeCacheRecording as Cn, runLogEntrySchema as Ct, updateManualScoreRequestSchema as D, repoFile as Dn, scoreTraceSchema as Dt, createRunRequestSchema as E, serializeCacheValue as En, runLogPhaseSchema as Et, getEvalTitle as F, getCurrentScope as Fn, evalChartTypeSchema as Ft, apiCallMetricFormatSchema as G, setEvalOutput as Gn, cacheRecordingOpSchema as Gt, DEFAULT_API_CALLS_CONFIG as H, runInEvalRuntimeScope as Hn, cacheListItemSchema as Ht, getEvalDisplayStatus as I, getEvalCaseInput as In, evalChartsConfigSchema as It, apiCallsConfigSchema as J, defineEval as Jn, serializedCacheSpanSchema as Jt, apiCallMetricPlacementSchema as K, setScopeCacheContext as Kn, cacheRecordingSchema as Kt, deriveScopedSummaryFromCases as L, getEvalStartTime as Ln, cacheDebugKeyEntrySchema as Lt, extractLlmCalls as M, configureEvalRunLogs as Mn, evalChartConfigSchema as Mt, applyDerivedCallAttributes as N, evalAssert as Nn, evalChartMetricSchema as Nt, sseEnvelopeSchema as O, evalExpect as On, evalChartAggregateSchema as Ot, getNestedAttribute as P, evalLog as Pn, evalChartTooltipExtraSchema as Pt, evalDeriveConfigSchema as Q, traceAttributeDisplayInputSchema as Qt, deriveStatusFromCaseRows as R, incrementEvalOutput as Rn, cacheDebugKeyFileSchema as Rt, buildDeclaredColumnDefs as S, hashCacheKeySync as Sn, evalSummarySchema as St, createFsCacheStore as T, serializeCacheRecording as Tn, runLogLocationSchema as Tt, DEFAULT_LLM_CALLS_CONFIG as U, runInEvalScope as Un, cacheModeSchema as Ut, runSummarySchema as V, nextEvalId as Vn, cacheFileSchema as Vt, agentEvalsConfigSchema as W, runInExistingEvalScope as Wn, cacheOperationTypeSchema as Wt, evalColumnOverrideSchema as X, traceCacheRefSchema as Xt, defaultConfigKeySchema as Y, getEvalRegistry as Yn, spanCacheOptionsSchema as Yt, evalColumnsSchema as Z, traceAttributeDisplayFormatSchema as Zt, deriveEvalFreshness as _, buildTraceTree as _n, discoveryIssueSchema as _t, getLastRunStatuses as a, traceSpanSchema as an, removeDefaultConfigSchema as at, resolveEvalDefaultConfig as b, evalTracer as bn, evalStatItemSchema as bt, loadPersistedRunSnapshots as c, columnDefSchema as cn, runLogsConfigSchema as ct, persistRunState as d, fileRefSchema as dn, buildEvalKey as dt, traceAttributeDisplaySchema as en, llmCallMetricPlacementSchema as et, recomputeEvalStatusesInRuns as f, jsonCellSchema as fn, getCaseRowCaseKey as ft, resolveArtifactPath as g, z$1 as gn, caseRowSchema as gt, resolveTracePresentation as h, runArtifactRefSchema as hn, caseDetailSchema as ht, generateRunId as i, traceSpanKindSchema as in, llmCallsConfigSchema as it, extractApiCalls as j, appendToEvalOutput as jn, evalChartColorSchema as jt, extractCacheEntries as k, EvalAssertionError as kn, evalChartAxisSchema as kt, nextShortIdFromSnapshots as l, columnFormatSchema as ln, trialSelectionModeSchema as lt, runTouchesEval as m, repoFileRefSchema as mn, assertionFailureSchema as mt, getTargetEvalKeys as n, traceDisplayInputConfigSchema as nn, llmCallPricingRateSchema as nt, getLatestRunInfos as o, traceSpanWarningSchema as on, resolveApiCallsConfig as ot, recomputePersistedCaseStatus as p, numberDisplayOptionsSchema as pn, getCaseRowEvalKey as pt, apiCallMetricSchema as q, startEvalBackgroundJob as qn, cacheStatusSchema as qt, getTargetEvals as r, traceSpanErrorSchema as rn, llmCallPricingSchema as rt, loadPersistedRunSnapshot as s, cellValueSchema as sn, resolveLlmCallsConfig as st, executeRun as t, traceDisplayConfigSchema as tn, llmCallMetricSchema as tt, persistCaseDetail as u, columnKindSchema as un, buildCaseKey as ut, loadEvalModule as v, captureEvalSpanError as vn, evalFreshnessStatusSchema as vt, validateCharts as w, deserializeCacheValue as wn, runLogLevelSchema as wt, loadConfig as x, hashCacheKey as xn, evalStatsConfigSchema as xt, parseEvalDiscovery as y, evalSpan as yn, evalStatAggregateSchema as yt, deriveStatusFromChildStatuses as z, isInEvalScope as zn, cacheEntrySchema as zt };