@ls-stack/agent-eval 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@ import { createRequire, registerHooks } from "node:module";
2
2
  import { createHash } from "node:crypto";
3
3
  import { mkdir, readFile, readdir, rename, rm, stat, writeFile } from "node:fs/promises";
4
4
  import { extname, isAbsolute, join, relative, resolve } from "node:path";
5
- import { formatWithOptions, isDeepStrictEqual } from "node:util";
5
+ import { formatWithOptions, isDeepStrictEqual, stripVTControlCharacters } from "node:util";
6
6
  import { AsyncLocalStorage } from "node:async_hooks";
7
7
  import { z, z as z$1 } from "zod/v4";
8
8
  import { Buffer as Buffer$1 } from "node:buffer";
@@ -29,6 +29,19 @@ function defineEval(definition) {
29
29
  });
30
30
  }
31
31
  //#endregion
32
+ //#region ../sdk/src/stackFormatting.ts
33
+ const orphanedAnsiSgrPattern$1 = /\[(?:\d{1,3}(?:;\d{1,3})*)?m/g;
34
+ /**
35
+ * Remove terminal styling control codes from captured stack text.
36
+ *
37
+ * Some stack providers add ANSI SGR codes for terminal output. Persisted eval
38
+ * artifacts are rendered in the web UI, so stacks should be stored as plain
39
+ * text.
40
+ */
41
+ function stripTerminalControlCodes$1(value) {
42
+ return stripVTControlCharacters(value).replaceAll(orphanedAnsiSgrPattern$1, "");
43
+ }
44
+ //#endregion
32
45
  //#region ../sdk/src/runtime.ts
33
46
  const scopeStorage = new AsyncLocalStorage();
34
47
  const runtimeScopeStorage = new AsyncLocalStorage();
@@ -518,10 +531,13 @@ function recordOpIfActive(scope, op) {
518
531
  if (top) top.ops.push(op);
519
532
  }
520
533
  function toAssertionFailure$1(message, error = void 0) {
521
- return error?.stack ? {
534
+ const name = error?.name;
535
+ const stack = error?.stack ? stripTerminalControlCodes$1(error.stack) : void 0;
536
+ return {
537
+ ...name !== void 0 ? { name } : {},
522
538
  message,
523
- stack: error.stack
524
- } : { message };
539
+ ...stack !== void 0 ? { stack } : {}
540
+ };
525
541
  }
526
542
  /**
527
543
  * Record or replace an output value for the current case scope.
@@ -789,7 +805,8 @@ function repoFile(path, mimeType) {
789
805
  }
790
806
  //#endregion
791
807
  //#region ../sdk/src/cacheSerialization.ts
792
- const serializedCacheValueMarker = "__agentEvalsCacheSerialization";
808
+ const serializedCacheValueMarker = "__aecs";
809
+ const legacySerializedCacheValueMarker = "__agentEvalsCacheSerialization";
793
810
  const jsonSafeCacheValueVersion = "json-safe-v1";
794
811
  const packedNumberArrayMinLength = 128;
795
812
  const compressedStringMinBytes = 16 * 1024;
@@ -799,7 +816,7 @@ function isRecordLike$3(value) {
799
816
  return typeof value === "object" && value !== null && !Array.isArray(value);
800
817
  }
801
818
  function isJsonSafeSerializedCacheValue(value) {
802
- return isRecordLike$3(value) && value[serializedCacheValueMarker] === jsonSafeCacheValueVersion && typeof value.type === "string";
819
+ return isRecordLike$3(value) && serializationMarkerValue(value) === jsonSafeCacheValueVersion && typeof value.type === "string";
803
820
  }
804
821
  function jsonSafeValue(type, value) {
805
822
  return value === void 0 ? {
@@ -812,32 +829,39 @@ function jsonSafeValue(type, value) {
812
829
  };
813
830
  }
814
831
  function hasSerializationMarkerKey(value) {
815
- return Object.hasOwn(value, serializedCacheValueMarker);
832
+ return Object.hasOwn(value, serializedCacheValueMarker) || Object.hasOwn(value, legacySerializedCacheValueMarker);
833
+ }
834
+ function serializationMarkerValue(value) {
835
+ return value[serializedCacheValueMarker] ?? value[legacySerializedCacheValueMarker];
816
836
  }
817
837
  /**
818
838
  * Serialize one cached value while keeping plain JSON as plain JSON.
819
839
  *
820
- * Rich runtime values use small tagged wrappers.
840
+ * Rich runtime values use small tagged wrappers. Undefined values are omitted
841
+ * by default; pass `preserveUndefined: true` to round-trip them explicitly.
821
842
  */
822
- async function serializeCacheValue(value) {
823
- return serializeJsonSafeValue(value, /* @__PURE__ */ new WeakSet(), 0);
843
+ async function serializeCacheValue(value, options = void 0) {
844
+ return serializeJsonSafeValue(value, /* @__PURE__ */ new WeakSet(), 0, normalizeCacheSerializationOptions(options));
824
845
  }
825
846
  /** Revive one cached value, while preserving legacy JSON-round-tripped data. */
826
847
  function deserializeCacheValue(value) {
827
848
  return deserializeJsonSafeValue(value);
828
849
  }
829
850
  /** Clone one value through the same serialization path used for cache data. */
830
- async function cloneCacheValue(value) {
831
- return deserializeCacheValue(await serializeCacheValue(value));
851
+ async function cloneCacheValue(value, options = void 0) {
852
+ return deserializeCacheValue(await serializeCacheValue(value, options));
853
+ }
854
+ function normalizeCacheSerializationOptions(options) {
855
+ return { preserveUndefined: options?.preserveUndefined === true };
832
856
  }
833
- async function serializeJsonSafeValue(value, refs, depth) {
834
- if (value === void 0) return jsonSafeValue("Undefined");
857
+ async function serializeJsonSafeValue(value, refs, depth, config) {
858
+ if (value === void 0) return config.preserveUndefined ? jsonSafeValue("Undefined") : void 0;
835
859
  if (typeof value === "bigint") return jsonSafeValue("BigInt", value.toString());
836
860
  if (typeof value === "number") return serializeNumber(value);
837
861
  if (typeof value === "string") return serializeString(value, depth);
838
862
  if (value instanceof Date) return jsonSafeValue("Date", value.toISOString());
839
- if (value instanceof Map) return serializeMap(value, refs, depth);
840
- if (value instanceof Set) return serializeSet(value, refs, depth);
863
+ if (value instanceof Map) return serializeMap(value, refs, depth, config);
864
+ if (value instanceof Set) return serializeSet(value, refs, depth, config);
841
865
  if (value instanceof RegExp) return jsonSafeValue("RegExp", {
842
866
  flags: value.flags,
843
867
  source: value.source
@@ -856,7 +880,7 @@ async function serializeJsonSafeValue(value, refs, depth) {
856
880
  type: value.type
857
881
  });
858
882
  if (value instanceof ArrayBuffer) return jsonSafeValue("ArrayBuffer", bytesToBase64(new Uint8Array(value)));
859
- if (value instanceof Error) return serializeError(value, refs, depth);
883
+ if (value instanceof Error) return serializeError(value, refs, depth, config);
860
884
  if (!value || typeof value !== "object") return value;
861
885
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
862
886
  refs.add(value);
@@ -869,12 +893,18 @@ async function serializeJsonSafeValue(value, refs, depth) {
869
893
  }
870
894
  }
871
895
  const items = [];
872
- for (const item of value) items.push(await serializeJsonSafeValue(item, refs, depth + 1));
896
+ for (const item of value) {
897
+ const serializedItem = await serializeJsonSafeValue(item, refs, depth + 1, config);
898
+ if (serializedItem !== void 0) items.push(serializedItem);
899
+ }
873
900
  refs.delete(value);
874
901
  return compressNestedJsonValue(items, depth) ?? items;
875
902
  }
876
903
  const entries = [];
877
- for (const [key, entryValue] of Object.entries(value)) entries.push([key, await serializeJsonSafeValue(entryValue, refs, depth + 1)]);
904
+ for (const [key, entryValue] of Object.entries(value)) {
905
+ const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
906
+ if (serializedEntryValue !== void 0) entries.push([key, serializedEntryValue]);
907
+ }
878
908
  refs.delete(value);
879
909
  const serialized = hasSerializationMarkerKey(value) ? jsonSafeValue("Object", entries) : Object.fromEntries(entries);
880
910
  return compressNestedJsonValue(serialized, depth) ?? serialized;
@@ -944,32 +974,40 @@ function compressNestedJsonValue(value, depth) {
944
974
  function compressionIsWorthIt(value, rawSize) {
945
975
  return Buffer$1.byteLength(JSON.stringify(value)) < rawSize * maxCompressedSizeRatio;
946
976
  }
947
- async function serializeMap(value, refs, depth) {
977
+ async function serializeMap(value, refs, depth, config) {
948
978
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
949
979
  refs.add(value);
950
980
  const entries = [];
951
- for (const [key, entryValue] of value.entries()) entries.push([await serializeJsonSafeValue(key, refs, depth + 1), await serializeJsonSafeValue(entryValue, refs, depth + 1)]);
981
+ for (const [key, entryValue] of value.entries()) {
982
+ const serializedKey = await serializeJsonSafeValue(key, refs, depth + 1, config);
983
+ const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
984
+ if (serializedKey !== void 0 && serializedEntryValue !== void 0) entries.push([serializedKey, serializedEntryValue]);
985
+ }
952
986
  refs.delete(value);
953
987
  return jsonSafeValue("Map", entries);
954
988
  }
955
- async function serializeSet(value, refs, depth) {
989
+ async function serializeSet(value, refs, depth, config) {
956
990
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
957
991
  refs.add(value);
958
992
  const items = [];
959
- for (const item of value.values()) items.push(await serializeJsonSafeValue(item, refs, depth + 1));
993
+ for (const item of value.values()) {
994
+ const serializedItem = await serializeJsonSafeValue(item, refs, depth + 1, config);
995
+ if (serializedItem !== void 0) items.push(serializedItem);
996
+ }
960
997
  refs.delete(value);
961
998
  return jsonSafeValue("Set", items);
962
999
  }
963
- async function serializeError(value, refs, depth) {
1000
+ async function serializeError(value, refs, depth, config) {
964
1001
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
965
1002
  refs.add(value);
966
1003
  const props = [];
967
1004
  for (const [key, entryValue] of Object.entries(value)) {
968
1005
  if (key === "cause") continue;
969
- props.push([key, await serializeJsonSafeValue(entryValue, refs, depth + 1)]);
1006
+ const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
1007
+ if (serializedEntryValue !== void 0) props.push([key, serializedEntryValue]);
970
1008
  }
971
1009
  const serialized = jsonSafeValue("Error", {
972
- cause: "cause" in value ? await serializeJsonSafeValue(value.cause, refs, depth + 1) : void 0,
1010
+ cause: "cause" in value ? await serializeJsonSafeValue(value.cause, refs, depth + 1, config) : void 0,
973
1011
  message: value.message,
974
1012
  name: value.name,
975
1013
  props,
@@ -1110,33 +1148,36 @@ function deserializeError(value) {
1110
1148
  });
1111
1149
  return error;
1112
1150
  }
1113
- async function serializeRecordValues(record) {
1151
+ async function serializeRecordValues(record, config) {
1114
1152
  const entries = [];
1115
- for (const [key, value] of Object.entries(record)) entries.push([key, await serializeCacheValue(value)]);
1153
+ for (const [key, value] of Object.entries(record)) {
1154
+ const serializedValue = await serializeCacheValue(value, config);
1155
+ if (serializedValue !== void 0) entries.push([key, serializedValue]);
1156
+ }
1116
1157
  return Object.fromEntries(entries);
1117
1158
  }
1118
1159
  function deserializeRecordValues(record) {
1119
1160
  return Object.fromEntries(Object.entries(record).map(([key, value]) => [key, deserializeCacheValue(value)]));
1120
1161
  }
1121
- async function serializeCacheRecordingOp(op) {
1162
+ async function serializeCacheRecordingOp(op, config) {
1122
1163
  switch (op.kind) {
1123
1164
  case "setOutput":
1124
1165
  case "appendOutput": return {
1125
1166
  ...op,
1126
- value: await serializeCacheValue(op.value)
1167
+ value: await serializeCacheValue(op.value, config)
1127
1168
  };
1128
1169
  case "mergeOutput": return {
1129
1170
  ...op,
1130
- patch: await serializeRecordValues(op.patch)
1171
+ patch: await serializeRecordValues(op.patch, config)
1131
1172
  };
1132
1173
  case "incrementOutput": return op;
1133
1174
  case "checkpoint": return {
1134
1175
  ...op,
1135
- data: await serializeCacheValue(op.data)
1176
+ data: await serializeCacheValue(op.data, config)
1136
1177
  };
1137
1178
  case "subSpan": return {
1138
1179
  ...op,
1139
- span: await serializeCacheSpan(op.span)
1180
+ span: await serializeCacheSpan(op.span, config)
1140
1181
  };
1141
1182
  }
1142
1183
  }
@@ -1162,11 +1203,11 @@ function deserializeCacheRecordingOp(op) {
1162
1203
  };
1163
1204
  }
1164
1205
  }
1165
- async function serializeCacheSpan(span) {
1206
+ async function serializeCacheSpan(span, config) {
1166
1207
  return {
1167
1208
  ...span,
1168
- attributes: span.attributes === void 0 ? void 0 : await serializeRecordValues(span.attributes),
1169
- children: await Promise.all(span.children.map(serializeCacheSpan))
1209
+ attributes: span.attributes === void 0 ? void 0 : await serializeRecordValues(span.attributes, config),
1210
+ children: await Promise.all(span.children.map((child) => serializeCacheSpan(child, config)))
1170
1211
  };
1171
1212
  }
1172
1213
  function deserializeCacheSpan(span) {
@@ -1176,13 +1217,19 @@ function deserializeCacheSpan(span) {
1176
1217
  children: span.children.map(deserializeCacheSpan)
1177
1218
  };
1178
1219
  }
1179
- /** Serialize all rich values captured in a cache recording before persistence. */
1180
- async function serializeCacheRecording(recording) {
1220
+ /**
1221
+ * Serialize all rich values captured in a cache recording before persistence.
1222
+ *
1223
+ * Undefined values are omitted by default; pass `preserveUndefined: true` to
1224
+ * retain the legacy explicit undefined wrappers in the recording payload.
1225
+ */
1226
+ async function serializeCacheRecording(recording, options = void 0) {
1227
+ const config = normalizeCacheSerializationOptions(options);
1181
1228
  return {
1182
1229
  ...recording,
1183
- returnValue: await serializeCacheValue(recording.returnValue),
1184
- finalAttributes: await serializeRecordValues(recording.finalAttributes),
1185
- ops: await Promise.all(recording.ops.map(serializeCacheRecordingOp))
1230
+ returnValue: await serializeCacheValue(recording.returnValue, config),
1231
+ finalAttributes: await serializeRecordValues(recording.finalAttributes, config),
1232
+ ops: await Promise.all(recording.ops.map((op) => serializeCacheRecordingOp(op, config)))
1186
1233
  };
1187
1234
  }
1188
1235
  /** Revive all rich values captured in a cache recording after lookup. */
@@ -1574,7 +1621,9 @@ function createTraceCache(generateSpanId) {
1574
1621
  key: info.key
1575
1622
  }, { serializeFileBytes: info.serializeFileBytes === true });
1576
1623
  const activeSpan = scope.activeSpanStack.at(-1);
1577
- if (cacheCtx.mode === "use") {
1624
+ const canRead = cacheCtx.mode === "use" && cacheCtx.read !== false;
1625
+ const canStore = cacheCtx.mode !== "bypass" && cacheCtx.store !== false;
1626
+ if (canRead) {
1578
1627
  const hit = await cacheCtx.adapter.lookup(namespace, keyHash);
1579
1628
  if (hit) {
1580
1629
  const storedAt = hit.storedAt;
@@ -1597,14 +1646,24 @@ function createTraceCache(generateSpanId) {
1597
1646
  name: info.name,
1598
1647
  namespace,
1599
1648
  key: keyHash,
1600
- status: "miss"
1649
+ status: "miss",
1650
+ ...canStore ? {} : { stored: false }
1601
1651
  });
1602
- } else if (cacheCtx.mode === "refresh") recordCacheRef(scope, activeSpan, {
1652
+ } else if (cacheCtx.mode === "use" && canStore) recordCacheRef(scope, activeSpan, {
1653
+ type: "value",
1654
+ name: info.name,
1655
+ namespace,
1656
+ key: keyHash,
1657
+ status: "miss",
1658
+ read: false
1659
+ });
1660
+ else if (cacheCtx.mode === "refresh") recordCacheRef(scope, activeSpan, {
1603
1661
  type: "value",
1604
1662
  name: info.name,
1605
1663
  namespace,
1606
1664
  key: keyHash,
1607
- status: "refresh"
1665
+ status: "refresh",
1666
+ ...canStore ? {} : { stored: false }
1608
1667
  });
1609
1668
  else recordCacheRef(scope, activeSpan, {
1610
1669
  type: "value",
@@ -1627,7 +1686,7 @@ function createTraceCache(generateSpanId) {
1627
1686
  scope.recordingStack.pop();
1628
1687
  }
1629
1688
  appendSubSpanOps(scope, frame);
1630
- if (cacheCtx.mode !== "bypass") {
1689
+ if (canStore) {
1631
1690
  const finalAttributes = diffNonCacheAttributes(beforeAttributes, await snapshotNonCacheAttributes(activeSpan));
1632
1691
  const recording = {
1633
1692
  returnValue: bodyResult,
@@ -1641,13 +1700,11 @@ function createTraceCache(generateSpanId) {
1641
1700
  operationType: "value",
1642
1701
  operationName: info.name,
1643
1702
  storedAt: new Date(getRealDateNowMs()).toISOString(),
1644
- codeFingerprint: cacheCtx.codeFingerprint,
1645
1703
  recording: await serializeCacheRecording(recording)
1646
1704
  }, {
1647
1705
  rawKey: info.key,
1648
1706
  operationType: "value",
1649
- operationName: info.name,
1650
- codeFingerprint: cacheCtx.codeFingerprint
1707
+ operationName: info.name
1651
1708
  });
1652
1709
  }
1653
1710
  return bodyResult;
@@ -2018,11 +2075,13 @@ async function traceSpanInternal(info, fn) {
2018
2075
  namespace,
2019
2076
  key: cacheOpts.key
2020
2077
  }, { serializeFileBytes: cacheOpts.serializeFileBytes === true });
2078
+ const canRead = ctx.mode === "use" && ctx.read !== false;
2079
+ const canStore = ctx.mode !== "bypass" && ctx.store !== false;
2021
2080
  mergeSpanAttributes(spanRecord, {
2022
2081
  "cache.key": keyHash,
2023
2082
  "cache.namespace": namespace
2024
2083
  });
2025
- if (ctx.mode === "use") {
2084
+ if (canRead) {
2026
2085
  const hit = await ctx.adapter.lookup(namespace, keyHash);
2027
2086
  if (hit) {
2028
2087
  const storedAt = hit.storedAt;
@@ -2037,8 +2096,18 @@ async function traceSpanInternal(info, fn) {
2037
2096
  spanRecord.endedAt = addElapsedMsToTimestamp(spanRecord.startedAt, getRealDateNowMs() - realStartedAt);
2038
2097
  return recording.returnValue;
2039
2098
  }
2040
- mergeSpanAttributes(spanRecord, { "cache.status": "miss" });
2041
- } else if (ctx.mode === "refresh") mergeSpanAttributes(spanRecord, { "cache.status": "refresh" });
2099
+ mergeSpanAttributes(spanRecord, {
2100
+ "cache.status": "miss",
2101
+ ...canStore ? {} : { "cache.stored": false }
2102
+ });
2103
+ } else if (ctx.mode === "use" && canStore) mergeSpanAttributes(spanRecord, {
2104
+ "cache.status": "miss",
2105
+ "cache.read": false
2106
+ });
2107
+ else if (ctx.mode === "refresh") mergeSpanAttributes(spanRecord, {
2108
+ "cache.status": "refresh",
2109
+ ...canStore ? {} : { "cache.stored": false }
2110
+ });
2042
2111
  else mergeSpanAttributes(spanRecord, { "cache.status": "bypass" });
2043
2112
  const frame = {
2044
2113
  baseSpanIndex: scope.spans.length,
@@ -2054,7 +2123,7 @@ async function traceSpanInternal(info, fn) {
2054
2123
  }
2055
2124
  appendSubSpanOps(scope, frame);
2056
2125
  finishSpanWithoutThrownError(spanRecord, realStartedAt);
2057
- if (ctx.mode !== "bypass") {
2126
+ if (canStore) {
2058
2127
  const recording = {
2059
2128
  returnValue: bodyResult,
2060
2129
  finalAttributes: stripCacheAttributes(spanRecord.attributes),
@@ -2074,14 +2143,12 @@ async function traceSpanInternal(info, fn) {
2074
2143
  spanName: info.name,
2075
2144
  spanKind: info.kind,
2076
2145
  storedAt: new Date(getRealDateNowMs()).toISOString(),
2077
- codeFingerprint: ctx.codeFingerprint,
2078
2146
  recording: await serializeCacheRecording(recording)
2079
2147
  };
2080
2148
  await ctx.adapter.write(entry, {
2081
2149
  rawKey: cacheOpts.key,
2082
2150
  operationType: "span",
2083
- operationName: info.name,
2084
- codeFingerprint: ctx.codeFingerprint
2151
+ operationName: info.name
2085
2152
  });
2086
2153
  }
2087
2154
  return bodyResult;
@@ -2274,6 +2341,7 @@ const columnDefSchema = z.object({
2274
2341
  passThreshold: z.number().optional(),
2275
2342
  maxStars: z.number().int().min(2).optional(),
2276
2343
  hideInTable: z.boolean().optional(),
2344
+ hideIfNoValue: z.boolean().optional(),
2277
2345
  align: z.enum([
2278
2346
  "left",
2279
2347
  "center",
@@ -2417,6 +2485,10 @@ const traceCacheRefSchema = z.object({
2417
2485
  namespace: z.string(),
2418
2486
  key: z.string(),
2419
2487
  status: cacheStatusSchema,
2488
+ /** Whether this ref attempted to read from cache. Defaults to true. */
2489
+ read: z.boolean().optional(),
2490
+ /** Whether this ref wrote a persisted cache entry. Defaults to true for misses/refreshes. */
2491
+ stored: z.boolean().optional(),
2420
2492
  storedAt: z.string().optional(),
2421
2493
  age: z.number().optional()
2422
2494
  });
@@ -2429,7 +2501,6 @@ const cacheListItemSchema = z.object({
2429
2501
  spanName: z.string().optional(),
2430
2502
  spanKind: traceSpanKindSchema.optional(),
2431
2503
  storedAt: z.string(),
2432
- codeFingerprint: z.string(),
2433
2504
  sizeBytes: z.number()
2434
2505
  });
2435
2506
  /** Zod schema for `SerializedCacheSpan`, defined lazily for recursion. */
@@ -2511,7 +2582,6 @@ const cacheEntrySchema = z.object({
2511
2582
  spanName: z.string().optional(),
2512
2583
  spanKind: traceSpanKindSchema.optional(),
2513
2584
  storedAt: z.string(),
2514
- codeFingerprint: z.string(),
2515
2585
  recording: cacheRecordingSchema
2516
2586
  });
2517
2587
  /** Debug-only raw key metadata stored outside the reusable cache entry. */
@@ -2522,7 +2592,6 @@ const cacheDebugKeyEntrySchema = z.object({
2522
2592
  operationType: cacheOperationTypeSchema,
2523
2593
  operationName: z.string(),
2524
2594
  storedAt: z.string(),
2525
- codeFingerprint: z.string(),
2526
2595
  rawKey: z.unknown()
2527
2596
  });
2528
2597
  /** Cache lookup response with optional debug-only raw key data. */
@@ -2614,6 +2683,11 @@ const evalChartTooltipExtraSchema = z.discriminatedUnion("source", [z.object({
2614
2683
  const evalChartConfigSchema = z.object({
2615
2684
  /** Optional heading shown above the chart frame in the UI. */
2616
2685
  heading: z.string().optional(),
2686
+ /**
2687
+ * Hide this chart in the UI when none of its metrics has a numeric value in
2688
+ * the rendered history window.
2689
+ */
2690
+ hideIfNoValue: z.boolean().optional(),
2617
2691
  type: evalChartTypeSchema,
2618
2692
  /** At least one series must be declared. */
2619
2693
  metrics: z.array(evalChartMetricSchema).min(1),
@@ -2654,17 +2728,31 @@ const evalStatAggregateSchema = z.enum([
2654
2728
  "sum",
2655
2729
  "last"
2656
2730
  ]);
2731
+ const hideIfNoValueShape = {
2732
+ /**
2733
+ * Hide this stat in the UI when the current run has no displayable value.
2734
+ * Missing values, `null`, and empty strings count as no value; `0` remains
2735
+ * visible.
2736
+ */
2737
+ hideIfNoValue: z.boolean().optional() };
2657
2738
  /**
2658
2739
  * One entry in the EvalCard stats row. Built-in kinds use latest run totals;
2659
2740
  * `column` aggregates a score or numeric output column across the latest run.
2660
2741
  */
2661
2742
  const evalStatItemSchema = z.discriminatedUnion("kind", [
2662
- z.object({ kind: z.literal("cases") }),
2743
+ z.object({
2744
+ kind: z.literal("cases"),
2745
+ ...hideIfNoValueShape
2746
+ }),
2663
2747
  z.object({
2664
2748
  kind: z.literal("passRate"),
2665
- accent: z.boolean().optional()
2749
+ accent: z.boolean().optional(),
2750
+ ...hideIfNoValueShape
2751
+ }),
2752
+ z.object({
2753
+ kind: z.literal("duration"),
2754
+ ...hideIfNoValueShape
2666
2755
  }),
2667
- z.object({ kind: z.literal("duration") }),
2668
2756
  z.object({
2669
2757
  kind: z.literal("column"),
2670
2758
  key: z.string(),
@@ -2673,7 +2761,8 @@ const evalStatItemSchema = z.discriminatedUnion("kind", [
2673
2761
  format: columnFormatSchema.optional(),
2674
2762
  /** Number presentation options applied when `format: 'number'`. */
2675
2763
  numberFormat: numberDisplayOptionsSchema.optional(),
2676
- accent: z.boolean().optional()
2764
+ accent: z.boolean().optional(),
2765
+ ...hideIfNoValueShape
2677
2766
  })
2678
2767
  ]);
2679
2768
  /** Ordered list of stats rendered in the EvalCard stats row. */
@@ -2754,6 +2843,12 @@ const caseRowSchema = z.object({
2754
2843
  });
2755
2844
  /** Structured assertion failure metadata captured for one case run. */
2756
2845
  const assertionFailureSchema = z.object({
2846
+ /**
2847
+ * Error class or category label rendered alongside the message (e.g.
2848
+ * `EvalAssertionError`, `OutputsSchemaError`). Optional for legacy entries
2849
+ * and synthetic failures without an originating Error.
2850
+ */
2851
+ name: z.string().optional(),
2757
2852
  /** Human-readable assertion failure message shown in the UI and artifacts. */
2758
2853
  message: z.string(),
2759
2854
  /** Stack trace captured from the originating error when available. */
@@ -2902,6 +2997,25 @@ const defaultConfigKeySchema = z.enum([
2902
2997
  ]);
2903
2998
  /** Removal config for built-in eval-level outputs and UI metadata. */
2904
2999
  const removeDefaultConfigSchema = z.union([z.literal(true), z.array(defaultConfigKeySchema)]);
3000
+ const evalDeriveValueFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a derive output function" });
3001
+ /** Schema for keyed or object-returning trace-derived output config. */
3002
+ const evalDeriveConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a deriveFromTracing function" }), z.record(z.string().min(1), evalDeriveValueFnSchema)]);
3003
+ /** Schema for UI overrides on derived or scored columns. */
3004
+ const evalColumnOverrideSchema = z.object({
3005
+ label: z.string().optional(),
3006
+ format: columnFormatSchema.optional(),
3007
+ numberFormat: numberDisplayOptionsSchema.optional(),
3008
+ hideInTable: z.boolean().optional(),
3009
+ hideIfNoValue: z.boolean().optional(),
3010
+ align: z.enum([
3011
+ "left",
3012
+ "center",
3013
+ "right"
3014
+ ]).optional(),
3015
+ maxStars: z.number().int().min(2).optional()
3016
+ });
3017
+ /** Schema for column override maps keyed by output or score field name. */
3018
+ const evalColumnsSchema = z.record(z.string(), evalColumnOverrideSchema);
2905
3019
  /** Render formats supported by an LLM-call metric in the UI. */
2906
3020
  const llmCallMetricFormatSchema = z.enum([
2907
3021
  "string",
@@ -2979,18 +3093,9 @@ const apiCallMetricSchema = z.object({
2979
3093
  placements: z.array(apiCallMetricPlacementSchema).nonempty().optional()
2980
3094
  });
2981
3095
  /**
2982
- * Schema for one model/provider pricing entry used to derive LLM-call costs
2983
- * from token counts.
3096
+ * Schema for pricing rates used to derive LLM-call costs from token counts.
2984
3097
  */
2985
- const llmCallPricingSchema = z.object({
2986
- /** Exact model name read from the configured `attributes.model` path. */
2987
- model: z.string().min(1),
2988
- /**
2989
- * Optional provider discriminator read from `attributes.provider`. When set,
2990
- * the entry only applies to calls from that provider; provider-specific
2991
- * entries take precedence over generic entries for the same model.
2992
- */
2993
- provider: z.string().min(1).optional(),
3098
+ const llmCallPricingRateSchema = z.object({
2994
3099
  /** USD per one million non-cached input tokens. */
2995
3100
  inputUsdPerMillion: z.number().nonnegative().optional(),
2996
3101
  /** USD per one million output tokens. */
@@ -3004,6 +3109,23 @@ const llmCallPricingSchema = z.object({
3004
3109
  /** USD per one million reasoning tokens when reported separately. */
3005
3110
  reasoningUsdPerMillion: z.number().nonnegative().optional()
3006
3111
  });
3112
+ /**
3113
+ * Schema for one model's pricing config. The object key is the exact model
3114
+ * name. Use `providers` when a model has provider-specific rates in addition
3115
+ * to, or instead of, generic model rates.
3116
+ */
3117
+ const llmCallPricingSchema = llmCallPricingRateSchema.extend({
3118
+ /**
3119
+ * Optional provider discriminator read from `attributes.provider`. When set,
3120
+ * the top-level entry only applies to calls from that provider.
3121
+ */
3122
+ provider: z.string().min(1).optional(),
3123
+ /**
3124
+ * Provider-specific pricing for the model. Provider entries take precedence
3125
+ * over generic rates for the same model.
3126
+ */
3127
+ providers: z.record(z.string().min(1), llmCallPricingRateSchema).optional()
3128
+ });
3007
3129
  /** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
3008
3130
  const llmCallsConfigSchema = z.object({
3009
3131
  /** Span kinds treated as LLM calls. Defaults to `['llm']`. */
@@ -3040,10 +3162,10 @@ const llmCallsConfigSchema = z.object({
3040
3162
  */
3041
3163
  derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
3042
3164
  /**
3043
- * Model/provider pricing registry used to calculate LLM-call costs from
3044
- * token counts. Built-in LLM cost fields are only derived from this registry.
3165
+ * Model-keyed pricing registry used to calculate LLM-call costs from token
3166
+ * counts. Built-in LLM cost fields are only derived from this registry.
3045
3167
  */
3046
- pricing: z.array(llmCallPricingSchema).optional(),
3168
+ pricing: z.record(z.string().min(1), llmCallPricingSchema).optional(),
3047
3169
  /** Custom user-defined metrics surfaced on each LLM call. */
3048
3170
  metrics: z.array(llmCallMetricSchema).optional()
3049
3171
  });
@@ -3159,6 +3281,33 @@ function resolveApiCallMetric(metric) {
3159
3281
  placements: metric.placements ? [...metric.placements] : ["body"]
3160
3282
  };
3161
3283
  }
3284
+ function hasPricingRates(pricing) {
3285
+ return pricing.inputUsdPerMillion !== void 0 || pricing.outputUsdPerMillion !== void 0 || pricing.cachedInputUsdPerMillion !== void 0 || pricing.cacheCreationInputUsdPerMillion !== void 0 || pricing.cacheCreationInput1hUsdPerMillion !== void 0 || pricing.reasoningUsdPerMillion !== void 0;
3286
+ }
3287
+ function copyPricingRates(pricing) {
3288
+ return {
3289
+ inputUsdPerMillion: pricing.inputUsdPerMillion,
3290
+ outputUsdPerMillion: pricing.outputUsdPerMillion,
3291
+ cachedInputUsdPerMillion: pricing.cachedInputUsdPerMillion,
3292
+ cacheCreationInputUsdPerMillion: pricing.cacheCreationInputUsdPerMillion,
3293
+ cacheCreationInput1hUsdPerMillion: pricing.cacheCreationInput1hUsdPerMillion,
3294
+ reasoningUsdPerMillion: pricing.reasoningUsdPerMillion
3295
+ };
3296
+ }
3297
+ function resolveLlmCallPricingEntries(model, pricing) {
3298
+ const entries = [];
3299
+ if (hasPricingRates(pricing)) entries.push({
3300
+ model,
3301
+ provider: pricing.provider,
3302
+ ...copyPricingRates(pricing)
3303
+ });
3304
+ for (const [provider, providerPricing] of Object.entries(pricing.providers ?? {})) entries.push({
3305
+ model,
3306
+ provider,
3307
+ ...copyPricingRates(providerPricing)
3308
+ });
3309
+ return entries;
3310
+ }
3162
3311
  /**
3163
3312
  * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
3164
3313
  * by the UI to derive the LLM calls tab.
@@ -3169,7 +3318,7 @@ function resolveApiCallMetric(metric) {
3169
3318
  * - Missing `metrics[].format` defaults to `'string'`.
3170
3319
  * - Missing `metrics[].placements` defaults to `['body']`.
3171
3320
  * - Missing `pricing` defaults to an empty registry; built-in costs are only
3172
- * derived from configured pricing and token counts.
3321
+ * derived from configured model-keyed pricing and token counts.
3173
3322
  */
3174
3323
  function resolveLlmCallsConfig(input) {
3175
3324
  return {
@@ -3180,16 +3329,7 @@ function resolveLlmCallsConfig(input) {
3180
3329
  },
3181
3330
  derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
3182
3331
  metrics: (input?.metrics ?? []).map(resolveLlmCallMetric),
3183
- pricing: (input?.pricing ?? []).map((p) => ({
3184
- model: p.model,
3185
- provider: p.provider,
3186
- inputUsdPerMillion: p.inputUsdPerMillion,
3187
- outputUsdPerMillion: p.outputUsdPerMillion,
3188
- cachedInputUsdPerMillion: p.cachedInputUsdPerMillion,
3189
- cacheCreationInputUsdPerMillion: p.cacheCreationInputUsdPerMillion,
3190
- cacheCreationInput1hUsdPerMillion: p.cacheCreationInput1hUsdPerMillion,
3191
- reasoningUsdPerMillion: p.reasoningUsdPerMillion
3192
- }))
3332
+ pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing))
3193
3333
  };
3194
3334
  }
3195
3335
  /**
@@ -3223,6 +3363,9 @@ const agentEvalsConfigSchema = z.object({
3223
3363
  staleAfterDays: z.number().optional(),
3224
3364
  allowCliRunAll: z.boolean().optional(),
3225
3365
  traceDisplay: traceDisplayInputConfigSchema.optional(),
3366
+ columns: evalColumnsSchema.optional(),
3367
+ deriveFromTracing: evalDeriveConfigSchema.optional(),
3368
+ stats: evalStatsConfigSchema.optional(),
3226
3369
  llmCalls: llmCallsConfigSchema.optional(),
3227
3370
  removeDefaultConfig: removeDefaultConfigSchema.optional(),
3228
3371
  apiCalls: apiCallsConfigSchema.optional(),
@@ -3875,6 +4018,11 @@ function readNumber(attributes, key) {
3875
4018
  const value = attributes[key];
3876
4019
  return typeof value === "number" && Number.isFinite(value) ? value : void 0;
3877
4020
  }
4021
/**
 * Read a boolean attribute by key.
 *
 * @param attributes - Candidate attribute container; anything rejected by
 *   `isRecord$2` yields `undefined`.
 * @param key - Property name to look up.
 * @returns The value when it is strictly a boolean, otherwise `undefined`.
 */
function readBoolean(attributes, key) {
  if (isRecord$2(attributes)) {
    const candidate = attributes[key];
    if (typeof candidate === "boolean") return candidate;
  }
  return undefined;
}
3878
4026
  function readArray(attributes, key) {
3879
4027
  if (!isRecord$2(attributes)) return [];
3880
4028
  const value = attributes[key];
@@ -3903,12 +4051,14 @@ function extractCacheEntries(spans, caseCacheRefs) {
3903
4051
  const namespace = readString(span.attributes, "cache.namespace");
3904
4052
  if (key !== void 0 && namespace !== void 0) {
3905
4053
  const isHit = status === "hit";
4054
+ const stored = isHit ? true : readBoolean(span.attributes, "cache.stored") !== false;
3906
4055
  entries.push({
3907
4056
  id: span.id,
3908
4057
  source: "span",
3909
4058
  origin: "span",
3910
- action: isHit ? "hit" : "added",
4059
+ action: isHit ? "hit" : stored ? "added" : "notStored",
3911
4060
  status,
4061
+ stored,
3912
4062
  name: span.name,
3913
4063
  namespace,
3914
4064
  key,
@@ -3925,12 +4075,14 @@ function extractCacheEntries(spans, caseCacheRefs) {
3925
4075
  const ref = parsed.data;
3926
4076
  if (ref.status === "bypass") continue;
3927
4077
  const isHit = ref.status === "hit";
4078
+ const stored = isHit ? true : ref.stored !== false;
3928
4079
  entries.push({
3929
4080
  id: `${span.id}:value:${String(index)}`,
3930
4081
  source: "value",
3931
4082
  origin: "span",
3932
- action: isHit ? "hit" : "added",
4083
+ action: isHit ? "hit" : stored ? "added" : "notStored",
3933
4084
  status: ref.status,
4085
+ stored,
3934
4086
  name: ref.name,
3935
4087
  namespace: ref.namespace,
3936
4088
  key: ref.key,
@@ -3943,12 +4095,14 @@ function extractCacheEntries(spans, caseCacheRefs) {
3943
4095
  for (const [index, ref] of caseCacheRefs.entries()) {
3944
4096
  if (ref.status === "bypass") continue;
3945
4097
  const isHit = ref.status === "hit";
4098
+ const stored = isHit ? true : ref.stored !== false;
3946
4099
  entries.push({
3947
4100
  id: `case:value:${String(index)}`,
3948
4101
  source: "value",
3949
4102
  origin: "caseRoot",
3950
- action: isHit ? "hit" : "added",
4103
+ action: isHit ? "hit" : stored ? "added" : "notStored",
3951
4104
  status: ref.status,
4105
+ stored,
3952
4106
  name: ref.name,
3953
4107
  namespace: ref.namespace,
3954
4108
  key: ref.key,
@@ -4020,7 +4174,8 @@ const updateManualScoreRequestSchema = z.object({ value: z.number().min(0).max(1
4020
4174
  //#endregion
4021
4175
  //#region ../runner/src/cacheStore.ts
4022
4176
  const defaultMaxEntriesPerNamespace = 100;
4023
- const cacheSerializationMarker = "__agentEvalsCacheSerialization";
4177
+ const cacheSerializationMarker = "__aecs";
4178
+ const legacyCacheSerializationMarker = "__agentEvalsCacheSerialization";
4024
4179
  const supportedCacheSerializationVersion = "json-safe-v1";
4025
4180
  /**
4026
4181
  * Create a filesystem-backed cache adapter rooted at `<workspaceRoot>/<dir>`.
@@ -4105,7 +4260,6 @@ function createFsCacheStore(options) {
4105
4260
  spanName: entry.spanName,
4106
4261
  spanKind: entry.spanKind,
4107
4262
  storedAt: entry.storedAt,
4108
- codeFingerprint: entry.codeFingerprint,
4109
4263
  sizeBytes: Buffer.byteLength(JSON.stringify(entry), "utf8")
4110
4264
  });
4111
4265
  }
@@ -4234,7 +4388,7 @@ async function readCacheFilePath(filePath) {
4234
4388
  function usesSupportedCacheSerialization(value) {
4235
4389
  if (Array.isArray(value)) return value.every(usesSupportedCacheSerialization);
4236
4390
  if (!isRecordLike(value)) return true;
4237
- if (Object.hasOwn(value, cacheSerializationMarker) && value[cacheSerializationMarker] !== supportedCacheSerializationVersion) return false;
4391
+ for (const marker of [cacheSerializationMarker, legacyCacheSerializationMarker]) if (Object.hasOwn(value, marker) && value[marker] !== supportedCacheSerializationVersion) return false;
4238
4392
  return Object.values(value).every(usesSupportedCacheSerialization);
4239
4393
  }
4240
4394
  async function writeOrRemoveCacheFile(cacheDir, cacheFile) {
@@ -4278,7 +4432,6 @@ async function writeDebugKeyEntry(params) {
4278
4432
  operationType: debugKey.operationType,
4279
4433
  operationName: debugKey.operationName,
4280
4434
  storedAt: entry.storedAt,
4281
- codeFingerprint: debugKey.codeFingerprint,
4282
4435
  rawKey: debugKey.rawKey
4283
4436
  };
4284
4437
  await writeDebugKeyFile(debugDir, {
@@ -4494,6 +4647,7 @@ function getScoreOverride(def) {
4494
4647
  format: def.format,
4495
4648
  numberFormat: def.numberFormat,
4496
4649
  hideInTable: def.hideInTable,
4650
+ hideIfNoValue: def.hideIfNoValue,
4497
4651
  align: def.align,
4498
4652
  maxStars: def.maxStars
4499
4653
  };
@@ -4506,6 +4660,7 @@ function mergeOverrides(base, override) {
4506
4660
  format: override.format ?? base.format,
4507
4661
  numberFormat: override.numberFormat ?? base.numberFormat,
4508
4662
  hideInTable: override.hideInTable ?? base.hideInTable,
4663
+ hideIfNoValue: override.hideIfNoValue ?? base.hideIfNoValue,
4509
4664
  align: override.align ?? base.align,
4510
4665
  maxStars: override.maxStars ?? base.maxStars
4511
4666
  };
@@ -4620,6 +4775,7 @@ function createColumnDef(params) {
4620
4775
  if (override?.numberFormat !== void 0) def.numberFormat = override.numberFormat;
4621
4776
  if (override?.maxStars !== void 0) def.maxStars = override.maxStars;
4622
4777
  if (override?.hideInTable !== void 0) def.hideInTable = override.hideInTable;
4778
+ if (override?.hideIfNoValue !== void 0) def.hideIfNoValue = override.hideIfNoValue;
4623
4779
  if (override?.align !== void 0) def.align = override.align;
4624
4780
  if (!isScore) return def;
4625
4781
  def.isScore = true;
@@ -4704,60 +4860,70 @@ const DEFAULT_COLUMNS = {
4704
4860
  label: "API Calls",
4705
4861
  format: "number",
4706
4862
  numberFormat: countNumberFormat,
4707
- align: "right"
4863
+ align: "right",
4864
+ hideIfNoValue: true
4708
4865
  },
4709
4866
  costUsd: {
4710
4867
  label: "Cost",
4711
4868
  format: "number",
4712
4869
  numberFormat: costNumberFormat,
4713
- align: "right"
4870
+ align: "right",
4871
+ hideIfNoValue: true
4714
4872
  },
4715
4873
  llmTurns: {
4716
4874
  label: "LLM Turns",
4717
4875
  format: "number",
4718
4876
  numberFormat: countNumberFormat,
4719
- align: "right"
4877
+ align: "right",
4878
+ hideIfNoValue: true
4720
4879
  },
4721
4880
  inputTokens: {
4722
4881
  label: "Input Tokens",
4723
4882
  format: "number",
4724
4883
  numberFormat: tokenNumberFormat,
4725
- align: "right"
4884
+ align: "right",
4885
+ hideIfNoValue: true
4726
4886
  },
4727
4887
  outputTokens: {
4728
4888
  label: "Output Tokens",
4729
4889
  format: "number",
4730
4890
  numberFormat: tokenNumberFormat,
4731
- align: "right"
4891
+ align: "right",
4892
+ hideIfNoValue: true
4732
4893
  },
4733
4894
  totalTokens: {
4734
4895
  label: "Total Tokens",
4735
4896
  format: "number",
4736
4897
  numberFormat: tokenNumberFormat,
4737
- align: "right"
4898
+ align: "right",
4899
+ hideIfNoValue: true
4738
4900
  },
4739
4901
  cachedInputTokens: {
4740
4902
  label: "Cached Input Tokens",
4741
4903
  format: "number",
4742
4904
  numberFormat: tokenNumberFormat,
4743
- align: "right"
4905
+ align: "right",
4906
+ hideIfNoValue: true
4744
4907
  },
4745
4908
  cacheCreationInputTokens: {
4746
4909
  label: "Cache Write Tokens",
4747
4910
  format: "number",
4748
4911
  numberFormat: tokenNumberFormat,
4749
- align: "right"
4912
+ align: "right",
4913
+ hideIfNoValue: true
4750
4914
  },
4751
4915
  reasoningTokens: {
4752
4916
  label: "Reasoning Tokens",
4753
4917
  format: "number",
4754
4918
  numberFormat: tokenNumberFormat,
4755
- align: "right"
4919
+ align: "right",
4920
+ hideIfNoValue: true
4756
4921
  },
4757
4922
  llmDurationMs: {
4758
4923
  label: "LLM Duration",
4759
4924
  format: "duration",
4760
- align: "right"
4925
+ align: "right",
4926
+ hideIfNoValue: true
4761
4927
  }
4762
4928
  };
4763
4929
  function resolveRemovedKeys(globalRemove, evalRemove) {
@@ -4770,9 +4936,16 @@ function getActiveDefaultConfigKeys(params) {
4770
4936
  }
4771
4937
  function mergeDefaultColumns(params) {
4772
4938
  const activeKeys = getActiveDefaultConfigKeys(params);
4773
- if (activeKeys.length === 0) return params.columns;
4939
+ if (activeKeys.length === 0) {
4940
+ const merged = {
4941
+ ...params.globalColumns,
4942
+ ...params.columns
4943
+ };
4944
+ return Object.keys(merged).length > 0 ? merged : void 0;
4945
+ }
4774
4946
  return {
4775
4947
  ...Object.fromEntries(activeKeys.map((key) => [key, DEFAULT_COLUMNS[key]])),
4948
+ ...params.globalColumns,
4776
4949
  ...params.columns
4777
4950
  };
4778
4951
  }
@@ -4784,30 +4957,38 @@ function appendDefaultStats(params) {
4784
4957
  key: "apiCalls",
4785
4958
  label: "API Calls",
4786
4959
  aggregate: "avg",
4787
- numberFormat: countNumberFormat
4960
+ numberFormat: countNumberFormat,
4961
+ hideIfNoValue: true
4788
4962
  });
4789
4963
  if (activeKeys.has("costUsd")) defaults.push({
4790
4964
  kind: "column",
4791
4965
  key: "costUsd",
4792
4966
  label: "LLM Cost",
4793
4967
  aggregate: "avg",
4794
- numberFormat: costNumberFormat
4968
+ numberFormat: costNumberFormat,
4969
+ hideIfNoValue: true
4795
4970
  });
4796
4971
  if (activeKeys.has("totalTokens")) defaults.push({
4797
4972
  kind: "column",
4798
4973
  key: "totalTokens",
4799
4974
  label: "Tokens",
4800
4975
  aggregate: "avg",
4801
- numberFormat: tokenNumberFormat
4976
+ numberFormat: tokenNumberFormat,
4977
+ hideIfNoValue: true
4802
4978
  });
4803
4979
  if (activeKeys.has("llmTurns")) defaults.push({
4804
4980
  kind: "column",
4805
4981
  key: "llmTurns",
4806
4982
  label: "LLM Turns",
4807
4983
  aggregate: "avg",
4808
- numberFormat: countNumberFormat
4984
+ numberFormat: countNumberFormat,
4985
+ hideIfNoValue: true
4809
4986
  });
4810
- const merged = [...params.stats ?? [], ...defaults];
4987
+ const merged = [
4988
+ ...params.globalStats ?? [],
4989
+ ...params.stats ?? [],
4990
+ ...defaults
4991
+ ];
4811
4992
  return merged.length > 0 ? merged : void 0;
4812
4993
  }
4813
4994
  function appendDefaultCharts(params) {
@@ -4815,6 +4996,7 @@ function appendDefaultCharts(params) {
4815
4996
  const defaults = [];
4816
4997
  if (activeKeys.has("costUsd")) defaults.push({
4817
4998
  heading: "LLM Cost",
4999
+ hideIfNoValue: true,
4818
5000
  type: "area",
4819
5001
  metrics: [{
4820
5002
  source: "column",
@@ -4856,6 +5038,7 @@ function appendDefaultCharts(params) {
4856
5038
  ].filter((metric) => metric !== null);
4857
5039
  if (tokenMetrics.length > 0) defaults.push({
4858
5040
  heading: "LLM Tokens",
5041
+ hideIfNoValue: true,
4859
5042
  type: "bar",
4860
5043
  metrics: tokenMetrics,
4861
5044
  tooltipExtras: activeKeys.has("totalTokens") ? [{
@@ -4872,11 +5055,13 @@ function resolveEvalDefaultConfig(params) {
4872
5055
  const evalRemove = params.evalDef.removeDefaultConfig;
4873
5056
  return {
4874
5057
  columns: mergeDefaultColumns({
5058
+ globalColumns: params.globalColumns,
4875
5059
  columns: params.evalDef.columns,
4876
5060
  globalRemove: params.globalRemove,
4877
5061
  evalRemove
4878
5062
  }),
4879
5063
  stats: appendDefaultStats({
5064
+ globalStats: params.globalStats,
4880
5065
  stats: params.evalDef.stats,
4881
5066
  globalRemove: params.globalRemove,
4882
5067
  evalRemove
@@ -5214,6 +5399,65 @@ function isFile(value) {
5214
5399
  return value instanceof File;
5215
5400
  }
5216
5401
  //#endregion
5402
+ //#region ../runner/src/traceDisplay.ts
5403
/**
 * Check whether a value is a non-null object.
 *
 * Arrays and other object instances also pass; functions and primitives do
 * not.
 *
 * @param value - Value to test.
 * @returns `true` when `typeof value` is `"object"` and the value is not `null`.
 */
function isRecord$1(value) {
  if (value === null) return false;
  return typeof value === "object";
}
5406
/**
 * Return a copy of `value` with `attributeValue` written at a dot-separated
 * `path`.
 *
 * The root and every intermediate object along the path are shallow-copied
 * before being written to, so the input object is never mutated. An
 * intermediate that is missing or not accepted by `isRecord$1` is replaced by
 * a fresh empty object.
 *
 * @param value - Existing attributes object, or `undefined` to start empty.
 * @param path - Dot-separated property path, e.g. `"a.b.c"`.
 * @param attributeValue - Value stored at the final path segment.
 * @returns The new root object.
 */
function mergeNestedAttribute(value, path, attributeValue) {
  const root = value === undefined ? {} : { ...value };
  const segments = path.split(".");
  // `split` always yields at least one element, so a leaf key always exists.
  const leafKey = segments.pop();
  let cursor = root;
  for (const segment of segments) {
    const existing = cursor[segment];
    const child = isRecord$1(existing) ? { ...existing } : {};
    cursor[segment] = child;
    cursor = child;
  }
  cursor[leafKey] = attributeValue;
  return root;
}
5422
/**
 * Combine global and per-eval trace display attributes and apply their
 * transforms to a copy of the spans.
 *
 * Attributes are keyed by `key ?? path`; an eval-level attribute replaces a
 * global one with the same key while keeping the global one's position. For an
 * attribute with a `transform`, the transform runs on every span whose source
 * path resolves to a defined value, and a defined result is stored under
 * `__display.<key ?? path>`; the resolved attribute then points at that path.
 * Input spans and their `attributes` objects are not mutated.
 *
 * @param spans - Spans to present.
 * @param globalTraceDisplay - Optional config with an `attributes` array.
 * @param evalTraceDisplay - Optional per-eval config with an `attributes` array.
 * @returns `{ trace, traceDisplay: { attributes } }` with the copied spans and
 *   the resolved attribute descriptors.
 */
function resolveTracePresentation(spans, globalTraceDisplay, evalTraceDisplay) {
  const attributeId = (attribute) => attribute.key ?? attribute.path;
  const attributesById = /* @__PURE__ */ new Map();
  const declaredAttributes = [
    ...globalTraceDisplay?.attributes ?? [],
    ...evalTraceDisplay?.attributes ?? []
  ];
  // Later (eval-level) declarations overwrite earlier (global) ones.
  for (const attribute of declaredAttributes) attributesById.set(attributeId(attribute), attribute);
  const trace = spans.map((span) => ({
    ...span,
    attributes: span.attributes === undefined ? undefined : { ...span.attributes }
  }));
  const attributes = [];
  for (const attribute of attributesById.values()) {
    const hasTransform = Boolean(attribute.transform);
    const path = hasTransform ? `__display.${attributeId(attribute)}` : attribute.path;
    const { key, label, format, numberFormat, placements, scope, mode } = attribute;
    attributes.push({
      key,
      path,
      label,
      format,
      numberFormat,
      placements,
      scope,
      mode
    });
    if (!hasTransform) continue;
    for (const span of trace) {
      const value = getNestedAttribute(span.attributes, attribute.path);
      if (value === undefined) continue;
      const transformed = attribute.transform({
        value,
        span
      });
      if (transformed !== undefined) span.attributes = mergeNestedAttribute(span.attributes, path, transformed);
    }
  }
  return {
    trace,
    traceDisplay: { attributes }
  };
}
5460
+ //#endregion
5217
5461
  //#region ../runner/src/runMaintenance.ts
5218
5462
  async function persistRunState(runState) {
5219
5463
  await writeFile(join(runState.runDir, "summary.json"), JSON.stringify(runState.summary, null, 2));
@@ -5532,63 +5776,10 @@ async function runWithModuleIsolation(context, fn) {
5532
5776
  return await isolationStorage.run(context, fn);
5533
5777
  }
5534
5778
  //#endregion
5535
- //#region ../runner/src/traceDisplay.ts
5536
- function isRecord$1(value) {
5537
- return typeof value === "object" && value !== null;
5538
- }
5539
- function mergeNestedAttribute(value, path, attributeValue) {
5540
- const root = value === void 0 ? {} : { ...value };
5541
- const parts = path.split(".");
5542
- let current = root;
5543
- for (const [index, part] of parts.entries()) {
5544
- if (index === parts.length - 1) {
5545
- current[part] = attributeValue;
5546
- continue;
5547
- }
5548
- const nextValue = current[part];
5549
- const nextRecord = isRecord$1(nextValue) ? { ...nextValue } : {};
5550
- current[part] = nextRecord;
5551
- current = nextRecord;
5552
- }
5553
- return root;
5554
- }
5555
- function resolveTracePresentation(spans, globalTraceDisplay, evalTraceDisplay) {
5556
- const merged = /* @__PURE__ */ new Map();
5557
- for (const attribute of globalTraceDisplay?.attributes ?? []) merged.set(attribute.key ?? attribute.path, attribute);
5558
- for (const attribute of evalTraceDisplay?.attributes ?? []) merged.set(attribute.key ?? attribute.path, attribute);
5559
- const resolvedAttributes = [];
5560
- const transformedTrace = spans.map((span) => ({
5561
- ...span,
5562
- attributes: span.attributes === void 0 ? void 0 : { ...span.attributes }
5563
- }));
5564
- for (const attribute of merged.values()) {
5565
- const resolvedPath = attribute.transform ? `__display.${attribute.key ?? attribute.path}` : attribute.path;
5566
- resolvedAttributes.push({
5567
- key: attribute.key,
5568
- path: resolvedPath,
5569
- label: attribute.label,
5570
- format: attribute.format,
5571
- numberFormat: attribute.numberFormat,
5572
- placements: attribute.placements,
5573
- scope: attribute.scope,
5574
- mode: attribute.mode
5575
- });
5576
- if (!attribute.transform) continue;
5577
- for (const span of transformedTrace) {
5578
- const sourceValue = getNestedAttribute(span.attributes, attribute.path);
5579
- if (sourceValue === void 0) continue;
5580
- const transformedValue = attribute.transform({
5581
- value: sourceValue,
5582
- span
5583
- });
5584
- if (transformedValue === void 0) continue;
5585
- span.attributes = mergeNestedAttribute(span.attributes, resolvedPath, transformedValue);
5586
- }
5587
- }
5588
- return {
5589
- trace: transformedTrace,
5590
- traceDisplay: { attributes: resolvedAttributes }
5591
- };
5779
+ //#region ../runner/src/stackFormatting.ts
5780
// Matches `[<digits>(;<digits>)*m` sequences that have no leading escape byte.
const orphanedAnsiSgrPattern = /\[(?:\d{1,3}(?:;\d{1,3})*)?m/g;
/**
 * Remove terminal styling codes from text.
 *
 * First strips VT control sequences via `stripVTControlCharacters`, then
 * deletes every remaining match of `orphanedAnsiSgrPattern`.
 *
 * @param value - Text that may contain styling codes.
 * @returns The text with those sequences removed.
 */
function stripTerminalControlCodes(value) {
  const withoutEscapeSequences = stripVTControlCharacters(value);
  return withoutEscapeSequences.replaceAll(orphanedAnsiSgrPattern, "");
}
5593
5784
  //#endregion
5594
5785
  //#region ../runner/src/runExecution.ts
@@ -5620,8 +5811,54 @@ function buildScopedEvalIdPrefix(params) {
5620
5811
  async function callWithUnknownResult(fn, args) {
5621
5812
  return await Reflect.apply(fn, void 0, args);
5622
5813
  }
5814
/**
 * Invoke a value that is expected to be a function, with an `undefined`
 * receiver, and await its result.
 *
 * @param fn - Value to call; checked at runtime.
 * @param args - Array of positional arguments.
 * @returns The awaited return value of `fn`.
 * @throws {Error} "Expected a function" when `fn` is not callable. Because
 *   this function is async, the error is delivered as a rejected promise.
 */
async function callUnknownFunction(fn, args) {
  if (typeof fn === "function") return await Reflect.apply(fn, undefined, args);
  throw new Error("Expected a function");
}
5818
/**
 * Copy derived values into an outputs record without overwriting outputs that
 * are already set.
 *
 * Presence is decided by own properties only. An `in` check would also match
 * members inherited from `Object.prototype`, so a derived key such as
 * `toString` or `constructor` would be silently dropped on a plain-object
 * outputs record.
 *
 * @param params - `{ outputs, derived }`: `outputs` is mutated in place and
 *   `derived` supplies the candidate key/value pairs.
 * @returns Nothing; `params.outputs` is updated in place.
 */
function assignDerivedOutputs(params) {
  const { outputs, derived } = params;
  for (const [key, value] of Object.entries(derived)) {
    // Plain assignment to `__proto__` would swap the record's prototype
    // instead of storing an output, so that key is never written.
    if (key === "__proto__") continue;
    if (Object.hasOwn(outputs, key)) continue;
    outputs[key] = value;
  }
}
5824
/**
 * Evaluate a `deriveFromTracing` config against one case's trace.
 *
 * Two config shapes are handled:
 * - a single function, called once; its result must pass `isRecord`;
 * - an object of per-key functions, called one after another in
 *   `Object.entries` order; keys whose function resolves to `undefined` are
 *   left out of the result.
 *
 * Every callback receives `{ trace, input, case }`.
 *
 * @param params - `{ deriveFromTracing, traceTree, evalCase }`.
 * @returns The derived key/value record.
 * @throws {Error} "deriveFromTracing must return an object" when the
 *   single-function form returns a value rejected by `isRecord`.
 */
async function resolveDeriveFromTracingConfig(params) {
  const ctx = {
    trace: params.traceTree,
    input: params.evalCase.input,
    case: params.evalCase
  };
  const { deriveFromTracing } = params;
  if (typeof deriveFromTracing !== "function") {
    const collected = {};
    for (const [key, compute] of Object.entries(deriveFromTracing)) {
      const value = await callUnknownFunction(compute, [ctx]);
      if (value === undefined) continue;
      collected[key] = value;
    }
    return collected;
  }
  const result = await callUnknownFunction(deriveFromTracing, [ctx]);
  if (isRecord(result)) return result;
  throw new Error("deriveFromTracing must return an object");
}
5842
/**
 * Run one `deriveFromTracing` config inside the existing eval scope and merge
 * its result into the scope's outputs.
 *
 * Does nothing when no config is given. Any error raised while deriving or
 * assigning is caught and recorded on `scope.assertionFailures` with a
 * "deriveFromTracing threw: …" message rather than propagated.
 *
 * @param params - `{ deriveFromTracing, scope, traceTree, evalCase }`.
 * @returns A promise that resolves to `undefined`.
 */
async function runDeriveFromTracingConfig(params) {
  const { deriveFromTracing, scope, traceTree, evalCase } = params;
  if (deriveFromTracing === undefined) return;
  try {
    const derived = await runInExistingEvalScope(scope, "derive", async () => {
      return await resolveDeriveFromTracingConfig({
        deriveFromTracing,
        traceTree,
        evalCase
      });
    });
    assignDerivedOutputs({
      outputs: scope.outputs,
      derived
    });
  } catch (e) {
    const isError = e instanceof Error;
    const message = `deriveFromTracing threw: ${isError ? e.message : String(e)}`;
    scope.assertionFailures.push(toAssertionFailure(message, isError ? e : undefined));
  }
}
5623
5860
  async function runCase(params) {
5624
- const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, codeFingerprint, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
5861
+ const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, globalColumns, globalDeriveFromTracing, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
5625
5862
  const scopedIdPrefix = buildScopedEvalIdPrefix({
5626
5863
  evalId,
5627
5864
  evalFilePath,
@@ -5653,7 +5890,8 @@ async function runCase(params) {
5653
5890
  adapter: cacheAdapter,
5654
5891
  mode: cacheMode,
5655
5892
  evalId,
5656
- codeFingerprint
5893
+ read: evalDef.cache?.read,
5894
+ store: evalDef.cache?.store
5657
5895
  } : void 0,
5658
5896
  startTime: evalDef.startTime,
5659
5897
  freezeTime: evalDef.freezeTime
@@ -5666,22 +5904,19 @@ async function runCase(params) {
5666
5904
  const traceTree = buildTraceTree(spansWithDerivedAttributes, scope.checkpoints);
5667
5905
  const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
5668
5906
  if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
5669
- if (!nonAssertError && evalDef.deriveFromTracing) {
5670
- const { deriveFromTracing } = evalDef;
5671
- try {
5672
- const derived = await runInExistingEvalScope(scope, "derive", async () => {
5673
- return await callWithUnknownResult(deriveFromTracing, [{
5674
- trace: traceTree,
5675
- input: evalCase.input,
5676
- case: evalCase
5677
- }]);
5678
- });
5679
- if (!isRecord(derived)) throw new Error("deriveFromTracing must return an object");
5680
- for (const [key, value] of Object.entries(derived)) if (!(key in scope.outputs)) scope.outputs[key] = value;
5681
- } catch (e) {
5682
- const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
5683
- scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
5684
- }
5907
+ if (!nonAssertError) {
5908
+ await runDeriveFromTracingConfig({
5909
+ deriveFromTracing: globalDeriveFromTracing,
5910
+ scope,
5911
+ traceTree,
5912
+ evalCase
5913
+ });
5914
+ await runDeriveFromTracingConfig({
5915
+ deriveFromTracing: evalDef.deriveFromTracing,
5916
+ scope,
5917
+ traceTree,
5918
+ evalCase
5919
+ });
5685
5920
  }
5686
5921
  if (!nonAssertError) addDefaultOutputs({
5687
5922
  outputs: scope.outputs,
@@ -5698,7 +5933,7 @@ async function runCase(params) {
5698
5933
  ...scope.outputs,
5699
5934
  ...parsedOutputs.data
5700
5935
  };
5701
- else scope.assertionFailures.push(toAssertionFailure(formatOutputsSchemaError(parsedOutputs.error)));
5936
+ else scope.assertionFailures.push(toAssertionFailure(formatOutputsSchemaError(parsedOutputs.error), void 0, "OutputsSchemaError"));
5702
5937
  }
5703
5938
  const scoreResults = /* @__PURE__ */ new Map();
5704
5939
  const scoringTraces = {};
@@ -5721,7 +5956,8 @@ async function runCase(params) {
5721
5956
  adapter: cacheAdapter,
5722
5957
  mode: cacheMode,
5723
5958
  evalId: `${evalId}__score__${key}`,
5724
- codeFingerprint
5959
+ read: evalDef.cache?.read,
5960
+ store: evalDef.cache?.store
5725
5961
  } : void 0,
5726
5962
  startTime: scoreStartTime,
5727
5963
  freezeTime: evalDef.freezeTime
@@ -5776,6 +6012,7 @@ async function runCase(params) {
5776
6012
  const { trace: displayTrace, traceDisplay } = resolveTracePresentation(spansWithDerivedAttributes, globalTraceDisplay, evalDef.traceDisplay);
5777
6013
  const columns = {};
5778
6014
  const columnOverrides = mergeDefaultColumns({
6015
+ globalColumns,
5779
6016
  columns: evalDef.columns,
5780
6017
  globalRemove: globalRemoveDefaultConfig,
5781
6018
  evalRemove: evalDef.removeDefaultConfig
@@ -5839,14 +6076,17 @@ function formatOutputsSchemaError(error) {
5839
6076
  const issueLines = error.issues.map((issue) => {
5840
6077
  return `${issue.path.length > 0 ? issue.path.join(".") : "<root>"}: ${issue.message}`;
5841
6078
  });
5842
- if (issueLines.length === 0) return "outputsSchema validation failed";
5843
- return `outputsSchema validation failed:\n${issueLines.join("\n")}`;
6079
+ if (issueLines.length === 0) return "outputs did not match the configured schema";
6080
+ return issueLines.join("\n");
5844
6081
  }
5845
- function toAssertionFailure(message, error = void 0) {
5846
- return error?.stack ? {
6082
+ function toAssertionFailure(message, error = void 0, nameOverride = void 0) {
6083
+ const name = nameOverride ?? error?.name;
6084
+ const stack = error?.stack ? stripTerminalControlCodes(error.stack) : void 0;
6085
+ return {
6086
+ ...name !== void 0 ? { name } : {},
5847
6087
  message,
5848
- stack: error.stack
5849
- } : { message };
6088
+ ...stack !== void 0 ? { stack } : {}
6089
+ };
5850
6090
  }
5851
6091
  //#endregion
5852
6092
  //#region ../runner/src/runQueue.ts
@@ -6076,15 +6316,15 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6076
6316
  const apiCallsConfig = resolveApiCallsConfig(config.apiCalls);
6077
6317
  for (const evalMeta of targetEvals) {
6078
6318
  const evalFilePath = evalMeta.sourceFilePath;
6079
- let codeFingerprint = "";
6319
+ let sourceFingerprint = "";
6080
6320
  try {
6081
- codeFingerprint = getSourceFingerprint(await readFile(evalFilePath, "utf-8"));
6321
+ sourceFingerprint = getSourceFingerprint(await readFile(evalFilePath, "utf-8"));
6082
6322
  } catch {
6083
- codeFingerprint = "";
6323
+ sourceFingerprint = "";
6084
6324
  }
6085
- if (codeFingerprint.length > 0) {
6086
- runState.manifest.evalSourceFingerprints[evalMeta.key] = codeFingerprint;
6087
- evalMeta.sourceFingerprint = codeFingerprint;
6325
+ if (sourceFingerprint.length > 0) {
6326
+ runState.manifest.evalSourceFingerprints[evalMeta.key] = sourceFingerprint;
6327
+ evalMeta.sourceFingerprint = sourceFingerprint;
6088
6328
  } else {
6089
6329
  delete runState.manifest.evalSourceFingerprints[evalMeta.key];
6090
6330
  evalMeta.sourceFingerprint = null;
@@ -6093,7 +6333,7 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6093
6333
  const registry = getEvalRegistry();
6094
6334
  await runWithModuleIsolation(moduleIsolation, async () => {
6095
6335
  await runInEvalRuntimeScope("env", async () => {
6096
- await loadEvalModule(evalFilePath, codeFingerprint);
6336
+ await loadEvalModule(evalFilePath, sourceFingerprint);
6097
6337
  });
6098
6338
  });
6099
6339
  const entry = registry.get(evalMeta.id);
@@ -6117,6 +6357,8 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6117
6357
  runState.summary.totalCases += cases.length;
6118
6358
  const defaultConfig = resolveEvalDefaultConfig({
6119
6359
  evalDef,
6360
+ globalColumns: config.columns,
6361
+ globalStats: config.stats,
6120
6362
  globalRemove: config.removeDefaultConfig
6121
6363
  });
6122
6364
  const declaredColumnDefs = buildDeclaredColumnDefs(defaultConfig.columns, evalDef.scores, evalDef.manualScores);
@@ -6162,6 +6404,8 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6162
6404
  evalKey: evalMeta.key,
6163
6405
  evalCase,
6164
6406
  globalTraceDisplay,
6407
+ globalColumns: config.columns,
6408
+ globalDeriveFromTracing: config.deriveFromTracing,
6165
6409
  llmCallsConfig,
6166
6410
  apiCallsConfig,
6167
6411
  globalRemoveDefaultConfig: config.removeDefaultConfig,
@@ -6169,7 +6413,6 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6169
6413
  startTime,
6170
6414
  cacheAdapter: bufferedCacheStore ?? (cacheEnabled ? cacheStore : null),
6171
6415
  cacheMode,
6172
- codeFingerprint,
6173
6416
  moduleIsolation,
6174
6417
  evalFilePath,
6175
6418
  evalFileRelativePath: evalMeta.filePath,
@@ -6324,4 +6567,4 @@ function toLastRunStatus(status) {
6324
6567
  return status === "pending" ? null : status;
6325
6568
  }
6326
6569
  //#endregion
6327
- export { llmCallsConfigSchema as $, traceSpanKindSchema as $t, extractApiCalls as A, getCurrentScope as An, evalChartTypeSchema as At, runSummarySchema as B, setEvalOutput as Bn, cacheRecordingOpSchema as Bt, validateCharts as C, evalExpect as Cn, evalChartAggregateSchema as Ct, sseEnvelopeSchema as D, configureEvalRunLogs as Dn, evalChartConfigSchema as Dt, updateManualScoreRequestSchema as E, appendToEvalOutput as En, evalChartColorSchema as Et, getEvalDisplayStatus as F, mergeEvalOutput as Fn, cacheEntryWithDebugKeySchema as Ft, apiCallMetricPlacementSchema as G, traceCacheRefSchema as Gt, DEFAULT_LLM_CALLS_CONFIG as H, startEvalBackgroundJob as Hn, cacheStatusSchema as Ht, deriveScopedSummaryFromCases as I, nextEvalId as In, cacheFileSchema as It, defaultConfigKeySchema as J, traceAttributeDisplayPlacementSchema as Jt, apiCallMetricSchema as K, traceAttributeDisplayFormatSchema as Kt, deriveStatusFromCaseRows as L, runInEvalRuntimeScope as Ln, cacheListItemSchema as Lt, applyDerivedCallAttributes as M, getEvalStartTime as Mn, cacheDebugKeyEntrySchema as Mt, getNestedAttribute as N, incrementEvalOutput as Nn, cacheDebugKeyFileSchema as Nt, extractCacheEntries as O, evalAssert as On, evalChartMetricSchema as Ot, getEvalTitle as P, isInEvalScope as Pn, cacheEntrySchema as Pt, llmCallPricingSchema as Q, traceSpanErrorSchema as Qt, deriveStatusFromChildStatuses as R, runInEvalScope as Rn, cacheModeSchema as Rt, normalizeScoreDef as S, repoFile as Sn, scoreTraceSchema as St, createRunRequestSchema as T, advanceEvalTime as Tn, evalChartBuiltinMetricSchema as Tt, agentEvalsConfigSchema as U, defineEval as Un, serializedCacheSpanSchema as Ut, DEFAULT_API_CALLS_CONFIG as V, setScopeCacheContext as Vn, cacheRecordingSchema as Vt, apiCallMetricFormatSchema as W, getEvalRegistry as Wn, spanCacheOptionsSchema as Wt, llmCallMetricPlacementSchema as X, traceDisplayConfigSchema as Xt, llmCallMetricFormatSchema as Y, traceAttributeDisplaySchema as Yt, llmCallMetricSchema as Z, 
traceDisplayInputConfigSchema as Zt, loadEvalModule as _, hashCacheKeySync as _n, evalSummarySchema as _t, getLastRunStatuses as a, columnKindSchema as an, buildCaseKey as at, loadConfig as b, serializeCacheRecording as bn, runLogLocationSchema as bt, loadPersistedRunSnapshots as c, numberDisplayOptionsSchema as cn, getCaseRowEvalKey as ct, persistRunState as d, z$1 as dn, caseRowSchema as dt, traceSpanSchema as en, removeDefaultConfigSchema as et, recomputeEvalStatusesInRuns as f, buildTraceTree as fn, discoveryIssueSchema as ft, deriveEvalFreshness as g, hashCacheKey as gn, evalStatsConfigSchema as gt, resolveArtifactPath as h, evalTracer as hn, evalStatItemSchema as ht, generateRunId as i, columnFormatSchema as in, trialSelectionModeSchema as it, extractLlmCalls as j, getEvalCaseInput as jn, evalChartsConfigSchema as jt, extractCacheHits as k, evalLog as kn, evalChartTooltipExtraSchema as kt, nextShortIdFromSnapshots as l, repoFileRefSchema as ln, assertionFailureSchema as lt, runTouchesEval as m, evalSpan as mn, evalStatAggregateSchema as mt, getTargetEvalKeys as n, cellValueSchema as nn, resolveLlmCallsConfig as nt, getLatestRunInfos as o, fileRefSchema as on, buildEvalKey as ot, recomputePersistedCaseStatus as p, captureEvalSpanError as pn, evalFreshnessStatusSchema as pt, apiCallsConfigSchema as q, traceAttributeDisplayInputSchema as qt, getTargetEvals as r, columnDefSchema as rn, runLogsConfigSchema as rt, loadPersistedRunSnapshot as s, jsonCellSchema as sn, getCaseRowCaseKey as st, executeRun as t, traceSpanWarningSchema as tn, resolveApiCallsConfig as tt, persistCaseDetail as u, runArtifactRefSchema as un, caseDetailSchema as ut, parseEvalDiscovery as v, deserializeCacheRecording as vn, runLogEntrySchema as vt, createFsCacheStore as w, EvalAssertionError as wn, evalChartAxisSchema as wt, buildDeclaredColumnDefs as x, serializeCacheValue as xn, runLogPhaseSchema as xt, resolveEvalDefaultConfig as y, deserializeCacheValue as yn, runLogLevelSchema as yt, 
runManifestSchema as z, runInExistingEvalScope as zn, cacheOperationTypeSchema as zt };
6570
+ export { llmCallMetricFormatSchema as $, traceAttributeDisplayPlacementSchema as $t, extractCacheHits as A, advanceEvalTime as An, evalChartBuiltinMetricSchema as At, runManifestSchema as B, mergeEvalOutput as Bn, cacheEntryWithDebugKeySchema as Bt, normalizeScoreDef as C, deserializeCacheRecording as Cn, runLogEntrySchema as Ct, updateManualScoreRequestSchema as D, repoFile as Dn, scoreTraceSchema as Dt, createRunRequestSchema as E, serializeCacheValue as En, runLogPhaseSchema as Et, getEvalTitle as F, getCurrentScope as Fn, evalChartTypeSchema as Ft, apiCallMetricFormatSchema as G, setEvalOutput as Gn, cacheRecordingOpSchema as Gt, DEFAULT_API_CALLS_CONFIG as H, runInEvalRuntimeScope as Hn, cacheListItemSchema as Ht, getEvalDisplayStatus as I, getEvalCaseInput as In, evalChartsConfigSchema as It, apiCallsConfigSchema as J, defineEval as Jn, serializedCacheSpanSchema as Jt, apiCallMetricPlacementSchema as K, setScopeCacheContext as Kn, cacheRecordingSchema as Kt, deriveScopedSummaryFromCases as L, getEvalStartTime as Ln, cacheDebugKeyEntrySchema as Lt, extractLlmCalls as M, configureEvalRunLogs as Mn, evalChartConfigSchema as Mt, applyDerivedCallAttributes as N, evalAssert as Nn, evalChartMetricSchema as Nt, sseEnvelopeSchema as O, evalExpect as On, evalChartAggregateSchema as Ot, getNestedAttribute as P, evalLog as Pn, evalChartTooltipExtraSchema as Pt, evalDeriveConfigSchema as Q, traceAttributeDisplayInputSchema as Qt, deriveStatusFromCaseRows as R, incrementEvalOutput as Rn, cacheDebugKeyFileSchema as Rt, buildDeclaredColumnDefs as S, hashCacheKeySync as Sn, evalSummarySchema as St, createFsCacheStore as T, serializeCacheRecording as Tn, runLogLocationSchema as Tt, DEFAULT_LLM_CALLS_CONFIG as U, runInEvalScope as Un, cacheModeSchema as Ut, runSummarySchema as V, nextEvalId as Vn, cacheFileSchema as Vt, agentEvalsConfigSchema as W, runInExistingEvalScope as Wn, cacheOperationTypeSchema as Wt, evalColumnOverrideSchema as X, traceCacheRefSchema as Xt, 
defaultConfigKeySchema as Y, getEvalRegistry as Yn, spanCacheOptionsSchema as Yt, evalColumnsSchema as Z, traceAttributeDisplayFormatSchema as Zt, deriveEvalFreshness as _, buildTraceTree as _n, discoveryIssueSchema as _t, getLastRunStatuses as a, traceSpanSchema as an, removeDefaultConfigSchema as at, resolveEvalDefaultConfig as b, evalTracer as bn, evalStatItemSchema as bt, loadPersistedRunSnapshots as c, columnDefSchema as cn, runLogsConfigSchema as ct, persistRunState as d, fileRefSchema as dn, buildEvalKey as dt, traceAttributeDisplaySchema as en, llmCallMetricPlacementSchema as et, recomputeEvalStatusesInRuns as f, jsonCellSchema as fn, getCaseRowCaseKey as ft, resolveArtifactPath as g, z$1 as gn, caseRowSchema as gt, resolveTracePresentation as h, runArtifactRefSchema as hn, caseDetailSchema as ht, generateRunId as i, traceSpanKindSchema as in, llmCallsConfigSchema as it, extractApiCalls as j, appendToEvalOutput as jn, evalChartColorSchema as jt, extractCacheEntries as k, EvalAssertionError as kn, evalChartAxisSchema as kt, nextShortIdFromSnapshots as l, columnFormatSchema as ln, trialSelectionModeSchema as lt, runTouchesEval as m, repoFileRefSchema as mn, assertionFailureSchema as mt, getTargetEvalKeys as n, traceDisplayInputConfigSchema as nn, llmCallPricingRateSchema as nt, getLatestRunInfos as o, traceSpanWarningSchema as on, resolveApiCallsConfig as ot, recomputePersistedCaseStatus as p, numberDisplayOptionsSchema as pn, getCaseRowEvalKey as pt, apiCallMetricSchema as q, startEvalBackgroundJob as qn, cacheStatusSchema as qt, getTargetEvals as r, traceSpanErrorSchema as rn, llmCallPricingSchema as rt, loadPersistedRunSnapshot as s, cellValueSchema as sn, resolveLlmCallsConfig as st, executeRun as t, traceDisplayConfigSchema as tn, llmCallMetricSchema as tt, persistCaseDetail as u, columnKindSchema as un, buildCaseKey as ut, loadEvalModule as v, captureEvalSpanError as vn, evalFreshnessStatusSchema as vt, validateCharts as w, deserializeCacheValue as wn, 
runLogLevelSchema as wt, loadConfig as x, hashCacheKey as xn, evalStatsConfigSchema as xt, parseEvalDiscovery as y, evalSpan as yn, evalStatAggregateSchema as yt, deriveStatusFromChildStatuses as z, isInEvalScope as zn, cacheEntrySchema as zt };