@ls-stack/agent-eval 0.27.1 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,15 @@
1
1
  import { createRequire, registerHooks } from "node:module";
2
- import { createHash } from "node:crypto";
2
+ import { createHash, randomUUID } from "node:crypto";
3
3
  import { mkdir, readFile, readdir, rename, rm, stat, writeFile } from "node:fs/promises";
4
4
  import { extname, isAbsolute, join, relative, resolve } from "node:path";
5
5
  import { formatWithOptions, isDeepStrictEqual, stripVTControlCharacters } from "node:util";
6
6
  import { AsyncLocalStorage } from "node:async_hooks";
7
7
  import { z, z as z$1 } from "zod/v4";
8
- import { Buffer as Buffer$1 } from "node:buffer";
8
+ import { Blob as Blob$1, Buffer as Buffer$1, File as File$1 } from "node:buffer";
9
9
  import { gunzipSync, gzipSync } from "node:zlib";
10
10
  import { getCompositeKey } from "@ls-stack/utils/getCompositeKey";
11
11
  import { existsSync } from "node:fs";
12
- import { resultify } from "t-result";
12
+ import { Result, resultify } from "t-result";
13
13
  import { fileURLToPath, pathToFileURL } from "node:url";
14
14
  //#region ../sdk/src/defineEval.ts
15
15
  const evalRegistry = /* @__PURE__ */ new Map();
@@ -531,10 +531,13 @@ function recordOpIfActive(scope, op) {
531
531
  if (top) top.ops.push(op);
532
532
  }
533
533
  function toAssertionFailure$1(message, error = void 0) {
534
- return error?.stack ? {
534
+ const name = error?.name;
535
+ const stack = error?.stack ? stripTerminalControlCodes$1(error.stack) : void 0;
536
+ return {
537
+ ...name !== void 0 ? { name } : {},
535
538
  message,
536
- stack: stripTerminalControlCodes$1(error.stack)
537
- } : { message };
539
+ ...stack !== void 0 ? { stack } : {}
540
+ };
538
541
  }
539
542
  /**
540
543
  * Record or replace an output value for the current case scope.
@@ -784,6 +787,67 @@ function evalExpect(value) {
784
787
  return new EvalExpectationImpl(value, false);
785
788
  }
786
789
  //#endregion
790
+ //#region ../sdk/src/manualInputFile.ts
791
+ /**
792
+ * Zod schema describing one file uploaded through the manual-input modal.
793
+ *
794
+ * Use this as the field type on your `manualInput.schema` whenever you mark
795
+ * a field with `{ asFile: true }` in `manualInput.fields`. The UI / CLI stages
796
+ * the selected file on disk, the runner materializes it into the run artifacts
797
+ * directory, and the server validates this JSON metadata against the schema
798
+ * before flowing it into the case input.
799
+ *
800
+ * @example
801
+ * ```ts
802
+ * const schema = z.object({
803
+ * image: manualInputFileValueSchema,
804
+ * note: z.string().optional(),
805
+ * });
806
+ *
807
+ * defineEval({
808
+ * id: 'image-analyzer',
809
+ * manualInput: {
810
+ * schema,
811
+ * fields: { image: { asFile: true, accept: 'image/*' } },
812
+ * },
813
+ * // ...
814
+ * });
815
+ * ```
816
+ */
817
+ const manualInputFileValueSchema = z.object({
818
+ name: z.string(),
819
+ mimeType: z.string(),
820
+ sizeBytes: z.number().int().nonnegative(),
821
+ sha256: z.string().regex(/^[a-f0-9]{64}$/),
822
+ path: z.string().min(1)
823
+ });
824
+ /**
825
+ * Read a manual-input file artifact from disk and expose common byte, Blob,
826
+ * File, text, and JSON views for eval code.
827
+ *
828
+ * @param value Manual-input file metadata received by an eval.
829
+ * @param options.cwd Directory used to resolve relative paths. Defaults to `process.cwd()`.
830
+ * @returns File bytes plus convenience views for common file-processing flows.
831
+ */
832
+ async function readManualInputFile(value, options = {}) {
833
+ const absolutePath = resolve(options.cwd ?? process.cwd(), value.path);
834
+ const bytes = new Uint8Array(await readFile(absolutePath));
835
+ const arrayBuffer = bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
836
+ const blob = new Blob$1([bytes], { type: value.mimeType });
837
+ return {
838
+ value,
839
+ absolutePath,
840
+ bytes,
841
+ arrayBuffer,
842
+ blob,
843
+ file: new File$1([bytes], value.name, { type: value.mimeType }),
844
+ text: async () => await blob.text(),
845
+ json: async () => {
846
+ return JSON.parse(await blob.text());
847
+ }
848
+ };
849
+ }
850
+ //#endregion
787
851
  //#region ../sdk/src/repoFile.ts
788
852
  /**
789
853
  * Create a file reference that can be emitted via `setEvalOutput(...)` and rendered
@@ -802,7 +866,8 @@ function repoFile(path, mimeType) {
802
866
  }
803
867
  //#endregion
804
868
  //#region ../sdk/src/cacheSerialization.ts
805
- const serializedCacheValueMarker = "__agentEvalsCacheSerialization";
869
+ const serializedCacheValueMarker = "__aecs";
870
+ const legacySerializedCacheValueMarker = "__agentEvalsCacheSerialization";
806
871
  const jsonSafeCacheValueVersion = "json-safe-v1";
807
872
  const packedNumberArrayMinLength = 128;
808
873
  const compressedStringMinBytes = 16 * 1024;
@@ -812,7 +877,7 @@ function isRecordLike$3(value) {
812
877
  return typeof value === "object" && value !== null && !Array.isArray(value);
813
878
  }
814
879
  function isJsonSafeSerializedCacheValue(value) {
815
- return isRecordLike$3(value) && value[serializedCacheValueMarker] === jsonSafeCacheValueVersion && typeof value.type === "string";
880
+ return isRecordLike$3(value) && serializationMarkerValue(value) === jsonSafeCacheValueVersion && typeof value.type === "string";
816
881
  }
817
882
  function jsonSafeValue(type, value) {
818
883
  return value === void 0 ? {
@@ -825,32 +890,39 @@ function jsonSafeValue(type, value) {
825
890
  };
826
891
  }
827
892
  function hasSerializationMarkerKey(value) {
828
- return Object.hasOwn(value, serializedCacheValueMarker);
893
+ return Object.hasOwn(value, serializedCacheValueMarker) || Object.hasOwn(value, legacySerializedCacheValueMarker);
894
+ }
895
+ function serializationMarkerValue(value) {
896
+ return value[serializedCacheValueMarker] ?? value[legacySerializedCacheValueMarker];
829
897
  }
830
898
  /**
831
899
  * Serialize one cached value while keeping plain JSON as plain JSON.
832
900
  *
833
- * Rich runtime values use small tagged wrappers.
901
+ * Rich runtime values use small tagged wrappers. Undefined values are omitted
902
+ * by default; pass `preserveUndefined: true` to round-trip them explicitly.
834
903
  */
835
- async function serializeCacheValue(value) {
836
- return serializeJsonSafeValue(value, /* @__PURE__ */ new WeakSet(), 0);
904
+ async function serializeCacheValue(value, options = void 0) {
905
+ return serializeJsonSafeValue(value, /* @__PURE__ */ new WeakSet(), 0, normalizeCacheSerializationOptions(options));
837
906
  }
838
907
  /** Revive one cached value, while preserving legacy JSON-round-tripped data. */
839
908
  function deserializeCacheValue(value) {
840
909
  return deserializeJsonSafeValue(value);
841
910
  }
842
911
  /** Clone one value through the same serialization path used for cache data. */
843
- async function cloneCacheValue(value) {
844
- return deserializeCacheValue(await serializeCacheValue(value));
912
+ async function cloneCacheValue(value, options = void 0) {
913
+ return deserializeCacheValue(await serializeCacheValue(value, options));
914
+ }
915
+ function normalizeCacheSerializationOptions(options) {
916
+ return { preserveUndefined: options?.preserveUndefined === true };
845
917
  }
846
- async function serializeJsonSafeValue(value, refs, depth) {
847
- if (value === void 0) return jsonSafeValue("Undefined");
918
+ async function serializeJsonSafeValue(value, refs, depth, config) {
919
+ if (value === void 0) return config.preserveUndefined ? jsonSafeValue("Undefined") : void 0;
848
920
  if (typeof value === "bigint") return jsonSafeValue("BigInt", value.toString());
849
921
  if (typeof value === "number") return serializeNumber(value);
850
922
  if (typeof value === "string") return serializeString(value, depth);
851
923
  if (value instanceof Date) return jsonSafeValue("Date", value.toISOString());
852
- if (value instanceof Map) return serializeMap(value, refs, depth);
853
- if (value instanceof Set) return serializeSet(value, refs, depth);
924
+ if (value instanceof Map) return serializeMap(value, refs, depth, config);
925
+ if (value instanceof Set) return serializeSet(value, refs, depth, config);
854
926
  if (value instanceof RegExp) return jsonSafeValue("RegExp", {
855
927
  flags: value.flags,
856
928
  source: value.source
@@ -869,7 +941,7 @@ async function serializeJsonSafeValue(value, refs, depth) {
869
941
  type: value.type
870
942
  });
871
943
  if (value instanceof ArrayBuffer) return jsonSafeValue("ArrayBuffer", bytesToBase64(new Uint8Array(value)));
872
- if (value instanceof Error) return serializeError(value, refs, depth);
944
+ if (value instanceof Error) return serializeError(value, refs, depth, config);
873
945
  if (!value || typeof value !== "object") return value;
874
946
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
875
947
  refs.add(value);
@@ -882,12 +954,18 @@ async function serializeJsonSafeValue(value, refs, depth) {
882
954
  }
883
955
  }
884
956
  const items = [];
885
- for (const item of value) items.push(await serializeJsonSafeValue(item, refs, depth + 1));
957
+ for (const item of value) {
958
+ const serializedItem = await serializeJsonSafeValue(item, refs, depth + 1, config);
959
+ if (serializedItem !== void 0) items.push(serializedItem);
960
+ }
886
961
  refs.delete(value);
887
962
  return compressNestedJsonValue(items, depth) ?? items;
888
963
  }
889
964
  const entries = [];
890
- for (const [key, entryValue] of Object.entries(value)) entries.push([key, await serializeJsonSafeValue(entryValue, refs, depth + 1)]);
965
+ for (const [key, entryValue] of Object.entries(value)) {
966
+ const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
967
+ if (serializedEntryValue !== void 0) entries.push([key, serializedEntryValue]);
968
+ }
891
969
  refs.delete(value);
892
970
  const serialized = hasSerializationMarkerKey(value) ? jsonSafeValue("Object", entries) : Object.fromEntries(entries);
893
971
  return compressNestedJsonValue(serialized, depth) ?? serialized;
@@ -957,32 +1035,40 @@ function compressNestedJsonValue(value, depth) {
957
1035
  function compressionIsWorthIt(value, rawSize) {
958
1036
  return Buffer$1.byteLength(JSON.stringify(value)) < rawSize * maxCompressedSizeRatio;
959
1037
  }
960
- async function serializeMap(value, refs, depth) {
1038
+ async function serializeMap(value, refs, depth, config) {
961
1039
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
962
1040
  refs.add(value);
963
1041
  const entries = [];
964
- for (const [key, entryValue] of value.entries()) entries.push([await serializeJsonSafeValue(key, refs, depth + 1), await serializeJsonSafeValue(entryValue, refs, depth + 1)]);
1042
+ for (const [key, entryValue] of value.entries()) {
1043
+ const serializedKey = await serializeJsonSafeValue(key, refs, depth + 1, config);
1044
+ const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
1045
+ if (serializedKey !== void 0 && serializedEntryValue !== void 0) entries.push([serializedKey, serializedEntryValue]);
1046
+ }
965
1047
  refs.delete(value);
966
1048
  return jsonSafeValue("Map", entries);
967
1049
  }
968
- async function serializeSet(value, refs, depth) {
1050
+ async function serializeSet(value, refs, depth, config) {
969
1051
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
970
1052
  refs.add(value);
971
1053
  const items = [];
972
- for (const item of value.values()) items.push(await serializeJsonSafeValue(item, refs, depth + 1));
1054
+ for (const item of value.values()) {
1055
+ const serializedItem = await serializeJsonSafeValue(item, refs, depth + 1, config);
1056
+ if (serializedItem !== void 0) items.push(serializedItem);
1057
+ }
973
1058
  refs.delete(value);
974
1059
  return jsonSafeValue("Set", items);
975
1060
  }
976
- async function serializeError(value, refs, depth) {
1061
+ async function serializeError(value, refs, depth, config) {
977
1062
  if (refs.has(value)) throw new Error("Circular cache values are not supported");
978
1063
  refs.add(value);
979
1064
  const props = [];
980
1065
  for (const [key, entryValue] of Object.entries(value)) {
981
1066
  if (key === "cause") continue;
982
- props.push([key, await serializeJsonSafeValue(entryValue, refs, depth + 1)]);
1067
+ const serializedEntryValue = await serializeJsonSafeValue(entryValue, refs, depth + 1, config);
1068
+ if (serializedEntryValue !== void 0) props.push([key, serializedEntryValue]);
983
1069
  }
984
1070
  const serialized = jsonSafeValue("Error", {
985
- cause: "cause" in value ? await serializeJsonSafeValue(value.cause, refs, depth + 1) : void 0,
1071
+ cause: "cause" in value ? await serializeJsonSafeValue(value.cause, refs, depth + 1, config) : void 0,
986
1072
  message: value.message,
987
1073
  name: value.name,
988
1074
  props,
@@ -1123,33 +1209,36 @@ function deserializeError(value) {
1123
1209
  });
1124
1210
  return error;
1125
1211
  }
1126
- async function serializeRecordValues(record) {
1212
+ async function serializeRecordValues(record, config) {
1127
1213
  const entries = [];
1128
- for (const [key, value] of Object.entries(record)) entries.push([key, await serializeCacheValue(value)]);
1214
+ for (const [key, value] of Object.entries(record)) {
1215
+ const serializedValue = await serializeCacheValue(value, config);
1216
+ if (serializedValue !== void 0) entries.push([key, serializedValue]);
1217
+ }
1129
1218
  return Object.fromEntries(entries);
1130
1219
  }
1131
1220
  function deserializeRecordValues(record) {
1132
1221
  return Object.fromEntries(Object.entries(record).map(([key, value]) => [key, deserializeCacheValue(value)]));
1133
1222
  }
1134
- async function serializeCacheRecordingOp(op) {
1223
+ async function serializeCacheRecordingOp(op, config) {
1135
1224
  switch (op.kind) {
1136
1225
  case "setOutput":
1137
1226
  case "appendOutput": return {
1138
1227
  ...op,
1139
- value: await serializeCacheValue(op.value)
1228
+ value: await serializeCacheValue(op.value, config)
1140
1229
  };
1141
1230
  case "mergeOutput": return {
1142
1231
  ...op,
1143
- patch: await serializeRecordValues(op.patch)
1232
+ patch: await serializeRecordValues(op.patch, config)
1144
1233
  };
1145
1234
  case "incrementOutput": return op;
1146
1235
  case "checkpoint": return {
1147
1236
  ...op,
1148
- data: await serializeCacheValue(op.data)
1237
+ data: await serializeCacheValue(op.data, config)
1149
1238
  };
1150
1239
  case "subSpan": return {
1151
1240
  ...op,
1152
- span: await serializeCacheSpan(op.span)
1241
+ span: await serializeCacheSpan(op.span, config)
1153
1242
  };
1154
1243
  }
1155
1244
  }
@@ -1175,11 +1264,11 @@ function deserializeCacheRecordingOp(op) {
1175
1264
  };
1176
1265
  }
1177
1266
  }
1178
- async function serializeCacheSpan(span) {
1267
+ async function serializeCacheSpan(span, config) {
1179
1268
  return {
1180
1269
  ...span,
1181
- attributes: span.attributes === void 0 ? void 0 : await serializeRecordValues(span.attributes),
1182
- children: await Promise.all(span.children.map(serializeCacheSpan))
1270
+ attributes: span.attributes === void 0 ? void 0 : await serializeRecordValues(span.attributes, config),
1271
+ children: await Promise.all(span.children.map((child) => serializeCacheSpan(child, config)))
1183
1272
  };
1184
1273
  }
1185
1274
  function deserializeCacheSpan(span) {
@@ -1189,13 +1278,19 @@ function deserializeCacheSpan(span) {
1189
1278
  children: span.children.map(deserializeCacheSpan)
1190
1279
  };
1191
1280
  }
1192
- /** Serialize all rich values captured in a cache recording before persistence. */
1193
- async function serializeCacheRecording(recording) {
1281
+ /**
1282
+ * Serialize all rich values captured in a cache recording before persistence.
1283
+ *
1284
+ * Undefined values are omitted by default; pass `preserveUndefined: true` to
1285
+ * retain the legacy explicit undefined wrappers in the recording payload.
1286
+ */
1287
+ async function serializeCacheRecording(recording, options = void 0) {
1288
+ const config = normalizeCacheSerializationOptions(options);
1194
1289
  return {
1195
1290
  ...recording,
1196
- returnValue: await serializeCacheValue(recording.returnValue),
1197
- finalAttributes: await serializeRecordValues(recording.finalAttributes),
1198
- ops: await Promise.all(recording.ops.map(serializeCacheRecordingOp))
1291
+ returnValue: await serializeCacheValue(recording.returnValue, config),
1292
+ finalAttributes: await serializeRecordValues(recording.finalAttributes, config),
1293
+ ops: await Promise.all(recording.ops.map((op) => serializeCacheRecordingOp(op, config)))
1199
1294
  };
1200
1295
  }
1201
1296
  /** Revive all rich values captured in a cache recording after lookup. */
@@ -1587,7 +1682,9 @@ function createTraceCache(generateSpanId) {
1587
1682
  key: info.key
1588
1683
  }, { serializeFileBytes: info.serializeFileBytes === true });
1589
1684
  const activeSpan = scope.activeSpanStack.at(-1);
1590
- if (cacheCtx.mode === "use") {
1685
+ const canRead = cacheCtx.mode === "use" && cacheCtx.read !== false;
1686
+ const canStore = cacheCtx.mode !== "bypass" && cacheCtx.store !== false;
1687
+ if (canRead) {
1591
1688
  const hit = await cacheCtx.adapter.lookup(namespace, keyHash);
1592
1689
  if (hit) {
1593
1690
  const storedAt = hit.storedAt;
@@ -1610,14 +1707,24 @@ function createTraceCache(generateSpanId) {
1610
1707
  name: info.name,
1611
1708
  namespace,
1612
1709
  key: keyHash,
1613
- status: "miss"
1710
+ status: "miss",
1711
+ ...canStore ? {} : { stored: false }
1614
1712
  });
1615
- } else if (cacheCtx.mode === "refresh") recordCacheRef(scope, activeSpan, {
1713
+ } else if (cacheCtx.mode === "use" && canStore) recordCacheRef(scope, activeSpan, {
1714
+ type: "value",
1715
+ name: info.name,
1716
+ namespace,
1717
+ key: keyHash,
1718
+ status: "miss",
1719
+ read: false
1720
+ });
1721
+ else if (cacheCtx.mode === "refresh") recordCacheRef(scope, activeSpan, {
1616
1722
  type: "value",
1617
1723
  name: info.name,
1618
1724
  namespace,
1619
1725
  key: keyHash,
1620
- status: "refresh"
1726
+ status: "refresh",
1727
+ ...canStore ? {} : { stored: false }
1621
1728
  });
1622
1729
  else recordCacheRef(scope, activeSpan, {
1623
1730
  type: "value",
@@ -1640,7 +1747,7 @@ function createTraceCache(generateSpanId) {
1640
1747
  scope.recordingStack.pop();
1641
1748
  }
1642
1749
  appendSubSpanOps(scope, frame);
1643
- if (cacheCtx.mode !== "bypass") {
1750
+ if (canStore) {
1644
1751
  const finalAttributes = diffNonCacheAttributes(beforeAttributes, await snapshotNonCacheAttributes(activeSpan));
1645
1752
  const recording = {
1646
1753
  returnValue: bodyResult,
@@ -1654,13 +1761,11 @@ function createTraceCache(generateSpanId) {
1654
1761
  operationType: "value",
1655
1762
  operationName: info.name,
1656
1763
  storedAt: new Date(getRealDateNowMs()).toISOString(),
1657
- codeFingerprint: cacheCtx.codeFingerprint,
1658
1764
  recording: await serializeCacheRecording(recording)
1659
1765
  }, {
1660
1766
  rawKey: info.key,
1661
1767
  operationType: "value",
1662
- operationName: info.name,
1663
- codeFingerprint: cacheCtx.codeFingerprint
1768
+ operationName: info.name
1664
1769
  });
1665
1770
  }
1666
1771
  return bodyResult;
@@ -2031,11 +2136,13 @@ async function traceSpanInternal(info, fn) {
2031
2136
  namespace,
2032
2137
  key: cacheOpts.key
2033
2138
  }, { serializeFileBytes: cacheOpts.serializeFileBytes === true });
2139
+ const canRead = ctx.mode === "use" && ctx.read !== false;
2140
+ const canStore = ctx.mode !== "bypass" && ctx.store !== false;
2034
2141
  mergeSpanAttributes(spanRecord, {
2035
2142
  "cache.key": keyHash,
2036
2143
  "cache.namespace": namespace
2037
2144
  });
2038
- if (ctx.mode === "use") {
2145
+ if (canRead) {
2039
2146
  const hit = await ctx.adapter.lookup(namespace, keyHash);
2040
2147
  if (hit) {
2041
2148
  const storedAt = hit.storedAt;
@@ -2050,8 +2157,18 @@ async function traceSpanInternal(info, fn) {
2050
2157
  spanRecord.endedAt = addElapsedMsToTimestamp(spanRecord.startedAt, getRealDateNowMs() - realStartedAt);
2051
2158
  return recording.returnValue;
2052
2159
  }
2053
- mergeSpanAttributes(spanRecord, { "cache.status": "miss" });
2054
- } else if (ctx.mode === "refresh") mergeSpanAttributes(spanRecord, { "cache.status": "refresh" });
2160
+ mergeSpanAttributes(spanRecord, {
2161
+ "cache.status": "miss",
2162
+ ...canStore ? {} : { "cache.stored": false }
2163
+ });
2164
+ } else if (ctx.mode === "use" && canStore) mergeSpanAttributes(spanRecord, {
2165
+ "cache.status": "miss",
2166
+ "cache.read": false
2167
+ });
2168
+ else if (ctx.mode === "refresh") mergeSpanAttributes(spanRecord, {
2169
+ "cache.status": "refresh",
2170
+ ...canStore ? {} : { "cache.stored": false }
2171
+ });
2055
2172
  else mergeSpanAttributes(spanRecord, { "cache.status": "bypass" });
2056
2173
  const frame = {
2057
2174
  baseSpanIndex: scope.spans.length,
@@ -2067,7 +2184,7 @@ async function traceSpanInternal(info, fn) {
2067
2184
  }
2068
2185
  appendSubSpanOps(scope, frame);
2069
2186
  finishSpanWithoutThrownError(spanRecord, realStartedAt);
2070
- if (ctx.mode !== "bypass") {
2187
+ if (canStore) {
2071
2188
  const recording = {
2072
2189
  returnValue: bodyResult,
2073
2190
  finalAttributes: stripCacheAttributes(spanRecord.attributes),
@@ -2087,14 +2204,12 @@ async function traceSpanInternal(info, fn) {
2087
2204
  spanName: info.name,
2088
2205
  spanKind: info.kind,
2089
2206
  storedAt: new Date(getRealDateNowMs()).toISOString(),
2090
- codeFingerprint: ctx.codeFingerprint,
2091
2207
  recording: await serializeCacheRecording(recording)
2092
2208
  };
2093
2209
  await ctx.adapter.write(entry, {
2094
2210
  rawKey: cacheOpts.key,
2095
2211
  operationType: "span",
2096
- operationName: info.name,
2097
- codeFingerprint: ctx.codeFingerprint
2212
+ operationName: info.name
2098
2213
  });
2099
2214
  }
2100
2215
  return bodyResult;
@@ -2287,6 +2402,7 @@ const columnDefSchema = z.object({
2287
2402
  passThreshold: z.number().optional(),
2288
2403
  maxStars: z.number().int().min(2).optional(),
2289
2404
  hideInTable: z.boolean().optional(),
2405
+ hideIfNoValue: z.boolean().optional(),
2290
2406
  align: z.enum([
2291
2407
  "left",
2292
2408
  "center",
@@ -2430,6 +2546,10 @@ const traceCacheRefSchema = z.object({
2430
2546
  namespace: z.string(),
2431
2547
  key: z.string(),
2432
2548
  status: cacheStatusSchema,
2549
+ /** Whether this ref attempted to read from cache. Defaults to true. */
2550
+ read: z.boolean().optional(),
2551
+ /** Whether this ref wrote a persisted cache entry. Defaults to true for misses/refreshes. */
2552
+ stored: z.boolean().optional(),
2433
2553
  storedAt: z.string().optional(),
2434
2554
  age: z.number().optional()
2435
2555
  });
@@ -2442,7 +2562,6 @@ const cacheListItemSchema = z.object({
2442
2562
  spanName: z.string().optional(),
2443
2563
  spanKind: traceSpanKindSchema.optional(),
2444
2564
  storedAt: z.string(),
2445
- codeFingerprint: z.string(),
2446
2565
  sizeBytes: z.number()
2447
2566
  });
2448
2567
  /** Zod schema for `SerializedCacheSpan`, defined lazily for recursion. */
@@ -2524,7 +2643,6 @@ const cacheEntrySchema = z.object({
2524
2643
  spanName: z.string().optional(),
2525
2644
  spanKind: traceSpanKindSchema.optional(),
2526
2645
  storedAt: z.string(),
2527
- codeFingerprint: z.string(),
2528
2646
  recording: cacheRecordingSchema
2529
2647
  });
2530
2648
  /** Debug-only raw key metadata stored outside the reusable cache entry. */
@@ -2535,7 +2653,6 @@ const cacheDebugKeyEntrySchema = z.object({
2535
2653
  operationType: cacheOperationTypeSchema,
2536
2654
  operationName: z.string(),
2537
2655
  storedAt: z.string(),
2538
- codeFingerprint: z.string(),
2539
2656
  rawKey: z.unknown()
2540
2657
  });
2541
2658
  /** Cache lookup response with optional debug-only raw key data. */
@@ -2627,6 +2744,16 @@ const evalChartTooltipExtraSchema = z.discriminatedUnion("source", [z.object({
2627
2744
  const evalChartConfigSchema = z.object({
2628
2745
  /** Optional heading shown above the chart frame in the UI. */
2629
2746
  heading: z.string().optional(),
2747
+ /**
2748
+ * Hide this chart in the UI when none of its metrics has a numeric value in
2749
+ * the rendered history window.
2750
+ */
2751
+ hideIfNoValue: z.boolean().optional(),
2752
+ /**
2753
+ * Drop consecutive history points whose plotted metrics and tooltip extras
2754
+ * have the same values as the previous kept point.
2755
+ */
2756
+ dedupeConsecutiveValues: z.boolean().optional(),
2630
2757
  type: evalChartTypeSchema,
2631
2758
  /** At least one series must be declared. */
2632
2759
  metrics: z.array(evalChartMetricSchema).min(1),
@@ -2652,6 +2779,122 @@ const evalChartConfigSchema = z.object({
2652
2779
  */
2653
2780
  const evalChartsConfigSchema = z.array(evalChartConfigSchema);
2654
2781
  //#endregion
2782
+ //#region ../shared/src/schemas/manualInput.ts
2783
+ /**
2784
+ * Common metadata shared by every manual-input field descriptor exposed to
2785
+ * the web UI. The runner builds these from the eval's authored Zod schema and
2786
+ * any per-field overrides, so the client never needs the schema itself.
2787
+ */
2788
+ const manualInputFieldBaseSchema = z.object({
2789
+ /** Top-level key on the eval input object that this field writes to. */
2790
+ key: z.string(),
2791
+ /** Human-readable label rendered next to the field in the modal. */
2792
+ label: z.string(),
2793
+ /** Optional helper text rendered under the label. */
2794
+ description: z.string().optional(),
2795
+ /** Optional placeholder rendered inside the input element. */
2796
+ placeholder: z.string().optional(),
2797
+ /** Whether the field must be filled before the run can be submitted. */
2798
+ required: z.boolean(),
2799
+ /**
2800
+ * Default value used to prefill the field. Type matches the underlying
2801
+ * widget kind (`string` for text/multiline/select, `number` for number,
2802
+ * `boolean` for boolean, JSON-serialisable for `json`).
2803
+ */
2804
+ defaultValue: z.unknown().optional()
2805
+ });
2806
+ /** One option rendered by the `select` widget. */
2807
+ const manualInputSelectOptionSchema = z.object({
2808
+ value: z.string(),
2809
+ label: z.string()
2810
+ });
2811
+ /** Single line text widget descriptor. */
2812
+ const manualInputTextFieldSchema = manualInputFieldBaseSchema.extend({
2813
+ kind: z.literal("text"),
2814
+ /** Optional minimum character length enforced client-side. */
2815
+ minLength: z.number().int().min(0).optional(),
2816
+ /** Optional maximum character length enforced client-side. */
2817
+ maxLength: z.number().int().min(0).optional()
2818
+ });
2819
+ /** Multi-line textarea widget descriptor. */
2820
+ const manualInputMultilineFieldSchema = manualInputFieldBaseSchema.extend({
2821
+ kind: z.literal("multiline"),
2822
+ /** Optional minimum character length enforced client-side. */
2823
+ minLength: z.number().int().min(0).optional(),
2824
+ /** Optional maximum character length enforced client-side. */
2825
+ maxLength: z.number().int().min(0).optional(),
2826
+ /** Suggested number of visible textarea rows; UI may clamp this. */
2827
+ rows: z.number().int().min(1).optional()
2828
+ });
2829
+ /** Numeric input widget descriptor. */
2830
+ const manualInputNumberFieldSchema = manualInputFieldBaseSchema.extend({
2831
+ kind: z.literal("number"),
2832
+ /** Optional inclusive lower bound. */
2833
+ min: z.number().optional(),
2834
+ /** Optional inclusive upper bound. */
2835
+ max: z.number().optional(),
2836
+ /** Optional UI step increment. */
2837
+ step: z.number().positive().optional(),
2838
+ /** Whether the value must be an integer. */
2839
+ integer: z.boolean().optional()
2840
+ });
2841
+ /** Boolean checkbox/toggle widget descriptor. */
2842
+ const manualInputBooleanFieldSchema = manualInputFieldBaseSchema.extend({ kind: z.literal("boolean") });
2843
+ /** Single-select dropdown widget descriptor. */
2844
+ const manualInputSelectFieldSchema = manualInputFieldBaseSchema.extend({
2845
+ kind: z.literal("select"),
2846
+ options: z.array(manualInputSelectOptionSchema)
2847
+ });
2848
+ /** JSON textarea widget descriptor used for nested objects, arrays, and unions. */
2849
+ const manualInputJsonFieldSchema = manualInputFieldBaseSchema.extend({
2850
+ kind: z.literal("json"),
2851
+ /** Suggested number of visible textarea rows; UI may clamp this. */
2852
+ rows: z.number().int().min(1).optional()
2853
+ });
2854
+ /**
2855
+ * File / image upload widget descriptor. The widget supports clicking to
2856
+ * pick a file, drag-and-drop onto the dropzone, and pasting an image from
2857
+ * the system clipboard. The submitted value references a staged file artifact.
2858
+ */
2859
+ const manualInputFileFieldSchema = manualInputFieldBaseSchema.extend({
2860
+ kind: z.literal("file"),
2861
+ /**
2862
+ * Browser `accept` attribute (e.g. `image/*`, `image/png,image/jpeg`,
2863
+ * `.pdf`). When omitted the picker accepts any file type.
2864
+ */
2865
+ accept: z.string().optional(),
2866
+ /** Optional client-side maximum file size in bytes. */
2867
+ maxSizeBytes: z.number().int().positive().optional()
2868
+ });
2869
+ /**
2870
+ * Discriminated union of all supported manual-input widget kinds. The web UI
2871
+ * dispatches to the matching field component based on `kind`.
2872
+ */
2873
+ const manualInputFieldDescriptorSchema = z.discriminatedUnion("kind", [
2874
+ manualInputTextFieldSchema,
2875
+ manualInputMultilineFieldSchema,
2876
+ manualInputNumberFieldSchema,
2877
+ manualInputBooleanFieldSchema,
2878
+ manualInputSelectFieldSchema,
2879
+ manualInputJsonFieldSchema,
2880
+ manualInputFileFieldSchema
2881
+ ]);
2882
+ /**
2883
+ * Wire-format descriptor attached to an `EvalSummary` when the eval declares
2884
+ * `manualInput`. Carries the ordered list of fields the modal renders and
2885
+ * basic context shown in the modal header.
2886
+ */
2887
+ const manualInputDescriptorSchema = z.object({
2888
+ /** Optional title shown in the modal header. Defaults to the eval title. */
2889
+ title: z.string().optional(),
2890
+ /** Optional helper text shown above the form. */
2891
+ description: z.string().optional(),
2892
+ /** Optional submit button label. Defaults to `Run`. */
2893
+ submitLabel: z.string().optional(),
2894
+ /** Ordered list of fields rendered in the modal. */
2895
+ fields: z.array(manualInputFieldDescriptorSchema)
2896
+ });
2897
+ //#endregion
2655
2898
  //#region ../shared/src/schemas/eval.ts
2656
2899
  /** Freshness signal derived from the latest relevant run plus git state. */
2657
2900
  const evalFreshnessStatusSchema = z.enum([
@@ -2667,17 +2910,31 @@ const evalStatAggregateSchema = z.enum([
2667
2910
  "sum",
2668
2911
  "last"
2669
2912
  ]);
2913
+ const hideIfNoValueShape = {
2914
+ /**
2915
+ * Hide this stat in the UI when the current run has no displayable value.
2916
+ * Missing values, `null`, and empty strings count as no value; `0` remains
2917
+ * visible.
2918
+ */
2919
+ hideIfNoValue: z.boolean().optional() };
2670
2920
  /**
2671
2921
  * One entry in the EvalCard stats row. Built-in kinds use latest run totals;
2672
2922
  * `column` aggregates a score or numeric output column across the latest run.
2673
2923
  */
2674
2924
  const evalStatItemSchema = z.discriminatedUnion("kind", [
2675
- z.object({ kind: z.literal("cases") }),
2925
+ z.object({
2926
+ kind: z.literal("cases"),
2927
+ ...hideIfNoValueShape
2928
+ }),
2676
2929
  z.object({
2677
2930
  kind: z.literal("passRate"),
2678
- accent: z.boolean().optional()
2931
+ accent: z.boolean().optional(),
2932
+ ...hideIfNoValueShape
2933
+ }),
2934
+ z.object({
2935
+ kind: z.literal("duration"),
2936
+ ...hideIfNoValueShape
2679
2937
  }),
2680
- z.object({ kind: z.literal("duration") }),
2681
2938
  z.object({
2682
2939
  kind: z.literal("column"),
2683
2940
  key: z.string(),
@@ -2686,7 +2943,8 @@ const evalStatItemSchema = z.discriminatedUnion("kind", [
2686
2943
  format: columnFormatSchema.optional(),
2687
2944
  /** Number presentation options applied when `format: 'number'`. */
2688
2945
  numberFormat: numberDisplayOptionsSchema.optional(),
2689
- accent: z.boolean().optional()
2946
+ accent: z.boolean().optional(),
2947
+ ...hideIfNoValueShape
2690
2948
  })
2691
2949
  ]);
2692
2950
  /** Ordered list of stats rendered in the EvalCard stats row. */
@@ -2734,7 +2992,13 @@ const evalSummarySchema = z.object({
2734
2992
  * Ordered per-eval history chart configuration for the EvalCard. Opt-in:
2735
2993
  * when omitted or empty, the UI renders no history chart at all.
2736
2994
  */
2737
- charts: evalChartsConfigSchema.optional()
2995
+ charts: evalChartsConfigSchema.optional(),
2996
+ /**
2997
+ * Manual-input form descriptor when the eval declares `manualInput`. The
2998
+ * web UI renders these fields in a modal before kicking off a run; the
2999
+ * runner consumes the validated values as the case input.
3000
+ */
3001
+ manualInput: manualInputDescriptorSchema.optional()
2738
3002
  });
2739
3003
  /** Schema for one case row in an eval run result table. */
2740
3004
  const caseRowSchema = z.object({
@@ -2767,6 +3031,12 @@ const caseRowSchema = z.object({
2767
3031
  });
2768
3032
  /** Structured assertion failure metadata captured for one case run. */
2769
3033
  const assertionFailureSchema = z.object({
3034
+ /**
3035
+ * Error class or category label rendered alongside the message (e.g.
3036
+ * `EvalAssertionError`, `OutputsSchemaError`). Optional for legacy entries
3037
+ * and synthetic failures without an originating Error.
3038
+ */
3039
+ name: z.string().optional(),
2770
3040
  /** Human-readable assertion failure message shown in the UI and artifacts. */
2771
3041
  message: z.string(),
2772
3042
  /** Stack trace captured from the originating error when available. */
@@ -2868,7 +3138,7 @@ const caseDetailSchema = z.object({
2868
3138
  });
2869
3139
  /** Schema for discovery problems that should be shown before running evals. */
2870
3140
  const discoveryIssueSchema = z.object({
2871
- type: z.enum(["duplicate-eval-id"]),
3141
+ type: z.enum(["duplicate-eval-id", "manual-input-with-cases"]),
2872
3142
  severity: z.enum(["error"]),
2873
3143
  filePath: z.string(),
2874
3144
  evalId: z.string(),
@@ -2915,6 +3185,25 @@ const defaultConfigKeySchema = z.enum([
2915
3185
  ]);
2916
3186
  /** Removal config for built-in eval-level outputs and UI metadata. */
2917
3187
  const removeDefaultConfigSchema = z.union([z.literal(true), z.array(defaultConfigKeySchema)]);
3188
+ const evalDeriveValueFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a derive output function" });
3189
+ /** Schema for keyed or object-returning trace-derived output config. */
3190
+ const evalDeriveConfigSchema = z.union([z.custom((value) => typeof value === "function", { message: "Expected a deriveFromTracing function" }), z.record(z.string().min(1), evalDeriveValueFnSchema)]);
3191
+ /** Schema for UI overrides on derived or scored columns. */
3192
+ const evalColumnOverrideSchema = z.object({
3193
+ label: z.string().optional(),
3194
+ format: columnFormatSchema.optional(),
3195
+ numberFormat: numberDisplayOptionsSchema.optional(),
3196
+ hideInTable: z.boolean().optional(),
3197
+ hideIfNoValue: z.boolean().optional(),
3198
+ align: z.enum([
3199
+ "left",
3200
+ "center",
3201
+ "right"
3202
+ ]).optional(),
3203
+ maxStars: z.number().int().min(2).optional()
3204
+ });
3205
+ /** Schema for column override maps keyed by output or score field name. */
3206
+ const evalColumnsSchema = z.record(z.string(), evalColumnOverrideSchema);
2918
3207
  /** Render formats supported by an LLM-call metric in the UI. */
2919
3208
  const llmCallMetricFormatSchema = z.enum([
2920
3209
  "string",
@@ -2992,18 +3281,9 @@ const apiCallMetricSchema = z.object({
2992
3281
  placements: z.array(apiCallMetricPlacementSchema).nonempty().optional()
2993
3282
  });
2994
3283
  /**
2995
- * Schema for one model/provider pricing entry used to derive LLM-call costs
2996
- * from token counts.
3284
+ * Schema for pricing rates used to derive LLM-call costs from token counts.
2997
3285
  */
2998
- const llmCallPricingSchema = z.object({
2999
- /** Exact model name read from the configured `attributes.model` path. */
3000
- model: z.string().min(1),
3001
- /**
3002
- * Optional provider discriminator read from `attributes.provider`. When set,
3003
- * the entry only applies to calls from that provider; provider-specific
3004
- * entries take precedence over generic entries for the same model.
3005
- */
3006
- provider: z.string().min(1).optional(),
3286
+ const llmCallPricingRateSchema = z.object({
3007
3287
  /** USD per one million non-cached input tokens. */
3008
3288
  inputUsdPerMillion: z.number().nonnegative().optional(),
3009
3289
  /** USD per one million output tokens. */
@@ -3017,6 +3297,23 @@ const llmCallPricingSchema = z.object({
3017
3297
  /** USD per one million reasoning tokens when reported separately. */
3018
3298
  reasoningUsdPerMillion: z.number().nonnegative().optional()
3019
3299
  });
3300
+ /**
3301
+ * Schema for one model's pricing config. The object key is the exact model
3302
+ * name. Use `providers` when a model has provider-specific rates in addition
3303
+ * to, or instead of, generic model rates.
3304
+ */
3305
+ const llmCallPricingSchema = llmCallPricingRateSchema.extend({
3306
+ /**
3307
+ * Optional provider discriminator read from `attributes.provider`. When set,
3308
+ * the top-level entry only applies to calls from that provider.
3309
+ */
3310
+ provider: z.string().min(1).optional(),
3311
+ /**
3312
+ * Provider-specific pricing for the model. Provider entries take precedence
3313
+ * over generic rates for the same model.
3314
+ */
3315
+ providers: z.record(z.string().min(1), llmCallPricingRateSchema).optional()
3316
+ });
3020
3317
  /** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
3021
3318
  const llmCallsConfigSchema = z.object({
3022
3319
  /** Span kinds treated as LLM calls. Defaults to `['llm']`. */
@@ -3053,10 +3350,10 @@ const llmCallsConfigSchema = z.object({
3053
3350
  */
3054
3351
  derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
3055
3352
  /**
3056
- * Model/provider pricing registry used to calculate LLM-call costs from
3057
- * token counts. Built-in LLM cost fields are only derived from this registry.
3353
+ * Model-keyed pricing registry used to calculate LLM-call costs from token
3354
+ * counts. Built-in LLM cost fields are only derived from this registry.
3058
3355
  */
3059
- pricing: z.array(llmCallPricingSchema).optional(),
3356
+ pricing: z.record(z.string().min(1), llmCallPricingSchema).optional(),
3060
3357
  /** Custom user-defined metrics surfaced on each LLM call. */
3061
3358
  metrics: z.array(llmCallMetricSchema).optional()
3062
3359
  });
@@ -3172,6 +3469,33 @@ function resolveApiCallMetric(metric) {
3172
3469
  placements: metric.placements ? [...metric.placements] : ["body"]
3173
3470
  };
3174
3471
  }
3472
+ function hasPricingRates(pricing) {
3473
+ return pricing.inputUsdPerMillion !== void 0 || pricing.outputUsdPerMillion !== void 0 || pricing.cachedInputUsdPerMillion !== void 0 || pricing.cacheCreationInputUsdPerMillion !== void 0 || pricing.cacheCreationInput1hUsdPerMillion !== void 0 || pricing.reasoningUsdPerMillion !== void 0;
3474
+ }
3475
+ function copyPricingRates(pricing) {
3476
+ return {
3477
+ inputUsdPerMillion: pricing.inputUsdPerMillion,
3478
+ outputUsdPerMillion: pricing.outputUsdPerMillion,
3479
+ cachedInputUsdPerMillion: pricing.cachedInputUsdPerMillion,
3480
+ cacheCreationInputUsdPerMillion: pricing.cacheCreationInputUsdPerMillion,
3481
+ cacheCreationInput1hUsdPerMillion: pricing.cacheCreationInput1hUsdPerMillion,
3482
+ reasoningUsdPerMillion: pricing.reasoningUsdPerMillion
3483
+ };
3484
+ }
3485
+ function resolveLlmCallPricingEntries(model, pricing) {
3486
+ const entries = [];
3487
+ if (hasPricingRates(pricing)) entries.push({
3488
+ model,
3489
+ provider: pricing.provider,
3490
+ ...copyPricingRates(pricing)
3491
+ });
3492
+ for (const [provider, providerPricing] of Object.entries(pricing.providers ?? {})) entries.push({
3493
+ model,
3494
+ provider,
3495
+ ...copyPricingRates(providerPricing)
3496
+ });
3497
+ return entries;
3498
+ }
3175
3499
  /**
3176
3500
  * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
3177
3501
  * by the UI to derive the LLM calls tab.
@@ -3182,7 +3506,7 @@ function resolveApiCallMetric(metric) {
3182
3506
  * - Missing `metrics[].format` defaults to `'string'`.
3183
3507
  * - Missing `metrics[].placements` defaults to `['body']`.
3184
3508
  * - Missing `pricing` defaults to an empty registry; built-in costs are only
3185
- * derived from configured pricing and token counts.
3509
+ * derived from configured model-keyed pricing and token counts.
3186
3510
  */
3187
3511
  function resolveLlmCallsConfig(input) {
3188
3512
  return {
@@ -3193,16 +3517,7 @@ function resolveLlmCallsConfig(input) {
3193
3517
  },
3194
3518
  derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
3195
3519
  metrics: (input?.metrics ?? []).map(resolveLlmCallMetric),
3196
- pricing: (input?.pricing ?? []).map((p) => ({
3197
- model: p.model,
3198
- provider: p.provider,
3199
- inputUsdPerMillion: p.inputUsdPerMillion,
3200
- outputUsdPerMillion: p.outputUsdPerMillion,
3201
- cachedInputUsdPerMillion: p.cachedInputUsdPerMillion,
3202
- cacheCreationInputUsdPerMillion: p.cacheCreationInputUsdPerMillion,
3203
- cacheCreationInput1hUsdPerMillion: p.cacheCreationInput1hUsdPerMillion,
3204
- reasoningUsdPerMillion: p.reasoningUsdPerMillion
3205
- }))
3520
+ pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing))
3206
3521
  };
3207
3522
  }
3208
3523
  /**
@@ -3236,6 +3551,9 @@ const agentEvalsConfigSchema = z.object({
3236
3551
  staleAfterDays: z.number().optional(),
3237
3552
  allowCliRunAll: z.boolean().optional(),
3238
3553
  traceDisplay: traceDisplayInputConfigSchema.optional(),
3554
+ columns: evalColumnsSchema.optional(),
3555
+ deriveFromTracing: evalDeriveConfigSchema.optional(),
3556
+ stats: evalStatsConfigSchema.optional(),
3239
3557
  llmCalls: llmCallsConfigSchema.optional(),
3240
3558
  removeDefaultConfig: removeDefaultConfigSchema.optional(),
3241
3559
  apiCalls: apiCallsConfigSchema.optional(),
@@ -3888,6 +4206,11 @@ function readNumber(attributes, key) {
3888
4206
  const value = attributes[key];
3889
4207
  return typeof value === "number" && Number.isFinite(value) ? value : void 0;
3890
4208
  }
4209
+ function readBoolean(attributes, key) {
4210
+ if (!isRecord$2(attributes)) return void 0;
4211
+ const value = attributes[key];
4212
+ return typeof value === "boolean" ? value : void 0;
4213
+ }
3891
4214
  function readArray(attributes, key) {
3892
4215
  if (!isRecord$2(attributes)) return [];
3893
4216
  const value = attributes[key];
@@ -3916,12 +4239,14 @@ function extractCacheEntries(spans, caseCacheRefs) {
3916
4239
  const namespace = readString(span.attributes, "cache.namespace");
3917
4240
  if (key !== void 0 && namespace !== void 0) {
3918
4241
  const isHit = status === "hit";
4242
+ const stored = isHit ? true : readBoolean(span.attributes, "cache.stored") !== false;
3919
4243
  entries.push({
3920
4244
  id: span.id,
3921
4245
  source: "span",
3922
4246
  origin: "span",
3923
- action: isHit ? "hit" : "added",
4247
+ action: isHit ? "hit" : stored ? "added" : "notStored",
3924
4248
  status,
4249
+ stored,
3925
4250
  name: span.name,
3926
4251
  namespace,
3927
4252
  key,
@@ -3938,12 +4263,14 @@ function extractCacheEntries(spans, caseCacheRefs) {
3938
4263
  const ref = parsed.data;
3939
4264
  if (ref.status === "bypass") continue;
3940
4265
  const isHit = ref.status === "hit";
4266
+ const stored = isHit ? true : ref.stored !== false;
3941
4267
  entries.push({
3942
4268
  id: `${span.id}:value:${String(index)}`,
3943
4269
  source: "value",
3944
4270
  origin: "span",
3945
- action: isHit ? "hit" : "added",
4271
+ action: isHit ? "hit" : stored ? "added" : "notStored",
3946
4272
  status: ref.status,
4273
+ stored,
3947
4274
  name: ref.name,
3948
4275
  namespace: ref.namespace,
3949
4276
  key: ref.key,
@@ -3956,12 +4283,14 @@ function extractCacheEntries(spans, caseCacheRefs) {
3956
4283
  for (const [index, ref] of caseCacheRefs.entries()) {
3957
4284
  if (ref.status === "bypass") continue;
3958
4285
  const isHit = ref.status === "hit";
4286
+ const stored = isHit ? true : ref.stored !== false;
3959
4287
  entries.push({
3960
4288
  id: `case:value:${String(index)}`,
3961
4289
  source: "value",
3962
4290
  origin: "caseRoot",
3963
- action: isHit ? "hit" : "added",
4291
+ action: isHit ? "hit" : stored ? "added" : "notStored",
3964
4292
  status: ref.status,
4293
+ stored,
3965
4294
  name: ref.name,
3966
4295
  namespace: ref.namespace,
3967
4296
  key: ref.key,
@@ -3987,6 +4316,7 @@ function isCacheHitEntry(entry) {
3987
4316
  }
3988
4317
  z.enum([
3989
4318
  "discovery.updated",
4319
+ "config.reload",
3990
4320
  "run.started",
3991
4321
  "run.summary",
3992
4322
  "case.started",
@@ -4006,6 +4336,19 @@ const sseEnvelopeSchema = z.object({
4006
4336
  });
4007
4337
  //#endregion
4008
4338
  //#region ../shared/src/schemas/api.ts
4339
+ /** Lifecycle state for an app config reload triggered by `agent-evals.config.ts`. */
4340
+ const configReloadStatusSchema = z.enum([
4341
+ "idle",
4342
+ "pending",
4343
+ "reloading"
4344
+ ]);
4345
+ /** UI/API-visible state for config reloads in `agent-evals app`. */
4346
+ const configReloadStateSchema = z.object({
4347
+ status: configReloadStatusSchema,
4348
+ activeRunCount: z.number().int().min(0),
4349
+ lastChangedAt: z.string().nullable(),
4350
+ lastReloadedAt: z.string().nullable()
4351
+ });
4009
4352
  /** Schema for the API request that starts a new eval run. */
4010
4353
  const createRunRequestSchema = z.object({
4011
4354
  target: z.object({
@@ -4026,14 +4369,22 @@ const createRunRequestSchema = z.object({
4026
4369
  * Optional cache controls for the run. When omitted, the cache is used in
4027
4370
  * its default read-through / write-on-miss mode.
4028
4371
  */
4029
- cache: z.object({ mode: cacheModeSchema.default("use") }).optional()
4372
+ cache: z.object({ mode: cacheModeSchema.default("use") }).optional(),
4373
+ /**
4374
+ * Manual-input values keyed by eval `key` (workspace-relative file path
4375
+ * plus authored eval id). Required for any targeted eval that declares
4376
+ * `manualInput` in its definition; the server validates each entry against
4377
+ * the eval's authored Zod schema before starting the run.
4378
+ */
4379
+ manualInputs: z.record(z.string(), z.unknown()).optional()
4030
4380
  });
4031
4381
  /** Schema for updating a UI-authored manual score on one persisted case. */
4032
4382
  const updateManualScoreRequestSchema = z.object({ value: z.number().min(0).max(1).nullable() });
4033
4383
  //#endregion
4034
4384
  //#region ../runner/src/cacheStore.ts
4035
4385
  const defaultMaxEntriesPerNamespace = 100;
4036
- const cacheSerializationMarker = "__agentEvalsCacheSerialization";
4386
+ const cacheSerializationMarker = "__aecs";
4387
+ const legacyCacheSerializationMarker = "__agentEvalsCacheSerialization";
4037
4388
  const supportedCacheSerializationVersion = "json-safe-v1";
4038
4389
  /**
4039
4390
  * Create a filesystem-backed cache adapter rooted at `<workspaceRoot>/<dir>`.
@@ -4118,7 +4469,6 @@ function createFsCacheStore(options) {
4118
4469
  spanName: entry.spanName,
4119
4470
  spanKind: entry.spanKind,
4120
4471
  storedAt: entry.storedAt,
4121
- codeFingerprint: entry.codeFingerprint,
4122
4472
  sizeBytes: Buffer.byteLength(JSON.stringify(entry), "utf8")
4123
4473
  });
4124
4474
  }
@@ -4247,7 +4597,7 @@ async function readCacheFilePath(filePath) {
4247
4597
  function usesSupportedCacheSerialization(value) {
4248
4598
  if (Array.isArray(value)) return value.every(usesSupportedCacheSerialization);
4249
4599
  if (!isRecordLike(value)) return true;
4250
- if (Object.hasOwn(value, cacheSerializationMarker) && value[cacheSerializationMarker] !== supportedCacheSerializationVersion) return false;
4600
+ for (const marker of [cacheSerializationMarker, legacyCacheSerializationMarker]) if (Object.hasOwn(value, marker) && value[marker] !== supportedCacheSerializationVersion) return false;
4251
4601
  return Object.values(value).every(usesSupportedCacheSerialization);
4252
4602
  }
4253
4603
  async function writeOrRemoveCacheFile(cacheDir, cacheFile) {
@@ -4291,7 +4641,6 @@ async function writeDebugKeyEntry(params) {
4291
4641
  operationType: debugKey.operationType,
4292
4642
  operationName: debugKey.operationName,
4293
4643
  storedAt: entry.storedAt,
4294
- codeFingerprint: debugKey.codeFingerprint,
4295
4644
  rawKey: debugKey.rawKey
4296
4645
  };
4297
4646
  await writeDebugKeyFile(debugDir, {
@@ -4507,6 +4856,7 @@ function getScoreOverride(def) {
4507
4856
  format: def.format,
4508
4857
  numberFormat: def.numberFormat,
4509
4858
  hideInTable: def.hideInTable,
4859
+ hideIfNoValue: def.hideIfNoValue,
4510
4860
  align: def.align,
4511
4861
  maxStars: def.maxStars
4512
4862
  };
@@ -4519,6 +4869,7 @@ function mergeOverrides(base, override) {
4519
4869
  format: override.format ?? base.format,
4520
4870
  numberFormat: override.numberFormat ?? base.numberFormat,
4521
4871
  hideInTable: override.hideInTable ?? base.hideInTable,
4872
+ hideIfNoValue: override.hideIfNoValue ?? base.hideIfNoValue,
4522
4873
  align: override.align ?? base.align,
4523
4874
  maxStars: override.maxStars ?? base.maxStars
4524
4875
  };
@@ -4633,6 +4984,7 @@ function createColumnDef(params) {
4633
4984
  if (override?.numberFormat !== void 0) def.numberFormat = override.numberFormat;
4634
4985
  if (override?.maxStars !== void 0) def.maxStars = override.maxStars;
4635
4986
  if (override?.hideInTable !== void 0) def.hideInTable = override.hideInTable;
4987
+ if (override?.hideIfNoValue !== void 0) def.hideIfNoValue = override.hideIfNoValue;
4636
4988
  if (override?.align !== void 0) def.align = override.align;
4637
4989
  if (!isScore) return def;
4638
4990
  def.isScore = true;
@@ -4676,7 +5028,9 @@ async function loadConfig() {
4676
5028
  const configPath = resolve(process.cwd(), "agent-evals.config.ts");
4677
5029
  if (!existsSync(configPath)) return defaultConfig;
4678
5030
  try {
4679
- const imported = await import(pathToFileURL(configPath).href);
5031
+ const configUrl = pathToFileURL(configPath);
5032
+ configUrl.searchParams.set("v", randomUUID());
5033
+ const imported = await import(configUrl.href);
4680
5034
  const configModule = configModuleSchema.parse(imported);
4681
5035
  const userConfig = configModule.default ?? configModule.config;
4682
5036
  if (!userConfig) return defaultConfig;
@@ -4717,60 +5071,70 @@ const DEFAULT_COLUMNS = {
4717
5071
  label: "API Calls",
4718
5072
  format: "number",
4719
5073
  numberFormat: countNumberFormat,
4720
- align: "right"
5074
+ align: "right",
5075
+ hideIfNoValue: true
4721
5076
  },
4722
5077
  costUsd: {
4723
5078
  label: "Cost",
4724
5079
  format: "number",
4725
5080
  numberFormat: costNumberFormat,
4726
- align: "right"
5081
+ align: "right",
5082
+ hideIfNoValue: true
4727
5083
  },
4728
5084
  llmTurns: {
4729
5085
  label: "LLM Turns",
4730
5086
  format: "number",
4731
5087
  numberFormat: countNumberFormat,
4732
- align: "right"
5088
+ align: "right",
5089
+ hideIfNoValue: true
4733
5090
  },
4734
5091
  inputTokens: {
4735
5092
  label: "Input Tokens",
4736
5093
  format: "number",
4737
5094
  numberFormat: tokenNumberFormat,
4738
- align: "right"
5095
+ align: "right",
5096
+ hideIfNoValue: true
4739
5097
  },
4740
5098
  outputTokens: {
4741
5099
  label: "Output Tokens",
4742
5100
  format: "number",
4743
5101
  numberFormat: tokenNumberFormat,
4744
- align: "right"
5102
+ align: "right",
5103
+ hideIfNoValue: true
4745
5104
  },
4746
5105
  totalTokens: {
4747
5106
  label: "Total Tokens",
4748
5107
  format: "number",
4749
5108
  numberFormat: tokenNumberFormat,
4750
- align: "right"
5109
+ align: "right",
5110
+ hideIfNoValue: true
4751
5111
  },
4752
5112
  cachedInputTokens: {
4753
5113
  label: "Cached Input Tokens",
4754
5114
  format: "number",
4755
5115
  numberFormat: tokenNumberFormat,
4756
- align: "right"
5116
+ align: "right",
5117
+ hideIfNoValue: true
4757
5118
  },
4758
5119
  cacheCreationInputTokens: {
4759
5120
  label: "Cache Write Tokens",
4760
5121
  format: "number",
4761
5122
  numberFormat: tokenNumberFormat,
4762
- align: "right"
5123
+ align: "right",
5124
+ hideIfNoValue: true
4763
5125
  },
4764
5126
  reasoningTokens: {
4765
5127
  label: "Reasoning Tokens",
4766
5128
  format: "number",
4767
5129
  numberFormat: tokenNumberFormat,
4768
- align: "right"
5130
+ align: "right",
5131
+ hideIfNoValue: true
4769
5132
  },
4770
5133
  llmDurationMs: {
4771
5134
  label: "LLM Duration",
4772
5135
  format: "duration",
4773
- align: "right"
5136
+ align: "right",
5137
+ hideIfNoValue: true
4774
5138
  }
4775
5139
  };
4776
5140
  function resolveRemovedKeys(globalRemove, evalRemove) {
@@ -4783,9 +5147,16 @@ function getActiveDefaultConfigKeys(params) {
4783
5147
  }
4784
5148
  function mergeDefaultColumns(params) {
4785
5149
  const activeKeys = getActiveDefaultConfigKeys(params);
4786
- if (activeKeys.length === 0) return params.columns;
5150
+ if (activeKeys.length === 0) {
5151
+ const merged = {
5152
+ ...params.globalColumns,
5153
+ ...params.columns
5154
+ };
5155
+ return Object.keys(merged).length > 0 ? merged : void 0;
5156
+ }
4787
5157
  return {
4788
5158
  ...Object.fromEntries(activeKeys.map((key) => [key, DEFAULT_COLUMNS[key]])),
5159
+ ...params.globalColumns,
4789
5160
  ...params.columns
4790
5161
  };
4791
5162
  }
@@ -4797,30 +5168,38 @@ function appendDefaultStats(params) {
4797
5168
  key: "apiCalls",
4798
5169
  label: "API Calls",
4799
5170
  aggregate: "avg",
4800
- numberFormat: countNumberFormat
5171
+ numberFormat: countNumberFormat,
5172
+ hideIfNoValue: true
4801
5173
  });
4802
5174
  if (activeKeys.has("costUsd")) defaults.push({
4803
5175
  kind: "column",
4804
5176
  key: "costUsd",
4805
5177
  label: "LLM Cost",
4806
5178
  aggregate: "avg",
4807
- numberFormat: costNumberFormat
5179
+ numberFormat: costNumberFormat,
5180
+ hideIfNoValue: true
4808
5181
  });
4809
5182
  if (activeKeys.has("totalTokens")) defaults.push({
4810
5183
  kind: "column",
4811
5184
  key: "totalTokens",
4812
5185
  label: "Tokens",
4813
5186
  aggregate: "avg",
4814
- numberFormat: tokenNumberFormat
5187
+ numberFormat: tokenNumberFormat,
5188
+ hideIfNoValue: true
4815
5189
  });
4816
5190
  if (activeKeys.has("llmTurns")) defaults.push({
4817
5191
  kind: "column",
4818
5192
  key: "llmTurns",
4819
5193
  label: "LLM Turns",
4820
5194
  aggregate: "avg",
4821
- numberFormat: countNumberFormat
5195
+ numberFormat: countNumberFormat,
5196
+ hideIfNoValue: true
4822
5197
  });
4823
- const merged = [...params.stats ?? [], ...defaults];
5198
+ const merged = [
5199
+ ...params.globalStats ?? [],
5200
+ ...params.stats ?? [],
5201
+ ...defaults
5202
+ ];
4824
5203
  return merged.length > 0 ? merged : void 0;
4825
5204
  }
4826
5205
  function appendDefaultCharts(params) {
@@ -4828,6 +5207,8 @@ function appendDefaultCharts(params) {
4828
5207
  const defaults = [];
4829
5208
  if (activeKeys.has("costUsd")) defaults.push({
4830
5209
  heading: "LLM Cost",
5210
+ hideIfNoValue: true,
5211
+ dedupeConsecutiveValues: true,
4831
5212
  type: "area",
4832
5213
  metrics: [{
4833
5214
  source: "column",
@@ -4837,7 +5218,7 @@ function appendDefaultCharts(params) {
4837
5218
  color: "warning"
4838
5219
  }]
4839
5220
  });
4840
- const tokenMetrics = [
5221
+ const inputTokenMetrics = [
4841
5222
  activeKeys.has("inputTokens") ? {
4842
5223
  source: "column",
4843
5224
  key: "inputTokens",
@@ -4845,13 +5226,6 @@ function appendDefaultCharts(params) {
4845
5226
  label: "Input",
4846
5227
  color: "accent"
4847
5228
  } : null,
4848
- activeKeys.has("outputTokens") ? {
4849
- source: "column",
4850
- key: "outputTokens",
4851
- aggregate: "avg",
4852
- label: "Output",
4853
- color: "success"
4854
- } : null,
4855
5229
  activeKeys.has("cachedInputTokens") ? {
4856
5230
  source: "column",
4857
5231
  key: "cachedInputTokens",
@@ -4867,16 +5241,25 @@ function appendDefaultCharts(params) {
4867
5241
  color: "warning"
4868
5242
  } : null
4869
5243
  ].filter((metric) => metric !== null);
4870
- if (tokenMetrics.length > 0) defaults.push({
4871
- heading: "LLM Tokens",
5244
+ if (inputTokenMetrics.length > 0) defaults.push({
5245
+ heading: "LLM Input Tokens",
5246
+ hideIfNoValue: true,
5247
+ dedupeConsecutiveValues: true,
4872
5248
  type: "bar",
4873
- metrics: tokenMetrics,
4874
- tooltipExtras: activeKeys.has("totalTokens") ? [{
5249
+ metrics: inputTokenMetrics
5250
+ });
5251
+ if (activeKeys.has("outputTokens")) defaults.push({
5252
+ heading: "LLM Output Tokens",
5253
+ hideIfNoValue: true,
5254
+ dedupeConsecutiveValues: true,
5255
+ type: "bar",
5256
+ metrics: [{
4875
5257
  source: "column",
4876
- key: "totalTokens",
5258
+ key: "outputTokens",
4877
5259
  aggregate: "avg",
4878
- label: "Total"
4879
- }] : void 0
5260
+ label: "Output",
5261
+ color: "success"
5262
+ }]
4880
5263
  });
4881
5264
  const merged = [...params.charts ?? [], ...defaults];
4882
5265
  return merged.length > 0 ? merged : void 0;
@@ -4885,11 +5268,13 @@ function resolveEvalDefaultConfig(params) {
4885
5268
  const evalRemove = params.evalDef.removeDefaultConfig;
4886
5269
  return {
4887
5270
  columns: mergeDefaultColumns({
5271
+ globalColumns: params.globalColumns,
4888
5272
  columns: params.evalDef.columns,
4889
5273
  globalRemove: params.globalRemove,
4890
5274
  evalRemove
4891
5275
  }),
4892
5276
  stats: appendDefaultStats({
5277
+ globalStats: params.globalStats,
4893
5278
  stats: params.evalDef.stats,
4894
5279
  globalRemove: params.globalRemove,
4895
5280
  evalRemove
@@ -5144,6 +5529,371 @@ function getRunFreshnessTimestamp(manifest) {
5144
5529
  return manifest.endedAt ?? manifest.startedAt;
5145
5530
  }
5146
5531
  //#endregion
5532
+ //#region ../runner/src/manualInput/walker.ts
5533
+ function isObject(value) {
5534
+ return typeof value === "object" && value !== null;
5535
+ }
5536
+ function getZodDef(schema) {
5537
+ if (!isObject(schema)) return null;
5538
+ const zodHolder = schema._zod;
5539
+ if (!isObject(zodHolder)) return null;
5540
+ const def = zodHolder.def;
5541
+ if (!isObject(def)) return null;
5542
+ if (typeof def.type !== "string") return null;
5543
+ return {
5544
+ ...def,
5545
+ type: def.type
5546
+ };
5547
+ }
5548
+ function getDescription(schema) {
5549
+ if (!isObject(schema)) return void 0;
5550
+ const description = schema.description;
5551
+ return typeof description === "string" ? description : void 0;
5552
+ }
5553
+ function getInnerSchema(def) {
5554
+ return def.innerType;
5555
+ }
5556
+ function getChecks(def) {
5557
+ const checks = def.checks;
5558
+ if (!Array.isArray(checks)) return [];
5559
+ const out = [];
5560
+ for (const check of checks) {
5561
+ if (!isObject(check)) continue;
5562
+ const zodHolder = check._zod;
5563
+ if (!isObject(zodHolder)) continue;
5564
+ const checkDef = zodHolder.def;
5565
+ if (!isObject(checkDef)) continue;
5566
+ if (typeof checkDef.check !== "string") continue;
5567
+ out.push({
5568
+ ...checkDef,
5569
+ check: checkDef.check
5570
+ });
5571
+ }
5572
+ return out;
5573
+ }
5574
+ function findCheck(checks, name) {
5575
+ return checks.find((check) => check.check === name);
5576
+ }
5577
+ function unwrap(schema) {
5578
+ let current = schema;
5579
+ let required = true;
5580
+ let defaultValue = void 0;
5581
+ for (let depth = 0; depth < 8; depth += 1) {
5582
+ const def = getZodDef(current);
5583
+ if (!def) return null;
5584
+ if (def.type === "optional" || def.type === "nullable") {
5585
+ required = false;
5586
+ current = getInnerSchema(def);
5587
+ continue;
5588
+ }
5589
+ if (def.type === "nullish") {
5590
+ required = false;
5591
+ current = getInnerSchema(def);
5592
+ continue;
5593
+ }
5594
+ if (def.type === "default" || def.type === "prefault") {
5595
+ const raw = def.defaultValue;
5596
+ if (typeof raw === "function") defaultValue = Reflect.apply(raw, void 0, []);
5597
+ else defaultValue = raw;
5598
+ current = getInnerSchema(def);
5599
+ continue;
5600
+ }
5601
+ if (def.type === "readonly" || def.type === "pipe") {
5602
+ current = getInnerSchema(def) ?? def.in;
5603
+ continue;
5604
+ }
5605
+ return {
5606
+ schema: current,
5607
+ def,
5608
+ required,
5609
+ defaultValue
5610
+ };
5611
+ }
5612
+ return null;
5613
+ }
5614
+ function humaniseKey(key) {
5615
+ const spaced = key.replace(/([a-z0-9])([A-Z])/g, "$1 $2").replace(/[_-]+/g, " ").trim();
5616
+ if (!spaced) return key;
5617
+ const lowered = spaced.toLowerCase();
5618
+ return lowered.charAt(0).toUpperCase() + lowered.slice(1);
5619
+ }
5620
+ function normaliseSelectOptions(raw) {
5621
+ if (!raw) return void 0;
5622
+ return raw.map((entry) => {
5623
+ if (typeof entry === "string") return {
5624
+ value: entry,
5625
+ label: entry
5626
+ };
5627
+ return {
5628
+ value: entry.value,
5629
+ label: entry.label ?? entry.value
5630
+ };
5631
+ });
5632
+ }
5633
+ function enumOptionsFromEntries(def) {
5634
+ const entries = def.entries;
5635
+ if (!isObject(entries)) return null;
5636
+ const out = [];
5637
+ for (const [label, value] of Object.entries(entries)) if (typeof value === "string") out.push({
5638
+ value,
5639
+ label
5640
+ });
5641
+ else if (typeof value === "number") out.push({
5642
+ value: String(value),
5643
+ label
5644
+ });
5645
+ else return null;
5646
+ return out;
5647
+ }
5648
+ function literalUnionOptions(def) {
5649
+ const options = def.options;
5650
+ if (!Array.isArray(options)) return null;
5651
+ const out = [];
5652
+ for (const option of options) {
5653
+ const optDef = getZodDef(option);
5654
+ if (optDef?.type !== "literal") return null;
5655
+ const values = optDef.values;
5656
+ if (!Array.isArray(values) || values.length !== 1) return null;
5657
+ const value = values[0];
5658
+ if (typeof value === "string") out.push({
5659
+ value,
5660
+ label: value
5661
+ });
5662
+ else if (typeof value === "number") {
5663
+ const stringValue = String(value);
5664
+ out.push({
5665
+ value: stringValue,
5666
+ label: stringValue
5667
+ });
5668
+ } else return null;
5669
+ }
5670
+ return out.length > 0 ? out : null;
5671
+ }
5672
+ function literalSelectOptions(def) {
5673
+ const values = def.values;
5674
+ if (!Array.isArray(values)) return null;
5675
+ const out = [];
5676
+ for (const value of values) if (typeof value === "string") out.push({
5677
+ value,
5678
+ label: value
5679
+ });
5680
+ else if (typeof value === "number") {
5681
+ const stringValue = String(value);
5682
+ out.push({
5683
+ value: stringValue,
5684
+ label: stringValue
5685
+ });
5686
+ } else return null;
5687
+ return out;
5688
+ }
5689
+ function readStringChecks(def) {
5690
+ const checks = getChecks(def);
5691
+ const out = {};
5692
+ const min = findCheck(checks, "min_length");
5693
+ if (min && typeof min.minimum === "number") out.minLength = min.minimum;
5694
+ const max = findCheck(checks, "max_length");
5695
+ if (max && typeof max.maximum === "number") out.maxLength = max.maximum;
5696
+ return out;
5697
+ }
5698
/** `number_format` names that mark a numeric field as integer-only. */
const integerNumberFormats = new Set(["int", "safeint", "int32", "uint32", "int64", "uint64"]);
/**
 * Collect the numeric constraints declared on a number definition.
 *
 * @param def Number definition whose checks are obtained via `getChecks`.
 * @returns An object with any of `min`, `max` and `integer`. A bound is copied
 * only when its check carries a numeric `value` and `inclusive === true`;
 * `integer` is set when the `number_format` check names an integer format.
 */
function readNumberChecks(def) {
	const checks = getChecks(def);
	const constraints = {};
	const isInclusiveNumericBound = (check) => Boolean(check) && typeof check.value === "number" && check.inclusive === true;
	const lowerBound = findCheck(checks, "greater_than");
	if (isInclusiveNumericBound(lowerBound)) constraints.min = lowerBound.value;
	const upperBound = findCheck(checks, "less_than");
	if (isInclusiveNumericBound(upperBound)) constraints.max = upperBound.value;
	const formatCheck = findCheck(checks, "number_format");
	if (formatCheck && typeof formatCheck.format === "string" && integerNumberFormats.has(formatCheck.format)) {
		constraints.integer = true;
	}
	return constraints;
}
5717
/**
 * Build the descriptor for a single manual-input field.
 *
 * The widget kind is chosen in this order: `override.asJson` forces a JSON
 * field, `override.asFile` forces a file field, explicit `override.options`
 * force a select, and otherwise the kind follows the unwrapped schema's
 * `type`. Types without a dedicated widget, and enum/literal/union schemas
 * whose options cannot be derived, fall back to the JSON field.
 *
 * @param key Field name inside the object schema.
 * @param fieldSchema Schema of the field; it is unwrapped with `unwrap`.
 * @param override Optional per-field overrides (label, description,
 * placeholder, defaultValue, widget hints).
 * @returns `Result.ok(descriptor)`, or `Result.err(Error)` when `unwrap`
 * yields nothing for the schema.
 */
function buildField(key, fieldSchema, override) {
	const unwrapped = unwrap(fieldSchema);
	if (!unwrapped) {
		return Result.err(new Error(`manualInput: field "${key}" uses an unsupported Zod schema (could not introspect)`));
	}
	const inner = unwrapped.def;
	const description = override?.description ?? getDescription(unwrapped.schema);
	// Properties shared by every widget kind; an explicit override default
	// wins over the default carried by the schema.
	const base = {
		key,
		label: override?.label ?? humaniseKey(key),
		description,
		placeholder: override?.placeholder,
		required: unwrapped.required,
		defaultValue: override?.defaultValue !== void 0 ? override.defaultValue : unwrapped.defaultValue
	};
	const jsonField = () => Result.ok({
		...base,
		kind: "json",
		rows: override?.rows
	});
	const selectField = (options) => Result.ok({
		...base,
		kind: "select",
		options
	});
	if (override?.asJson === true) return jsonField();
	if (override?.asFile === true) {
		return Result.ok({
			...base,
			kind: "file",
			accept: override.accept,
			maxSizeBytes: override.maxSizeBytes
		});
	}
	const overrideOptions = normaliseSelectOptions(override?.options);
	if (overrideOptions) return selectField(overrideOptions);
	switch (inner.type) {
		case "string": {
			const { minLength, maxLength } = readStringChecks(inner);
			if (override?.multiline === true) {
				return Result.ok({
					...base,
					kind: "multiline",
					rows: override.rows,
					minLength,
					maxLength
				});
			}
			return Result.ok({
				...base,
				kind: "text",
				minLength,
				maxLength
			});
		}
		case "number":
		case "int":
		case "bigint": {
			const { min, max, integer } = readNumberChecks(inner);
			return Result.ok({
				...base,
				kind: "number",
				min,
				max,
				integer
			});
		}
		case "boolean":
			return Result.ok({
				...base,
				kind: "boolean"
			});
		case "enum": {
			const enumOptions = enumOptionsFromEntries(inner);
			return enumOptions ? selectField(enumOptions) : jsonField();
		}
		case "literal": {
			const literalOptions = literalSelectOptions(inner);
			// An empty option list is useless for a select, so it also falls back.
			return literalOptions && literalOptions.length > 0 ? selectField(literalOptions) : jsonField();
		}
		case "union": {
			const unionOptions = literalUnionOptions(inner);
			return unionOptions ? selectField(unionOptions) : jsonField();
		}
		default:
			return jsonField();
	}
}
5829
/**
 * Read the field map of an object schema.
 *
 * @param schema Schema to inspect through `getZodDef`.
 * @returns The definition's `shape`, or `null` when no definition is found,
 * its `type` is not `"object"`, or the shape fails the `isObject` check.
 */
function getObjectShape(schema) {
	const def = getZodDef(schema);
	if (!def || def.type !== "object") return null;
	const candidate = def.shape;
	return isObject(candidate) ? candidate : null;
}
5837
/**
 * Convert an eval's `manualInput` configuration into the descriptor the web
 * UI renders as a form.
 *
 * `config.schema` has to resolve to a top-level `z.object(...)`. Each entry of
 * its shape becomes one field via `buildField`, combined with the matching
 * entry of `config.fields` when one is set.
 *
 * A `Result` is returned instead of throwing so the caller can report an
 * incompatible schema as an issue: the error is either the "must be a
 * top-level z.object" error or the first error produced by `buildField`.
 */
function buildManualInputDescriptor(config) {
	const shape = getObjectShape(config.schema);
	if (!shape) {
		return Result.err(new Error("manualInput.schema must be a top-level z.object(...). Wrap nested types in an object schema."));
	}
	// Keep only the overrides that are actually set (truthy).
	const overridesByKey = {};
	const overrideEntries = config.fields ? Object.entries(config.fields) : [];
	for (const [fieldKey, fieldOverride] of overrideEntries) {
		if (fieldOverride) overridesByKey[fieldKey] = fieldOverride;
	}
	const fields = [];
	for (const [fieldKey, fieldSchema] of Object.entries(shape)) {
		const built = buildField(fieldKey, fieldSchema, overridesByKey[fieldKey]);
		if (built.error) return built.errorResult();
		fields.push(built.value);
	}
	const { title, description, submitLabel } = config;
	return Result.ok({
		title,
		description,
		submitLabel,
		fields
	});
}
5867
/**
 * Validate a raw manual-input submission against the eval's
 * `manualInput.schema`.
 *
 * @param config Manual-input configuration; `config.schema.safeParse` is used.
 * @param raw Submitted values, not yet validated.
 * @returns `Result.ok` with the parsed data, or `Result.err` with a
 * `ManualInputValidationError` built from the formatted schema issues.
 */
function parseManualInputValues(config, raw) {
	const outcome = config.schema.safeParse(raw);
	if (outcome.success) return Result.ok(outcome.data);
	const issues = outcome.error.issues.map(formatIssue);
	return Result.err(new ManualInputValidationError(issues));
}
5877
/**
 * Error describing manual-input values that failed validation against the
 * eval's `manualInput.schema`.
 *
 * The formatted issues (`{ path, message }`) are kept on `issues` so callers
 * can report them per field. The message lists them as `path: message` (or
 * just `message` when the path is empty), joined with "; ".
 */
var ManualInputValidationError = class extends Error {
	issues;
	constructor(issues) {
		super(
			issues.length === 0
				? "manualInput validation failed"
				: `manualInput validation failed: ${issues
						.map(({ path, message }) => (path ? `${path}: ${message}` : message))
						.join("; ")}`
		);
		this.name = "ManualInputValidationError";
		this.issues = issues;
	}
};
5890
/**
 * Reduce a schema issue to a `{ path, message }` pair.
 *
 * @param issue Issue with a `path` array and a `message`.
 * @returns The message plus a dot-joined path. Only string and number path
 * segments are kept, and segments that stringify to "" are dropped.
 */
function formatIssue(issue) {
	const segments = [];
	for (const segment of issue.path) {
		if (typeof segment !== "string" && typeof segment !== "number") continue;
		const text = String(segment);
		if (text !== "") segments.push(text);
	}
	return {
		path: segments.join("."),
		message: issue.message
	};
}
5896
+ //#endregion
5147
5897
  //#region ../runner/src/outputArtifacts.ts
5148
5898
  const mimeTypeExtensionMap = {
5149
5899
  "application/json": ".json",
@@ -5227,6 +5977,65 @@ function isFile(value) {
5227
5977
  return value instanceof File;
5228
5978
  }
5229
5979
  //#endregion
5980
+ //#region ../runner/src/traceDisplay.ts
5981
/**
 * Report whether `value` is a non-null value of type "object". Arrays pass
 * this check as well.
 */
function isRecord$1(value) {
	return value !== null && typeof value === "object";
}
5984
/**
 * Return a copy of `value` with `attributeValue` stored at the dot-separated
 * `path`.
 *
 * Every object along the path is shallow-copied, so the input is not mutated.
 * Intermediate values that are not objects are replaced by a new empty object.
 * Arrays along the path are copied into plain objects.
 *
 * @param value Attribute record to extend, or `undefined` to start empty.
 * @param path Dot-separated key path.
 * @param attributeValue Value to store at the end of the path.
 * @returns The new root record.
 */
function mergeNestedAttribute(value, path, attributeValue) {
	const root = value === void 0 ? {} : { ...value };
	const segments = path.split(".");
	// `split` always yields at least one segment; the last one is the leaf key.
	const leafKey = segments[segments.length - 1];
	let cursor = root;
	for (const segment of segments.slice(0, -1)) {
		const existing = cursor[segment];
		const branch = typeof existing === "object" && existing !== null ? { ...existing } : {};
		cursor[segment] = branch;
		cursor = branch;
	}
	cursor[leafKey] = attributeValue;
	return root;
}
6000
/**
 * Combine global and per-eval trace display settings and apply attribute
 * transforms to a copy of the spans.
 *
 * Attributes are keyed by `key ?? path`. An eval-level attribute replaces a
 * global one with the same key but keeps the global entry's position. For an
 * attribute with a `transform`, the transformed value is written to
 * `__display.<key ?? path>` on each span where the source value and the
 * transform result are both defined, and the resolved attribute points at that
 * path.
 *
 * @param spans Trace spans. They are shallow-copied, along with their
 * `attributes` object.
 * @param globalTraceDisplay Optional global display config (`attributes`).
 * @param evalTraceDisplay Optional eval-level display config (`attributes`).
 * @returns `{ trace, traceDisplay }` holding the copied spans and the resolved
 * attribute list.
 */
function resolveTracePresentation(spans, globalTraceDisplay, evalTraceDisplay) {
	const attributesByKey = new Map();
	const layers = [globalTraceDisplay?.attributes ?? [], evalTraceDisplay?.attributes ?? []];
	for (const layer of layers) {
		for (const attribute of layer) attributesByKey.set(attribute.key ?? attribute.path, attribute);
	}
	const trace = spans.map((span) => {
		const attributes = span.attributes === void 0 ? void 0 : { ...span.attributes };
		return {
			...span,
			attributes
		};
	});
	const resolved = [];
	for (const attribute of attributesByKey.values()) {
		const hasTransform = Boolean(attribute.transform);
		const targetPath = hasTransform ? `__display.${attribute.key ?? attribute.path}` : attribute.path;
		resolved.push({
			key: attribute.key,
			path: targetPath,
			label: attribute.label,
			format: attribute.format,
			numberFormat: attribute.numberFormat,
			placements: attribute.placements,
			scope: attribute.scope,
			mode: attribute.mode
		});
		if (!hasTransform) continue;
		for (const span of trace) {
			const sourceValue = getNestedAttribute(span.attributes, attribute.path);
			if (sourceValue === void 0) continue;
			const displayValue = attribute.transform({
				value: sourceValue,
				span
			});
			if (displayValue !== void 0) {
				span.attributes = mergeNestedAttribute(span.attributes, targetPath, displayValue);
			}
		}
	}
	return {
		trace,
		traceDisplay: { attributes: resolved }
	};
}
6038
+ //#endregion
5230
6039
  //#region ../runner/src/runMaintenance.ts
5231
6040
  async function persistRunState(runState) {
5232
6041
  await writeFile(join(runState.runDir, "summary.json"), JSON.stringify(runState.summary, null, 2));
@@ -5551,65 +6360,6 @@ function stripTerminalControlCodes(value) {
5551
6360
  return stripVTControlCharacters(value).replaceAll(orphanedAnsiSgrPattern, "");
5552
6361
  }
5553
6362
  //#endregion
5554
- //#region ../runner/src/traceDisplay.ts
5555
- function isRecord$1(value) {
5556
- return typeof value === "object" && value !== null;
5557
- }
5558
- function mergeNestedAttribute(value, path, attributeValue) {
5559
- const root = value === void 0 ? {} : { ...value };
5560
- const parts = path.split(".");
5561
- let current = root;
5562
- for (const [index, part] of parts.entries()) {
5563
- if (index === parts.length - 1) {
5564
- current[part] = attributeValue;
5565
- continue;
5566
- }
5567
- const nextValue = current[part];
5568
- const nextRecord = isRecord$1(nextValue) ? { ...nextValue } : {};
5569
- current[part] = nextRecord;
5570
- current = nextRecord;
5571
- }
5572
- return root;
5573
- }
5574
- function resolveTracePresentation(spans, globalTraceDisplay, evalTraceDisplay) {
5575
- const merged = /* @__PURE__ */ new Map();
5576
- for (const attribute of globalTraceDisplay?.attributes ?? []) merged.set(attribute.key ?? attribute.path, attribute);
5577
- for (const attribute of evalTraceDisplay?.attributes ?? []) merged.set(attribute.key ?? attribute.path, attribute);
5578
- const resolvedAttributes = [];
5579
- const transformedTrace = spans.map((span) => ({
5580
- ...span,
5581
- attributes: span.attributes === void 0 ? void 0 : { ...span.attributes }
5582
- }));
5583
- for (const attribute of merged.values()) {
5584
- const resolvedPath = attribute.transform ? `__display.${attribute.key ?? attribute.path}` : attribute.path;
5585
- resolvedAttributes.push({
5586
- key: attribute.key,
5587
- path: resolvedPath,
5588
- label: attribute.label,
5589
- format: attribute.format,
5590
- numberFormat: attribute.numberFormat,
5591
- placements: attribute.placements,
5592
- scope: attribute.scope,
5593
- mode: attribute.mode
5594
- });
5595
- if (!attribute.transform) continue;
5596
- for (const span of transformedTrace) {
5597
- const sourceValue = getNestedAttribute(span.attributes, attribute.path);
5598
- if (sourceValue === void 0) continue;
5599
- const transformedValue = attribute.transform({
5600
- value: sourceValue,
5601
- span
5602
- });
5603
- if (transformedValue === void 0) continue;
5604
- span.attributes = mergeNestedAttribute(span.attributes, resolvedPath, transformedValue);
5605
- }
5606
- }
5607
- return {
5608
- trace: transformedTrace,
5609
- traceDisplay: { attributes: resolvedAttributes }
5610
- };
5611
- }
5612
- //#endregion
5613
6363
  //#region ../runner/src/runExecution.ts
5614
6364
  function filterEvalCases(cases, caseIds) {
5615
6365
  if (!caseIds || caseIds.length === 0) return cases;
@@ -5639,8 +6389,54 @@ function buildScopedEvalIdPrefix(params) {
5639
6389
  async function callWithUnknownResult(fn, args) {
5640
6390
  return await Reflect.apply(fn, void 0, args);
5641
6391
  }
6392
/**
 * Call a value that is expected to be a function.
 *
 * @param fn Candidate function; it is invoked with no `this` binding.
 * @param args Argument list passed to `fn`.
 * @returns The awaited return value of `fn`.
 * @throws {Error} "Expected a function" when `fn` is not a function. Because
 * this function is async, the error surfaces as a rejected promise.
 */
async function callUnknownFunction(fn, args) {
	if (typeof fn === "function") return await Reflect.apply(fn, void 0, args);
	throw new Error("Expected a function");
}
6396
/**
 * Copy derived values into the outputs record without overwriting outputs
 * that are already set.
 *
 * @param params.outputs Record that receives the values; mutated in place.
 * @param params.derived Candidate values; its own enumerable entries are
 * considered.
 */
function assignDerivedOutputs(params) {
	const { outputs, derived } = params;
	for (const [key, value] of Object.entries(derived)) {
		// Assigning `__proto__` would replace the record's prototype instead of
		// adding an output, so that key stays ignored.
		if (key === "__proto__") continue;
		// Use an own-property check: `key in outputs` also matches names
		// inherited from Object.prototype ("constructor", "toString", ...),
		// which silently dropped derived outputs with those names.
		if (Object.prototype.hasOwnProperty.call(outputs, key)) continue;
		outputs[key] = value;
	}
}
6402
/**
 * Evaluate a `deriveFromTracing` configuration for one case.
 *
 * The configuration is either a single function that returns the whole
 * derived record, or an object mapping output keys to functions that compute
 * one value each. Every function receives `{ trace, input, case }`.
 *
 * @param params.deriveFromTracing Function or key-to-function object.
 * @param params.traceTree Passed to the functions as `trace`.
 * @param params.evalCase Passed as `case`; its `input` is passed as `input`.
 * @returns The derived record. In the object form the functions run one after
 * another and keys whose function resolves to `undefined` are omitted.
 * @throws {Error} "deriveFromTracing must return an object" when the function
 * form returns something that fails the `isRecord` check.
 */
async function resolveDeriveFromTracingConfig(params) {
	const { deriveFromTracing, traceTree, evalCase } = params;
	const ctx = {
		trace: traceTree,
		input: evalCase.input,
		case: evalCase
	};
	if (typeof deriveFromTracing === "function") {
		const whole = await callUnknownFunction(deriveFromTracing, [ctx]);
		if (isRecord(whole)) return whole;
		throw new Error("deriveFromTracing must return an object");
	}
	const derived = {};
	for (const [key, compute] of Object.entries(deriveFromTracing)) {
		const value = await callUnknownFunction(compute, [ctx]);
		if (value === void 0) continue;
		derived[key] = value;
	}
	return derived;
}
6420
/**
 * Run one `deriveFromTracing` configuration for a case and merge its result
 * into the scope's outputs.
 *
 * Does nothing when `params.deriveFromTracing` is `undefined`. The
 * configuration is resolved through `runInExistingEvalScope(scope, "derive",
 * ...)` and merged with `assignDerivedOutputs`. Anything thrown along the way
 * is not rethrown; it is recorded on `scope.assertionFailures` with a
 * "deriveFromTracing threw: ..." message.
 *
 * @param params.deriveFromTracing Configuration to run, or `undefined`.
 * @param params.scope Eval scope providing `outputs` and `assertionFailures`.
 * @param params.traceTree Trace tree handed to the derive functions.
 * @param params.evalCase Case handed to the derive functions.
 */
async function runDeriveFromTracingConfig(params) {
	const { deriveFromTracing, scope, traceTree, evalCase } = params;
	if (deriveFromTracing === void 0) return;
	try {
		const derived = await runInExistingEvalScope(scope, "derive", async () => {
			return await resolveDeriveFromTracingConfig({
				deriveFromTracing,
				traceTree,
				evalCase
			});
		});
		assignDerivedOutputs({
			outputs: scope.outputs,
			derived
		});
	} catch (e) {
		const thrownError = e instanceof Error ? e : void 0;
		const detail = thrownError ? thrownError.message : String(e);
		scope.assertionFailures.push(toAssertionFailure(`deriveFromTracing threw: ${detail}`, thrownError));
	}
}
5642
6438
  async function runCase(params) {
5643
- const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, codeFingerprint, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
6439
+ const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, globalColumns, globalDeriveFromTracing, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
5644
6440
  const scopedIdPrefix = buildScopedEvalIdPrefix({
5645
6441
  evalId,
5646
6442
  evalFilePath,
@@ -5672,7 +6468,8 @@ async function runCase(params) {
5672
6468
  adapter: cacheAdapter,
5673
6469
  mode: cacheMode,
5674
6470
  evalId,
5675
- codeFingerprint
6471
+ read: evalDef.cache?.read,
6472
+ store: evalDef.cache?.store
5676
6473
  } : void 0,
5677
6474
  startTime: evalDef.startTime,
5678
6475
  freezeTime: evalDef.freezeTime
@@ -5685,22 +6482,19 @@ async function runCase(params) {
5685
6482
  const traceTree = buildTraceTree(spansWithDerivedAttributes, scope.checkpoints);
5686
6483
  const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
5687
6484
  if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
5688
- if (!nonAssertError && evalDef.deriveFromTracing) {
5689
- const { deriveFromTracing } = evalDef;
5690
- try {
5691
- const derived = await runInExistingEvalScope(scope, "derive", async () => {
5692
- return await callWithUnknownResult(deriveFromTracing, [{
5693
- trace: traceTree,
5694
- input: evalCase.input,
5695
- case: evalCase
5696
- }]);
5697
- });
5698
- if (!isRecord(derived)) throw new Error("deriveFromTracing must return an object");
5699
- for (const [key, value] of Object.entries(derived)) if (!(key in scope.outputs)) scope.outputs[key] = value;
5700
- } catch (e) {
5701
- const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
5702
- scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
5703
- }
6485
+ if (!nonAssertError) {
6486
+ await runDeriveFromTracingConfig({
6487
+ deriveFromTracing: globalDeriveFromTracing,
6488
+ scope,
6489
+ traceTree,
6490
+ evalCase
6491
+ });
6492
+ await runDeriveFromTracingConfig({
6493
+ deriveFromTracing: evalDef.deriveFromTracing,
6494
+ scope,
6495
+ traceTree,
6496
+ evalCase
6497
+ });
5704
6498
  }
5705
6499
  if (!nonAssertError) addDefaultOutputs({
5706
6500
  outputs: scope.outputs,
@@ -5717,7 +6511,7 @@ async function runCase(params) {
5717
6511
  ...scope.outputs,
5718
6512
  ...parsedOutputs.data
5719
6513
  };
5720
- else scope.assertionFailures.push(toAssertionFailure(formatOutputsSchemaError(parsedOutputs.error)));
6514
+ else scope.assertionFailures.push(toAssertionFailure(formatOutputsSchemaError(parsedOutputs.error), void 0, "OutputsSchemaError"));
5721
6515
  }
5722
6516
  const scoreResults = /* @__PURE__ */ new Map();
5723
6517
  const scoringTraces = {};
@@ -5740,7 +6534,8 @@ async function runCase(params) {
5740
6534
  adapter: cacheAdapter,
5741
6535
  mode: cacheMode,
5742
6536
  evalId: `${evalId}__score__${key}`,
5743
- codeFingerprint
6537
+ read: evalDef.cache?.read,
6538
+ store: evalDef.cache?.store
5744
6539
  } : void 0,
5745
6540
  startTime: scoreStartTime,
5746
6541
  freezeTime: evalDef.freezeTime
@@ -5795,6 +6590,7 @@ async function runCase(params) {
5795
6590
  const { trace: displayTrace, traceDisplay } = resolveTracePresentation(spansWithDerivedAttributes, globalTraceDisplay, evalDef.traceDisplay);
5796
6591
  const columns = {};
5797
6592
  const columnOverrides = mergeDefaultColumns({
6593
+ globalColumns,
5798
6594
  columns: evalDef.columns,
5799
6595
  globalRemove: globalRemoveDefaultConfig,
5800
6596
  evalRemove: evalDef.removeDefaultConfig
@@ -5858,14 +6654,17 @@ function formatOutputsSchemaError(error) {
5858
6654
  const issueLines = error.issues.map((issue) => {
5859
6655
  return `${issue.path.length > 0 ? issue.path.join(".") : "<root>"}: ${issue.message}`;
5860
6656
  });
5861
- if (issueLines.length === 0) return "outputsSchema validation failed";
5862
- return `outputsSchema validation failed:\n${issueLines.join("\n")}`;
6657
+ if (issueLines.length === 0) return "outputs did not match the configured schema";
6658
+ return issueLines.join("\n");
5863
6659
  }
5864
/**
 * Build the assertion-failure record stored for a case.
 *
 * @param message Failure message.
 * @param error Optional error. Its `name` is used when no override is given,
 * and its `stack` (when truthy) is included after being passed through
 * `stripTerminalControlCodes`.
 * @param nameOverride Optional name that takes precedence over `error.name`.
 * @returns `{ name?, message, stack? }`; `name` and `stack` are present only
 * when they are defined.
 */
function toAssertionFailure(message, error = void 0, nameOverride = void 0) {
	const failure = {};
	const name = nameOverride ?? error?.name;
	if (name !== void 0) failure.name = name;
	failure.message = message;
	const stack = error?.stack ? stripTerminalControlCodes(error.stack) : void 0;
	if (stack !== void 0) failure.stack = stack;
	return failure;
}
5870
6669
  //#endregion
5871
6670
  //#region ../runner/src/runQueue.ts
@@ -6095,15 +6894,15 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6095
6894
  const apiCallsConfig = resolveApiCallsConfig(config.apiCalls);
6096
6895
  for (const evalMeta of targetEvals) {
6097
6896
  const evalFilePath = evalMeta.sourceFilePath;
6098
- let codeFingerprint = "";
6897
+ let sourceFingerprint = "";
6099
6898
  try {
6100
- codeFingerprint = getSourceFingerprint(await readFile(evalFilePath, "utf-8"));
6899
+ sourceFingerprint = getSourceFingerprint(await readFile(evalFilePath, "utf-8"));
6101
6900
  } catch {
6102
- codeFingerprint = "";
6901
+ sourceFingerprint = "";
6103
6902
  }
6104
- if (codeFingerprint.length > 0) {
6105
- runState.manifest.evalSourceFingerprints[evalMeta.key] = codeFingerprint;
6106
- evalMeta.sourceFingerprint = codeFingerprint;
6903
+ if (sourceFingerprint.length > 0) {
6904
+ runState.manifest.evalSourceFingerprints[evalMeta.key] = sourceFingerprint;
6905
+ evalMeta.sourceFingerprint = sourceFingerprint;
6107
6906
  } else {
6108
6907
  delete runState.manifest.evalSourceFingerprints[evalMeta.key];
6109
6908
  evalMeta.sourceFingerprint = null;
@@ -6112,7 +6911,7 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6112
6911
  const registry = getEvalRegistry();
6113
6912
  await runWithModuleIsolation(moduleIsolation, async () => {
6114
6913
  await runInEvalRuntimeScope("env", async () => {
6115
- await loadEvalModule(evalFilePath, codeFingerprint);
6914
+ await loadEvalModule(evalFilePath, sourceFingerprint);
6116
6915
  });
6117
6916
  });
6118
6917
  const entry = registry.get(evalMeta.id);
@@ -6126,8 +6925,24 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6126
6925
  await runWithModuleIsolation(moduleIsolation, async () => {
6127
6926
  await runInEvalRuntimeScope("cases", async () => {
6128
6927
  await entry.use(async (evalDef) => {
6129
- const runnableCases = resolveRunnableEvalCases({
6130
- cases: await runWithEvalClock(evalDef.startTime, async () => typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [], { freezeTime: evalDef.freezeTime }),
6928
+ if (evalDef.manualInput && evalDef.cases !== void 0) throw new Error(`Eval "${evalMeta.id}" cannot declare both "cases" and "manualInput". Remove one of them.`);
6929
+ let manualInputCase = null;
6930
+ if (evalDef.manualInput) {
6931
+ const rawValue = request.manualInputs?.[evalMeta.key];
6932
+ if (rawValue === void 0) throw new Error(`Eval "${evalMeta.id}" requires manual input. Provide it via the run modal in the web UI or "--input" / "--input-file" on the CLI.`);
6933
+ const parsed = parseManualInputValues(evalDef.manualInput, rawValue);
6934
+ if (parsed.error) {
6935
+ const formatted = parsed.error.issues.map((issue) => issue.path ? `${issue.path}: ${issue.message}` : issue.message).join("; ");
6936
+ throw new Error(`Invalid manual input for eval "${evalMeta.id}": ${formatted}`);
6937
+ }
6938
+ manualInputCase = {
6939
+ id: `${evalMeta.id}-manual`,
6940
+ input: parsed.value
6941
+ };
6942
+ }
6943
+ const evalCases = manualInputCase ? [manualInputCase] : await runWithEvalClock(evalDef.startTime, async () => typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [], { freezeTime: evalDef.freezeTime });
6944
+ const runnableCases = manualInputCase ? evalCases : resolveRunnableEvalCases({
6945
+ cases: evalCases,
6131
6946
  evalId: evalMeta.id
6132
6947
  });
6133
6948
  const duplicateCaseIds = findDuplicateCaseIds(runnableCases);
@@ -6136,6 +6951,8 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6136
6951
  runState.summary.totalCases += cases.length;
6137
6952
  const defaultConfig = resolveEvalDefaultConfig({
6138
6953
  evalDef,
6954
+ globalColumns: config.columns,
6955
+ globalStats: config.stats,
6139
6956
  globalRemove: config.removeDefaultConfig
6140
6957
  });
6141
6958
  const declaredColumnDefs = buildDeclaredColumnDefs(defaultConfig.columns, evalDef.scores, evalDef.manualScores);
@@ -6181,6 +6998,8 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6181
6998
  evalKey: evalMeta.key,
6182
6999
  evalCase,
6183
7000
  globalTraceDisplay,
7001
+ globalColumns: config.columns,
7002
+ globalDeriveFromTracing: config.deriveFromTracing,
6184
7003
  llmCallsConfig,
6185
7004
  apiCallsConfig,
6186
7005
  globalRemoveDefaultConfig: config.removeDefaultConfig,
@@ -6188,7 +7007,6 @@ async function executeRun({ runState, request, runDir, config, cacheStore, lastR
6188
7007
  startTime,
6189
7008
  cacheAdapter: bufferedCacheStore ?? (cacheEnabled ? cacheStore : null),
6190
7009
  cacheMode,
6191
- codeFingerprint,
6192
7010
  moduleIsolation,
6193
7011
  evalFilePath,
6194
7012
  evalFileRelativePath: evalMeta.filePath,
@@ -6343,4 +7161,4 @@ function toLastRunStatus(status) {
6343
7161
  return status === "pending" ? null : status;
6344
7162
  }
6345
7163
  //#endregion
6346
- export { llmCallsConfigSchema as $, traceSpanKindSchema as $t, extractApiCalls as A, getCurrentScope as An, evalChartTypeSchema as At, runSummarySchema as B, setEvalOutput as Bn, cacheRecordingOpSchema as Bt, validateCharts as C, evalExpect as Cn, evalChartAggregateSchema as Ct, sseEnvelopeSchema as D, configureEvalRunLogs as Dn, evalChartConfigSchema as Dt, updateManualScoreRequestSchema as E, appendToEvalOutput as En, evalChartColorSchema as Et, getEvalDisplayStatus as F, mergeEvalOutput as Fn, cacheEntryWithDebugKeySchema as Ft, apiCallMetricPlacementSchema as G, traceCacheRefSchema as Gt, DEFAULT_LLM_CALLS_CONFIG as H, startEvalBackgroundJob as Hn, cacheStatusSchema as Ht, deriveScopedSummaryFromCases as I, nextEvalId as In, cacheFileSchema as It, defaultConfigKeySchema as J, traceAttributeDisplayPlacementSchema as Jt, apiCallMetricSchema as K, traceAttributeDisplayFormatSchema as Kt, deriveStatusFromCaseRows as L, runInEvalRuntimeScope as Ln, cacheListItemSchema as Lt, applyDerivedCallAttributes as M, getEvalStartTime as Mn, cacheDebugKeyEntrySchema as Mt, getNestedAttribute as N, incrementEvalOutput as Nn, cacheDebugKeyFileSchema as Nt, extractCacheEntries as O, evalAssert as On, evalChartMetricSchema as Ot, getEvalTitle as P, isInEvalScope as Pn, cacheEntrySchema as Pt, llmCallPricingSchema as Q, traceSpanErrorSchema as Qt, deriveStatusFromChildStatuses as R, runInEvalScope as Rn, cacheModeSchema as Rt, normalizeScoreDef as S, repoFile as Sn, scoreTraceSchema as St, createRunRequestSchema as T, advanceEvalTime as Tn, evalChartBuiltinMetricSchema as Tt, agentEvalsConfigSchema as U, defineEval as Un, serializedCacheSpanSchema as Ut, DEFAULT_API_CALLS_CONFIG as V, setScopeCacheContext as Vn, cacheRecordingSchema as Vt, apiCallMetricFormatSchema as W, getEvalRegistry as Wn, spanCacheOptionsSchema as Wt, llmCallMetricPlacementSchema as X, traceDisplayConfigSchema as Xt, llmCallMetricFormatSchema as Y, traceAttributeDisplaySchema as Yt, llmCallMetricSchema as Z, 
traceDisplayInputConfigSchema as Zt, loadEvalModule as _, hashCacheKeySync as _n, evalSummarySchema as _t, getLastRunStatuses as a, columnKindSchema as an, buildCaseKey as at, loadConfig as b, serializeCacheRecording as bn, runLogLocationSchema as bt, loadPersistedRunSnapshots as c, numberDisplayOptionsSchema as cn, getCaseRowEvalKey as ct, persistRunState as d, z$1 as dn, caseRowSchema as dt, traceSpanSchema as en, removeDefaultConfigSchema as et, recomputeEvalStatusesInRuns as f, buildTraceTree as fn, discoveryIssueSchema as ft, deriveEvalFreshness as g, hashCacheKey as gn, evalStatsConfigSchema as gt, resolveArtifactPath as h, evalTracer as hn, evalStatItemSchema as ht, generateRunId as i, columnFormatSchema as in, trialSelectionModeSchema as it, extractLlmCalls as j, getEvalCaseInput as jn, evalChartsConfigSchema as jt, extractCacheHits as k, evalLog as kn, evalChartTooltipExtraSchema as kt, nextShortIdFromSnapshots as l, repoFileRefSchema as ln, assertionFailureSchema as lt, runTouchesEval as m, evalSpan as mn, evalStatAggregateSchema as mt, getTargetEvalKeys as n, cellValueSchema as nn, resolveLlmCallsConfig as nt, getLatestRunInfos as o, fileRefSchema as on, buildEvalKey as ot, recomputePersistedCaseStatus as p, captureEvalSpanError as pn, evalFreshnessStatusSchema as pt, apiCallsConfigSchema as q, traceAttributeDisplayInputSchema as qt, getTargetEvals as r, columnDefSchema as rn, runLogsConfigSchema as rt, loadPersistedRunSnapshot as s, jsonCellSchema as sn, getCaseRowCaseKey as st, executeRun as t, traceSpanWarningSchema as tn, resolveApiCallsConfig as tt, persistCaseDetail as u, runArtifactRefSchema as un, caseDetailSchema as ut, parseEvalDiscovery as v, deserializeCacheRecording as vn, runLogEntrySchema as vt, createFsCacheStore as w, EvalAssertionError as wn, evalChartAxisSchema as wt, buildDeclaredColumnDefs as x, serializeCacheValue as xn, runLogPhaseSchema as xt, resolveEvalDefaultConfig as y, deserializeCacheValue as yn, runLogLevelSchema as yt, 
runManifestSchema as z, runInExistingEvalScope as zn, cacheOperationTypeSchema as zt };
7164
+ export { defaultConfigKeySchema as $, incrementEvalOutput as $n, cacheEntryWithDebugKeySchema as $t, createRunRequestSchema as A, buildTraceTree as An, runLogPhaseSchema as At, getEvalDisplayStatus as B, repoFile as Bn, manualInputTextFieldSchema as Bt, loadConfig as C, columnKindSchema as Cn, evalStatAggregateSchema as Ct, createFsCacheStore as D, repoFileRefSchema as Dn, runLogEntrySchema as Dt, validateCharts as E, numberDisplayOptionsSchema as En, evalSummarySchema as Et, extractApiCalls as F, hashCacheKeySync as Fn, manualInputJsonFieldSchema as Ft, runSummarySchema as G, advanceEvalTime as Gn, evalChartConfigSchema as Gt, deriveStatusFromCaseRows as H, readManualInputFile as Hn, evalChartAxisSchema as Ht, extractLlmCalls as I, deserializeCacheRecording as In, manualInputMultilineFieldSchema as It, agentEvalsConfigSchema as J, evalAssert as Jn, evalChartTypeSchema as Jt, DEFAULT_API_CALLS_CONFIG as K, appendToEvalOutput as Kn, evalChartMetricSchema as Kt, applyDerivedCallAttributes as L, deserializeCacheValue as Ln, manualInputNumberFieldSchema as Lt, sseEnvelopeSchema as M, evalSpan as Mn, manualInputBooleanFieldSchema as Mt, extractCacheEntries as N, evalTracer as Nn, manualInputDescriptorSchema as Nt, configReloadStateSchema as O, runArtifactRefSchema as On, runLogLevelSchema as Ot, extractCacheHits as P, hashCacheKey as Pn, manualInputFieldDescriptorSchema as Pt, apiCallsConfigSchema as Q, getEvalStartTime as Qn, cacheEntrySchema as Qt, getNestedAttribute as R, serializeCacheRecording as Rn, manualInputSelectFieldSchema as Rt, resolveEvalDefaultConfig as S, columnFormatSchema as Sn, evalFreshnessStatusSchema as St, normalizeScoreDef as T, jsonCellSchema as Tn, evalStatsConfigSchema as Tt, deriveStatusFromChildStatuses as U, evalExpect as Un, evalChartBuiltinMetricSchema as Ut, deriveScopedSummaryFromCases as V, manualInputFileValueSchema as Vn, evalChartAggregateSchema as Vt, runManifestSchema as W, EvalAssertionError as Wn, evalChartColorSchema as Wt, 
apiCallMetricPlacementSchema as X, getCurrentScope as Xn, cacheDebugKeyEntrySchema as Xt, apiCallMetricFormatSchema as Y, evalLog as Yn, evalChartsConfigSchema as Yt, apiCallMetricSchema as Z, getEvalCaseInput as Zn, cacheDebugKeyFileSchema as Zt, buildManualInputDescriptor as _, traceSpanKindSchema as _n, getCaseRowEvalKey as _t, getLastRunStatuses as a, cacheRecordingSchema as an, runInExistingEvalScope as ar, llmCallMetricSchema as at, loadEvalModule as b, cellValueSchema as bn, caseRowSchema as bt, loadPersistedRunSnapshots as c, spanCacheOptionsSchema as cn, startEvalBackgroundJob as cr, llmCallsConfigSchema as ct, persistRunState as d, traceAttributeDisplayInputSchema as dn, resolveLlmCallsConfig as dt, cacheFileSchema as en, isInEvalScope as er, evalColumnOverrideSchema as et, recomputeEvalStatusesInRuns as f, traceAttributeDisplayPlacementSchema as fn, runLogsConfigSchema as ft, resolveArtifactPath as g, traceSpanErrorSchema as gn, getCaseRowCaseKey as gt, resolveTracePresentation as h, traceDisplayInputConfigSchema as hn, buildEvalKey as ht, generateRunId as i, cacheRecordingOpSchema as in, runInEvalScope as ir, llmCallMetricPlacementSchema as it, updateManualScoreRequestSchema as j, captureEvalSpanError as jn, scoreTraceSchema as jt, configReloadStatusSchema as k, z$1 as kn, runLogLocationSchema as kt, nextShortIdFromSnapshots as l, traceCacheRefSchema as ln, defineEval as lr, removeDefaultConfigSchema as lt, runTouchesEval as m, traceDisplayConfigSchema as mn, buildCaseKey as mt, getTargetEvalKeys as n, cacheModeSchema as nn, nextEvalId as nr, evalDeriveConfigSchema as nt, getLatestRunInfos as o, cacheStatusSchema as on, setEvalOutput as or, llmCallPricingRateSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplaySchema as pn, trialSelectionModeSchema as pt, DEFAULT_LLM_CALLS_CONFIG as q, configureEvalRunLogs as qn, evalChartTooltipExtraSchema as qt, getTargetEvals as r, cacheOperationTypeSchema as rn, runInEvalRuntimeScope as rr, 
llmCallMetricFormatSchema as rt, loadPersistedRunSnapshot as s, serializedCacheSpanSchema as sn, setScopeCacheContext as sr, llmCallPricingSchema as st, executeRun as t, cacheListItemSchema as tn, mergeEvalOutput as tr, evalColumnsSchema as tt, persistCaseDetail as u, traceAttributeDisplayFormatSchema as un, getEvalRegistry as ur, resolveApiCallsConfig as ut, parseManualInputValues as v, traceSpanSchema as vn, assertionFailureSchema as vt, buildDeclaredColumnDefs as w, fileRefSchema as wn, evalStatItemSchema as wt, parseEvalDiscovery as x, columnDefSchema as xn, discoveryIssueSchema as xt, deriveEvalFreshness as y, traceSpanWarningSchema as yn, caseDetailSchema as yt, getEvalTitle as z, serializeCacheValue as zn, manualInputSelectOptionSchema as zt };