@ls-stack/agent-eval 0.52.3 → 0.54.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -7,6 +7,7 @@ declare const repoFileRefSchema: z$1.ZodObject<{
7
7
  source: z$1.ZodLiteral<"repo">;
8
8
  path: z$1.ZodString;
9
9
  mimeType: z$1.ZodOptional<z$1.ZodString>;
10
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
10
11
  }, z$1.core.$strip>;
11
12
  /** Reference to a file that lives in the authored workspace. */
12
13
  type RepoFileRef = z$1.infer<typeof repoFileRefSchema>;
@@ -27,6 +28,8 @@ declare const columnFormatSchema$1: z$1.ZodEnum<{
27
28
  markdown: "markdown";
28
29
  json: "json";
29
30
  image: "image";
31
+ html: "html";
32
+ pdf: "pdf";
30
33
  audio: "audio";
31
34
  video: "video";
32
35
  duration: "duration";
@@ -141,6 +144,25 @@ declare const traceSpanSchema$2: z$1.ZodObject<{
141
144
  /** Persisted trace span shape stored for each eval case run. */
142
145
  type EvalTraceSpan$2 = z$1.infer<typeof traceSpanSchema$2>; //#endregion
143
146
  //#region ../shared/src/schemas/eval.d.ts
147
+ /**
148
+ * Reducer used to collapse per-case values into a single duration or column
149
+ * stat.
150
+ * `best` selects the highest finite value and `worst` selects the lowest.
151
+ */
152
+ declare const evalStatAggregateSchema$1: z$1.ZodEnum<{
153
+ avg: "avg";
154
+ min: "min";
155
+ max: "max";
156
+ sum: "sum";
157
+ best: "best";
158
+ worst: "worst";
159
+ }>;
160
+ /**
161
+ * Reducer used to collapse per-case values into a single duration or column
162
+ * stat.
163
+ * `best` selects the highest finite value and `worst` selects the lowest.
164
+ */
165
+ type EvalStatAggregate$1 = z$1.infer<typeof evalStatAggregateSchema$1>;
144
166
  /** Ordered list of stats rendered in the EvalCard stats row. */
145
167
  declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
146
168
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -152,9 +174,25 @@ declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z
152
174
  }, z$1.core.$strip>, z$1.ZodObject<{
153
175
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
154
176
  kind: z$1.ZodLiteral<"duration">;
177
+ aggregate: z$1.ZodOptional<z$1.ZodEnum<{
178
+ avg: "avg";
179
+ min: "min";
180
+ max: "max";
181
+ sum: "sum";
182
+ best: "best";
183
+ worst: "worst";
184
+ }>>;
155
185
  }, z$1.core.$strip>, z$1.ZodObject<{
156
186
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
157
187
  kind: z$1.ZodLiteral<"cacheHits">;
188
+ aggregate: z$1.ZodOptional<z$1.ZodEnum<{
189
+ avg: "avg";
190
+ min: "min";
191
+ max: "max";
192
+ sum: "sum";
193
+ best: "best";
194
+ worst: "worst";
195
+ }>>;
158
196
  }, z$1.core.$strip>, z$1.ZodObject<{
159
197
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
160
198
  kind: z$1.ZodLiteral<"column">;
@@ -165,7 +203,8 @@ declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z
165
203
  min: "min";
166
204
  max: "max";
167
205
  sum: "sum";
168
- last: "last";
206
+ best: "best";
207
+ worst: "worst";
169
208
  }>;
170
209
  format: z$1.ZodOptional<z$1.ZodEnum<{
171
210
  number: "number";
@@ -175,6 +214,8 @@ declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z
175
214
  markdown: "markdown";
176
215
  json: "json";
177
216
  image: "image";
217
+ html: "html";
218
+ pdf: "pdf";
178
219
  audio: "audio";
179
220
  video: "video";
180
221
  percent: "percent";
@@ -375,8 +416,8 @@ type EvalColumnOverride = {
375
416
  * Presentation preset for the value.
376
417
  *
377
418
  * Use this to control how the UI renders the cell and infer table behavior,
378
- * for example `number`, `boolean`, `duration`, `markdown`, `json`, or
379
- * file/media previews.
419
+ * for example `number`, `boolean`, `duration`, `markdown`, `json`,
420
+ * `image`, `html`, `pdf`, or file/media previews.
380
421
  */
381
422
  format?: ColumnFormat$1;
382
423
  /**
@@ -1017,16 +1058,28 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
1017
1058
  * When provided, the stats render in order, left to right.
1018
1059
  *
1019
1060
  * Built-in kinds (`cases`, `passRate`, `duration`, `cacheHits`) read from
1020
- * the latest run summary. `cacheHits` counts Agent Eval operation-level cache
1021
- * hits over total cache operations, not LLM provider prompt-cache read
1022
- * tokens. `kind: 'column'` aggregates a score or numeric output column across
1023
- * the latest run's cases `key` must match one of the eval's score or column
1024
- * keys, and only finite numeric values participate in the reduction. When no
1025
- * case has a numeric value for the key the stat renders an em dash, or hides
1026
- * when `hideIfNoValue` is true. `label`, `format`, and `numberFormat` default
1027
- * to the matching `ColumnDef`.
1061
+ * the latest run. `duration` aggregates finite per-case durations using the
1062
+ * same modes as column stats. `cacheHits` counts Agent Eval operation-level
1063
+ * cache hits over total cache operations, not LLM provider prompt-cache read
1064
+ * tokens. Cache-hit stats have their own aggregate mode and default to `sum`;
1065
+ * `avg` is average per-case hit rate, and min/max/best/worst select cases by
1066
+ * hit rate. `kind: 'column'` aggregates a score or numeric output column
1067
+ * across the latest run's cases — `key` must match one of the eval's score or
1068
+ * column keys, and only finite numeric values participate in the reduction.
1069
+ * When no case has a numeric value for the key the stat renders an em dash, or
1070
+ * hides when `hideIfNoValue` is true. `label`, `format`, and `numberFormat`
1071
+ * default to the matching `ColumnDef`.
1028
1072
  */
1029
1073
  stats?: EvalStatsConfig$1;
1074
+ /**
1075
+ * Initial aggregate mode used for this eval's duration and column stats in
1076
+ * the web UI.
1077
+ *
1078
+ * Overrides `AgentEvalsConfig.defaultStatAggregate`. Individual stat
1079
+ * `aggregate` values still define their authored reducer and remain the
1080
+ * fallback when neither default is configured.
1081
+ */
1082
+ defaultStatAggregate?: EvalStatAggregate$1;
1030
1083
  /**
1031
1084
  * Optional history chart configuration for the EvalCard in the web UI.
1032
1085
  *
@@ -1078,8 +1131,12 @@ type EvalRegistryEntry = {
1078
1131
  /** Return the in-memory registry of evals defined in the current process. */
1079
1132
  declare function getEvalRegistry(): Map<string, EvalRegistryEntry>;
1080
1133
  /**
1081
- * Register an eval definition with the SDK so the runner can discover it
1082
- * after importing the eval module.
1134
+ * Execute a callback with an empty async-local eval registry.
1135
+ *
1136
+ * Runner internals use this when importing eval modules concurrently so
1137
+ * `defineEval(...)` calls from one import cannot overwrite another import's
1138
+ * registered definitions. The callback receives the scoped registry populated
1139
+ * during its async execution.
1083
1140
  */
1084
1141
  //#endregion
1085
1142
  //#region src/evalExpect.d.ts
@@ -1171,13 +1228,15 @@ declare function readManualInputFile(value: ManualInputFileValue, options?: {
1171
1228
  //#region src/repoFile.d.ts
1172
1229
  /**
1173
1230
  * Create a file reference that can be emitted via `setEvalOutput(...)` and rendered
1174
- * by a column configured with `format: 'image' | 'audio' | 'video' | 'file'`.
1231
+ * by a column configured with `format: 'image' | 'html' | 'pdf' | 'audio' |
1232
+ * 'video' | 'file'`.
1175
1233
  *
1176
1234
  * @param path Relative or absolute path to the repository file.
1177
1235
  * @param mimeType Optional MIME type hint for UI rendering.
1236
+ * @param sizeBytes Optional file size hint shown by artifact cards in the UI.
1178
1237
  * @returns A repo-backed file reference suitable for file/media columns.
1179
1238
  */
1180
- declare function repoFile(path: string, mimeType?: string): RepoFileRef; //#endregion
1239
+ declare function repoFile(path: string, mimeType?: string, sizeBytes?: number): RepoFileRef; //#endregion
1181
1240
  //#region src/cacheSerialization.d.ts
1182
1241
  declare const serializedCacheValueMarker = "__aecs";
1183
1242
  type JsonSafeCacheValueType = 'ArrayBuffer' | 'BigInt' | 'Blob' | 'Date' | 'Error' | 'ExternalJson' | 'File' | 'Float64Array' | 'Headers' | 'Map' | 'Number' | 'Object' | 'RegExp' | 'Set' | 'URL' | 'URLSearchParams' | 'Undefined';
@@ -1722,12 +1781,14 @@ declare const columnFormatSchema: z$1.ZodEnum<{
1722
1781
  number: "number";
1723
1782
  boolean: "boolean";
1724
1783
  file: "file";
1725
- markdown: "markdown";
1726
1784
  json: "json";
1785
+ duration: "duration";
1786
+ markdown: "markdown";
1727
1787
  image: "image";
1788
+ html: "html";
1789
+ pdf: "pdf";
1728
1790
  audio: "audio";
1729
1791
  video: "video";
1730
- duration: "duration";
1731
1792
  percent: "percent";
1732
1793
  passFail: "passFail";
1733
1794
  stars: "stars";
@@ -1747,12 +1808,14 @@ declare const columnDefSchema: z$1.ZodObject<{
1747
1808
  number: "number";
1748
1809
  boolean: "boolean";
1749
1810
  file: "file";
1750
- markdown: "markdown";
1751
1811
  json: "json";
1812
+ duration: "duration";
1813
+ markdown: "markdown";
1752
1814
  image: "image";
1815
+ html: "html";
1816
+ pdf: "pdf";
1753
1817
  audio: "audio";
1754
1818
  video: "video";
1755
- duration: "duration";
1756
1819
  percent: "percent";
1757
1820
  passFail: "passFail";
1758
1821
  stars: "stars";
@@ -1766,8 +1829,8 @@ declare const columnDefSchema: z$1.ZodObject<{
1766
1829
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
1767
1830
  align: z$1.ZodOptional<z$1.ZodEnum<{
1768
1831
  left: "left";
1769
- center: "center";
1770
1832
  right: "right";
1833
+ center: "center";
1771
1834
  }>>;
1772
1835
  }, z$1.core.$strip>;
1773
1836
  /** Column definition exposed to the UI for eval and case tables. */
@@ -1777,11 +1840,13 @@ declare const cellValueSchema: z$1.ZodUnion<readonly [z$1.ZodType<string | numbe
1777
1840
  source: z$1.ZodLiteral<"repo">;
1778
1841
  path: z$1.ZodString;
1779
1842
  mimeType: z$1.ZodOptional<z$1.ZodString>;
1843
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
1780
1844
  }, z$1.core.$strip>, z$1.ZodObject<{
1781
1845
  source: z$1.ZodLiteral<"run">;
1782
1846
  artifactId: z$1.ZodString;
1783
1847
  mimeType: z$1.ZodString;
1784
1848
  fileName: z$1.ZodOptional<z$1.ZodString>;
1849
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
1785
1850
  }, z$1.core.$strip>]>]>;
1786
1851
  /** Value stored in a rendered eval result table cell. */
1787
1852
  type CellValue = z$1.infer<typeof cellValueSchema>; //#endregion
@@ -1836,8 +1901,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
1836
1901
  }>>;
1837
1902
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1838
1903
  all: "all";
1839
- last: "last";
1840
1904
  sum: "sum";
1905
+ last: "last";
1841
1906
  }>>;
1842
1907
  }, z$1.core.$strip>;
1843
1908
  /**
@@ -1872,8 +1937,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
1872
1937
  }>>;
1873
1938
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1874
1939
  all: "all";
1875
- last: "last";
1876
1940
  sum: "sum";
1941
+ last: "last";
1877
1942
  }>>;
1878
1943
  }, z$1.core.$strip>>>;
1879
1944
  }, z$1.core.$strip>;
@@ -1912,8 +1977,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
1912
1977
  }>>;
1913
1978
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1914
1979
  all: "all";
1915
- last: "last";
1916
1980
  sum: "sum";
1981
+ last: "last";
1917
1982
  }>>;
1918
1983
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
1919
1984
  }, z$1.core.$strip>;
@@ -1950,8 +2015,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
1950
2015
  }>>;
1951
2016
  mode: z$1.ZodOptional<z$1.ZodEnum<{
1952
2017
  all: "all";
1953
- last: "last";
1954
2018
  sum: "sum";
2019
+ last: "last";
1955
2020
  }>>;
1956
2021
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
1957
2022
  }, z$1.core.$strip>>>;
@@ -1986,10 +2051,10 @@ declare const traceSpanSchema$1: z$1.ZodObject<{
1986
2051
  startedAt: z$1.ZodString;
1987
2052
  endedAt: z$1.ZodNullable<z$1.ZodString>;
1988
2053
  status: z$1.ZodEnum<{
1989
- error: "error";
1990
2054
  running: "running";
1991
- ok: "ok";
1992
2055
  cancelled: "cancelled";
2056
+ error: "error";
2057
+ ok: "ok";
1993
2058
  }>;
1994
2059
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
1995
2060
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2028,21 +2093,32 @@ declare const evalFreshnessStatusSchema: z$1.ZodEnum<{
2028
2093
  }>;
2029
2094
  /** Freshness signal derived from the latest relevant run plus git state. */
2030
2095
  type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
2031
- /** Reducer used to collapse a column's per-case values into a single stat. */
2096
+ /**
2097
+ * Reducer used to collapse per-case values into a single duration or column
2098
+ * stat.
2099
+ * `best` selects the highest finite value and `worst` selects the lowest.
2100
+ */
2032
2101
  declare const evalStatAggregateSchema: z$1.ZodEnum<{
2033
- last: "last";
2034
- sum: "sum";
2035
2102
  avg: "avg";
2103
+ sum: "sum";
2036
2104
  min: "min";
2037
2105
  max: "max";
2106
+ best: "best";
2107
+ worst: "worst";
2038
2108
  }>;
2039
- /** Reducer used to collapse a column's per-case values into a single stat. */
2109
+ /**
2110
+ * Reducer used to collapse per-case values into a single duration or column
2111
+ * stat.
2112
+ * `best` selects the highest finite value and `worst` selects the lowest.
2113
+ */
2040
2114
  type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
2041
2115
  /**
2042
- * One entry in the EvalCard stats row. Built-in kinds use latest run totals;
2043
- * `cacheHits` counts Agent Eval operation-level cache hits from spans and
2044
- * `evalTracer.cache(...)` refs, not LLM provider prompt-cache read tokens.
2045
- * `column` aggregates a score or numeric output column across the latest run.
2116
+ * One entry in the EvalCard stats row. Built-in kinds read from the latest run;
2117
+ * `duration` aggregates per-case durations, `cacheHits` counts Agent Eval
2118
+ * operation-level cache hits from spans and `evalTracer.cache(...)` refs, not
2119
+ * LLM provider prompt-cache read tokens. Cache hits use an independent
2120
+ * aggregate mode and default to `sum`. `column` aggregates a score or numeric
2121
+ * output column across the latest run.
2046
2122
  */
2047
2123
  declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2048
2124
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2054,31 +2130,50 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2054
2130
  }, z$1.core.$strip>, z$1.ZodObject<{
2055
2131
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2056
2132
  kind: z$1.ZodLiteral<"duration">;
2133
+ aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2134
+ avg: "avg";
2135
+ sum: "sum";
2136
+ min: "min";
2137
+ max: "max";
2138
+ best: "best";
2139
+ worst: "worst";
2140
+ }>>;
2057
2141
  }, z$1.core.$strip>, z$1.ZodObject<{
2058
2142
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2059
2143
  kind: z$1.ZodLiteral<"cacheHits">;
2144
+ aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2145
+ avg: "avg";
2146
+ sum: "sum";
2147
+ min: "min";
2148
+ max: "max";
2149
+ best: "best";
2150
+ worst: "worst";
2151
+ }>>;
2060
2152
  }, z$1.core.$strip>, z$1.ZodObject<{
2061
2153
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2062
2154
  kind: z$1.ZodLiteral<"column">;
2063
2155
  key: z$1.ZodString;
2064
2156
  label: z$1.ZodOptional<z$1.ZodString>;
2065
2157
  aggregate: z$1.ZodEnum<{
2066
- last: "last";
2067
- sum: "sum";
2068
2158
  avg: "avg";
2159
+ sum: "sum";
2069
2160
  min: "min";
2070
2161
  max: "max";
2162
+ best: "best";
2163
+ worst: "worst";
2071
2164
  }>;
2072
2165
  format: z$1.ZodOptional<z$1.ZodEnum<{
2073
2166
  number: "number";
2074
2167
  boolean: "boolean";
2075
2168
  file: "file";
2076
- markdown: "markdown";
2077
2169
  json: "json";
2170
+ duration: "duration";
2171
+ markdown: "markdown";
2078
2172
  image: "image";
2173
+ html: "html";
2174
+ pdf: "pdf";
2079
2175
  audio: "audio";
2080
2176
  video: "video";
2081
- duration: "duration";
2082
2177
  percent: "percent";
2083
2178
  passFail: "passFail";
2084
2179
  stars: "stars";
@@ -2099,31 +2194,50 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2099
2194
  }, z$1.core.$strip>, z$1.ZodObject<{
2100
2195
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2101
2196
  kind: z$1.ZodLiteral<"duration">;
2197
+ aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2198
+ avg: "avg";
2199
+ sum: "sum";
2200
+ min: "min";
2201
+ max: "max";
2202
+ best: "best";
2203
+ worst: "worst";
2204
+ }>>;
2102
2205
  }, z$1.core.$strip>, z$1.ZodObject<{
2103
2206
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2104
2207
  kind: z$1.ZodLiteral<"cacheHits">;
2208
+ aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2209
+ avg: "avg";
2210
+ sum: "sum";
2211
+ min: "min";
2212
+ max: "max";
2213
+ best: "best";
2214
+ worst: "worst";
2215
+ }>>;
2105
2216
  }, z$1.core.$strip>, z$1.ZodObject<{
2106
2217
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2107
2218
  kind: z$1.ZodLiteral<"column">;
2108
2219
  key: z$1.ZodString;
2109
2220
  label: z$1.ZodOptional<z$1.ZodString>;
2110
2221
  aggregate: z$1.ZodEnum<{
2111
- last: "last";
2112
- sum: "sum";
2113
2222
  avg: "avg";
2223
+ sum: "sum";
2114
2224
  min: "min";
2115
2225
  max: "max";
2226
+ best: "best";
2227
+ worst: "worst";
2116
2228
  }>;
2117
2229
  format: z$1.ZodOptional<z$1.ZodEnum<{
2118
2230
  number: "number";
2119
2231
  boolean: "boolean";
2120
2232
  file: "file";
2121
- markdown: "markdown";
2122
2233
  json: "json";
2234
+ duration: "duration";
2235
+ markdown: "markdown";
2123
2236
  image: "image";
2237
+ html: "html";
2238
+ pdf: "pdf";
2124
2239
  audio: "audio";
2125
2240
  video: "video";
2126
- duration: "duration";
2127
2241
  percent: "percent";
2128
2242
  passFail: "passFail";
2129
2243
  stars: "stars";
@@ -2162,12 +2276,14 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2162
2276
  number: "number";
2163
2277
  boolean: "boolean";
2164
2278
  file: "file";
2165
- markdown: "markdown";
2166
2279
  json: "json";
2280
+ duration: "duration";
2281
+ markdown: "markdown";
2167
2282
  image: "image";
2283
+ html: "html";
2284
+ pdf: "pdf";
2168
2285
  audio: "audio";
2169
2286
  video: "video";
2170
- duration: "duration";
2171
2287
  percent: "percent";
2172
2288
  passFail: "passFail";
2173
2289
  stars: "stars";
@@ -2181,16 +2297,16 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2181
2297
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2182
2298
  align: z$1.ZodOptional<z$1.ZodEnum<{
2183
2299
  left: "left";
2184
- center: "center";
2185
2300
  right: "right";
2301
+ center: "center";
2186
2302
  }>>;
2187
2303
  }, z$1.core.$strip>>;
2188
2304
  caseCount: z$1.ZodNullable<z$1.ZodNumber>;
2189
2305
  caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2190
2306
  lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
2191
- error: "error";
2192
2307
  running: "running";
2193
2308
  cancelled: "cancelled";
2309
+ error: "error";
2194
2310
  pass: "pass";
2195
2311
  fail: "fail";
2196
2312
  unscored: "unscored";
@@ -2205,31 +2321,50 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2205
2321
  }, z$1.core.$strip>, z$1.ZodObject<{
2206
2322
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2207
2323
  kind: z$1.ZodLiteral<"duration">;
2324
+ aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2325
+ avg: "avg";
2326
+ sum: "sum";
2327
+ min: "min";
2328
+ max: "max";
2329
+ best: "best";
2330
+ worst: "worst";
2331
+ }>>;
2208
2332
  }, z$1.core.$strip>, z$1.ZodObject<{
2209
2333
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2210
2334
  kind: z$1.ZodLiteral<"cacheHits">;
2335
+ aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2336
+ avg: "avg";
2337
+ sum: "sum";
2338
+ min: "min";
2339
+ max: "max";
2340
+ best: "best";
2341
+ worst: "worst";
2342
+ }>>;
2211
2343
  }, z$1.core.$strip>, z$1.ZodObject<{
2212
2344
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2213
2345
  kind: z$1.ZodLiteral<"column">;
2214
2346
  key: z$1.ZodString;
2215
2347
  label: z$1.ZodOptional<z$1.ZodString>;
2216
2348
  aggregate: z$1.ZodEnum<{
2217
- last: "last";
2218
- sum: "sum";
2219
2349
  avg: "avg";
2350
+ sum: "sum";
2220
2351
  min: "min";
2221
2352
  max: "max";
2353
+ best: "best";
2354
+ worst: "worst";
2222
2355
  }>;
2223
2356
  format: z$1.ZodOptional<z$1.ZodEnum<{
2224
2357
  number: "number";
2225
2358
  boolean: "boolean";
2226
2359
  file: "file";
2227
- markdown: "markdown";
2228
2360
  json: "json";
2361
+ duration: "duration";
2362
+ markdown: "markdown";
2229
2363
  image: "image";
2364
+ html: "html";
2365
+ pdf: "pdf";
2230
2366
  audio: "audio";
2231
2367
  video: "video";
2232
- duration: "duration";
2233
2368
  percent: "percent";
2234
2369
  passFail: "passFail";
2235
2370
  stars: "stars";
@@ -2237,6 +2372,14 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2237
2372
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2238
2373
  accent: z$1.ZodOptional<z$1.ZodBoolean>;
2239
2374
  }, z$1.core.$strip>], "kind">>>;
2375
+ defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
2376
+ avg: "avg";
2377
+ sum: "sum";
2378
+ min: "min";
2379
+ max: "max";
2380
+ best: "best";
2381
+ worst: "worst";
2382
+ }>>;
2240
2383
  charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
2241
2384
  heading: z$1.ZodOptional<z$1.ZodString>;
2242
2385
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2254,11 +2397,11 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2254
2397
  }>;
2255
2398
  label: z$1.ZodOptional<z$1.ZodString>;
2256
2399
  color: z$1.ZodOptional<z$1.ZodEnum<{
2257
- success: "success";
2258
2400
  error: "error";
2259
- warning: "warning";
2401
+ success: "success";
2260
2402
  accent: "accent";
2261
2403
  accentDim: "accentDim";
2404
+ warning: "warning";
2262
2405
  textMuted: "textMuted";
2263
2406
  }>>;
2264
2407
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2269,8 +2412,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2269
2412
  source: z$1.ZodLiteral<"column">;
2270
2413
  key: z$1.ZodString;
2271
2414
  aggregate: z$1.ZodEnum<{
2272
- sum: "sum";
2273
2415
  avg: "avg";
2416
+ sum: "sum";
2274
2417
  min: "min";
2275
2418
  max: "max";
2276
2419
  latest: "latest";
@@ -2278,11 +2421,11 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2278
2421
  }>;
2279
2422
  label: z$1.ZodOptional<z$1.ZodString>;
2280
2423
  color: z$1.ZodOptional<z$1.ZodEnum<{
2281
- success: "success";
2282
2424
  error: "error";
2283
- warning: "warning";
2425
+ success: "success";
2284
2426
  accent: "accent";
2285
2427
  accentDim: "accentDim";
2428
+ warning: "warning";
2286
2429
  textMuted: "textMuted";
2287
2430
  }>>;
2288
2431
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2311,8 +2454,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2311
2454
  source: z$1.ZodLiteral<"column">;
2312
2455
  key: z$1.ZodString;
2313
2456
  aggregate: z$1.ZodEnum<{
2314
- sum: "sum";
2315
2457
  avg: "avg";
2458
+ sum: "sum";
2316
2459
  min: "min";
2317
2460
  max: "max";
2318
2461
  latest: "latest";
@@ -2410,12 +2553,12 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2410
2553
  evalId: z$1.ZodString;
2411
2554
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2412
2555
  status: z$1.ZodEnum<{
2413
- error: "error";
2556
+ pending: "pending";
2414
2557
  running: "running";
2415
2558
  cancelled: "cancelled";
2559
+ error: "error";
2416
2560
  pass: "pass";
2417
2561
  fail: "fail";
2418
- pending: "pending";
2419
2562
  }>;
2420
2563
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
2421
2564
  cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
@@ -2425,11 +2568,13 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2425
2568
  source: z$1.ZodLiteral<"repo">;
2426
2569
  path: z$1.ZodString;
2427
2570
  mimeType: z$1.ZodOptional<z$1.ZodString>;
2571
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
2428
2572
  }, z$1.core.$strip>, z$1.ZodObject<{
2429
2573
  source: z$1.ZodLiteral<"run">;
2430
2574
  artifactId: z$1.ZodString;
2431
2575
  mimeType: z$1.ZodString;
2432
2576
  fileName: z$1.ZodOptional<z$1.ZodString>;
2577
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
2433
2578
  }, z$1.core.$strip>]>]>>;
2434
2579
  trial: z$1.ZodNumber;
2435
2580
  }, z$1.core.$strip>;
@@ -2509,10 +2654,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2509
2654
  startedAt: z$1.ZodString;
2510
2655
  endedAt: z$1.ZodNullable<z$1.ZodString>;
2511
2656
  status: z$1.ZodEnum<{
2512
- error: "error";
2513
2657
  running: "running";
2514
- ok: "ok";
2515
2658
  cancelled: "cancelled";
2659
+ error: "error";
2660
+ ok: "ok";
2516
2661
  }>;
2517
2662
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2518
2663
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2563,8 +2708,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2563
2708
  }>>;
2564
2709
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2565
2710
  all: "all";
2566
- last: "last";
2567
2711
  sum: "sum";
2712
+ last: "last";
2568
2713
  }>>;
2569
2714
  }, z$1.core.$strip>>>;
2570
2715
  }, z$1.core.$strip>;
@@ -2574,10 +2719,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2574
2719
  namespace: z$1.ZodString;
2575
2720
  key: z$1.ZodString;
2576
2721
  status: z$1.ZodEnum<{
2722
+ bypass: "bypass";
2723
+ refresh: "refresh";
2577
2724
  hit: "hit";
2578
2725
  miss: "miss";
2579
- refresh: "refresh";
2580
- bypass: "bypass";
2581
2726
  }>;
2582
2727
  read: z$1.ZodOptional<z$1.ZodBoolean>;
2583
2728
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2595,12 +2740,12 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2595
2740
  evalId: z$1.ZodString;
2596
2741
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2597
2742
  status: z$1.ZodEnum<{
2598
- error: "error";
2743
+ pending: "pending";
2599
2744
  running: "running";
2600
2745
  cancelled: "cancelled";
2746
+ error: "error";
2601
2747
  pass: "pass";
2602
2748
  fail: "fail";
2603
- pending: "pending";
2604
2749
  }>;
2605
2750
  input: z$1.ZodUnknown;
2606
2751
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -2612,10 +2757,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2612
2757
  startedAt: z$1.ZodString;
2613
2758
  endedAt: z$1.ZodNullable<z$1.ZodString>;
2614
2759
  status: z$1.ZodEnum<{
2615
- error: "error";
2616
2760
  running: "running";
2617
- ok: "ok";
2618
2761
  cancelled: "cancelled";
2762
+ error: "error";
2763
+ ok: "ok";
2619
2764
  }>;
2620
2765
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2621
2766
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2666,8 +2811,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2666
2811
  }>>;
2667
2812
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2668
2813
  all: "all";
2669
- last: "last";
2670
2814
  sum: "sum";
2815
+ last: "last";
2671
2816
  }>>;
2672
2817
  }, z$1.core.$strip>>>;
2673
2818
  }, z$1.core.$strip>;
@@ -2681,10 +2826,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2681
2826
  startedAt: z$1.ZodString;
2682
2827
  endedAt: z$1.ZodNullable<z$1.ZodString>;
2683
2828
  status: z$1.ZodEnum<{
2684
- error: "error";
2685
2829
  running: "running";
2686
- ok: "ok";
2687
2830
  cancelled: "cancelled";
2831
+ error: "error";
2832
+ ok: "ok";
2688
2833
  }>;
2689
2834
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2690
2835
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2735,8 +2880,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2735
2880
  }>>;
2736
2881
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2737
2882
  all: "all";
2738
- last: "last";
2739
2883
  sum: "sum";
2884
+ last: "last";
2740
2885
  }>>;
2741
2886
  }, z$1.core.$strip>>>;
2742
2887
  }, z$1.core.$strip>;
@@ -2746,10 +2891,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2746
2891
  namespace: z$1.ZodString;
2747
2892
  key: z$1.ZodString;
2748
2893
  status: z$1.ZodEnum<{
2894
+ bypass: "bypass";
2895
+ refresh: "refresh";
2749
2896
  hit: "hit";
2750
2897
  miss: "miss";
2751
- refresh: "refresh";
2752
- bypass: "bypass";
2753
2898
  }>;
2754
2899
  read: z$1.ZodOptional<z$1.ZodBoolean>;
2755
2900
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2761,11 +2906,13 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2761
2906
  source: z$1.ZodLiteral<"repo">;
2762
2907
  path: z$1.ZodString;
2763
2908
  mimeType: z$1.ZodOptional<z$1.ZodString>;
2909
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
2764
2910
  }, z$1.core.$strip>, z$1.ZodObject<{
2765
2911
  source: z$1.ZodLiteral<"run">;
2766
2912
  artifactId: z$1.ZodString;
2767
2913
  mimeType: z$1.ZodString;
2768
2914
  fileName: z$1.ZodOptional<z$1.ZodString>;
2915
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
2769
2916
  }, z$1.core.$strip>]>]>>;
2770
2917
  assertionFailures: z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
2771
2918
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -2813,10 +2960,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2813
2960
  namespace: z$1.ZodString;
2814
2961
  key: z$1.ZodString;
2815
2962
  status: z$1.ZodEnum<{
2963
+ bypass: "bypass";
2964
+ refresh: "refresh";
2816
2965
  hit: "hit";
2817
2966
  miss: "miss";
2818
- refresh: "refresh";
2819
- bypass: "bypass";
2820
2967
  }>;
2821
2968
  read: z$1.ZodOptional<z$1.ZodBoolean>;
2822
2969
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2869,8 +3016,8 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
2869
3016
  type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
2870
3017
  /** Reducer applied to a numeric column across all cases of a single run. */
2871
3018
  declare const evalChartAggregateSchema: z$1.ZodEnum<{
2872
- sum: "sum";
2873
3019
  avg: "avg";
3020
+ sum: "sum";
2874
3021
  min: "min";
2875
3022
  max: "max";
2876
3023
  latest: "latest";
@@ -2883,11 +3030,11 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
2883
3030
  * not emit raw hex so authored evals stay decoupled from the web theme.
2884
3031
  */
2885
3032
  declare const evalChartColorSchema: z$1.ZodEnum<{
2886
- success: "success";
2887
3033
  error: "error";
2888
- warning: "warning";
3034
+ success: "success";
2889
3035
  accent: "accent";
2890
3036
  accentDim: "accentDim";
3037
+ warning: "warning";
2891
3038
  textMuted: "textMuted";
2892
3039
  }>;
2893
3040
  /** Semantic color token resolved to a theme color by the web UI. */
@@ -2912,11 +3059,11 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2912
3059
  }>;
2913
3060
  label: z$1.ZodOptional<z$1.ZodString>;
2914
3061
  color: z$1.ZodOptional<z$1.ZodEnum<{
2915
- success: "success";
2916
3062
  error: "error";
2917
- warning: "warning";
3063
+ success: "success";
2918
3064
  accent: "accent";
2919
3065
  accentDim: "accentDim";
3066
+ warning: "warning";
2920
3067
  textMuted: "textMuted";
2921
3068
  }>>;
2922
3069
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2927,8 +3074,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2927
3074
  source: z$1.ZodLiteral<"column">;
2928
3075
  key: z$1.ZodString;
2929
3076
  aggregate: z$1.ZodEnum<{
2930
- sum: "sum";
2931
3077
  avg: "avg";
3078
+ sum: "sum";
2932
3079
  min: "min";
2933
3080
  max: "max";
2934
3081
  latest: "latest";
@@ -2936,11 +3083,11 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2936
3083
  }>;
2937
3084
  label: z$1.ZodOptional<z$1.ZodString>;
2938
3085
  color: z$1.ZodOptional<z$1.ZodEnum<{
2939
- success: "success";
2940
3086
  error: "error";
2941
- warning: "warning";
3087
+ success: "success";
2942
3088
  accent: "accent";
2943
3089
  accentDim: "accentDim";
3090
+ warning: "warning";
2944
3091
  textMuted: "textMuted";
2945
3092
  }>>;
2946
3093
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2962,8 +3109,8 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
2962
3109
  source: z$1.ZodLiteral<"column">;
2963
3110
  key: z$1.ZodString;
2964
3111
  aggregate: z$1.ZodEnum<{
2965
- sum: "sum";
2966
3112
  avg: "avg";
3113
+ sum: "sum";
2967
3114
  min: "min";
2968
3115
  max: "max";
2969
3116
  latest: "latest";
@@ -2995,11 +3142,11 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
2995
3142
  }>;
2996
3143
  label: z$1.ZodOptional<z$1.ZodString>;
2997
3144
  color: z$1.ZodOptional<z$1.ZodEnum<{
2998
- success: "success";
2999
3145
  error: "error";
3000
- warning: "warning";
3146
+ success: "success";
3001
3147
  accent: "accent";
3002
3148
  accentDim: "accentDim";
3149
+ warning: "warning";
3003
3150
  textMuted: "textMuted";
3004
3151
  }>>;
3005
3152
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3010,8 +3157,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3010
3157
  source: z$1.ZodLiteral<"column">;
3011
3158
  key: z$1.ZodString;
3012
3159
  aggregate: z$1.ZodEnum<{
3013
- sum: "sum";
3014
3160
  avg: "avg";
3161
+ sum: "sum";
3015
3162
  min: "min";
3016
3163
  max: "max";
3017
3164
  latest: "latest";
@@ -3019,11 +3166,11 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3019
3166
  }>;
3020
3167
  label: z$1.ZodOptional<z$1.ZodString>;
3021
3168
  color: z$1.ZodOptional<z$1.ZodEnum<{
3022
- success: "success";
3023
3169
  error: "error";
3024
- warning: "warning";
3170
+ success: "success";
3025
3171
  accent: "accent";
3026
3172
  accentDim: "accentDim";
3173
+ warning: "warning";
3027
3174
  textMuted: "textMuted";
3028
3175
  }>>;
3029
3176
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3052,8 +3199,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3052
3199
  source: z$1.ZodLiteral<"column">;
3053
3200
  key: z$1.ZodString;
3054
3201
  aggregate: z$1.ZodEnum<{
3055
- sum: "sum";
3056
3202
  avg: "avg";
3203
+ sum: "sum";
3057
3204
  min: "min";
3058
3205
  max: "max";
3059
3206
  latest: "latest";
@@ -3085,11 +3232,11 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3085
3232
  }>;
3086
3233
  label: z$1.ZodOptional<z$1.ZodString>;
3087
3234
  color: z$1.ZodOptional<z$1.ZodEnum<{
3088
- success: "success";
3089
3235
  error: "error";
3090
- warning: "warning";
3236
+ success: "success";
3091
3237
  accent: "accent";
3092
3238
  accentDim: "accentDim";
3239
+ warning: "warning";
3093
3240
  textMuted: "textMuted";
3094
3241
  }>>;
3095
3242
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3100,8 +3247,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3100
3247
  source: z$1.ZodLiteral<"column">;
3101
3248
  key: z$1.ZodString;
3102
3249
  aggregate: z$1.ZodEnum<{
3103
- sum: "sum";
3104
3250
  avg: "avg";
3251
+ sum: "sum";
3105
3252
  min: "min";
3106
3253
  max: "max";
3107
3254
  latest: "latest";
@@ -3109,11 +3256,11 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3109
3256
  }>;
3110
3257
  label: z$1.ZodOptional<z$1.ZodString>;
3111
3258
  color: z$1.ZodOptional<z$1.ZodEnum<{
3112
- success: "success";
3113
3259
  error: "error";
3114
- warning: "warning";
3260
+ success: "success";
3115
3261
  accent: "accent";
3116
3262
  accentDim: "accentDim";
3263
+ warning: "warning";
3117
3264
  textMuted: "textMuted";
3118
3265
  }>>;
3119
3266
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3142,8 +3289,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3142
3289
  source: z$1.ZodLiteral<"column">;
3143
3290
  key: z$1.ZodString;
3144
3291
  aggregate: z$1.ZodEnum<{
3145
- sum: "sum";
3146
3292
  avg: "avg";
3293
+ sum: "sum";
3147
3294
  min: "min";
3148
3295
  max: "max";
3149
3296
  latest: "latest";
@@ -3160,11 +3307,11 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3160
3307
  id: z$1.ZodString;
3161
3308
  shortId: z$1.ZodString;
3162
3309
  status: z$1.ZodEnum<{
3163
- error: "error";
3164
- running: "running";
3165
- cancelled: "cancelled";
3166
3310
  pending: "pending";
3311
+ running: "running";
3167
3312
  completed: "completed";
3313
+ cancelled: "cancelled";
3314
+ error: "error";
3168
3315
  }>;
3169
3316
  temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
3170
3317
  startedAt: z$1.ZodString;
@@ -3174,8 +3321,8 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3174
3321
  target: z$1.ZodObject<{
3175
3322
  mode: z$1.ZodEnum<{
3176
3323
  all: "all";
3177
- caseIds: "caseIds";
3178
3324
  evalIds: "evalIds";
3325
+ caseIds: "caseIds";
3179
3326
  }>;
3180
3327
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
3181
3328
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -3189,9 +3336,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3189
3336
  median: "median";
3190
3337
  }>>>;
3191
3338
  cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
3192
- refresh: "refresh";
3193
- bypass: "bypass";
3194
3339
  use: "use";
3340
+ bypass: "bypass";
3341
+ refresh: "refresh";
3195
3342
  }>>;
3196
3343
  }, z$1.core.$strip>;
3197
3344
  /** Persisted lifecycle metadata for a single eval run. */
@@ -3200,11 +3347,11 @@ type RunManifest = z$1.infer<typeof runManifestSchema$1>;
3200
3347
  declare const runSummarySchema$1: z$1.ZodObject<{
3201
3348
  runId: z$1.ZodString;
3202
3349
  status: z$1.ZodEnum<{
3203
- error: "error";
3204
- running: "running";
3205
- cancelled: "cancelled";
3206
3350
  pending: "pending";
3351
+ running: "running";
3207
3352
  completed: "completed";
3353
+ cancelled: "cancelled";
3354
+ error: "error";
3208
3355
  }>;
3209
3356
  totalCases: z$1.ZodNumber;
3210
3357
  passedCases: z$1.ZodNumber;
@@ -3287,8 +3434,8 @@ type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
3287
3434
  /** Built-in eval-level output/column keys. */
3288
3435
  /** Removal config for built-in eval-level outputs and UI metadata. */
3289
3436
  declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
3290
- costUsd: "costUsd";
3291
3437
  apiCalls: "apiCalls";
3438
+ costUsd: "costUsd";
3292
3439
  llmTurns: "llmTurns";
3293
3440
  inputTokens: "inputTokens";
3294
3441
  outputTokens: "outputTokens";
@@ -3338,8 +3485,8 @@ type EvalColumnOverride$1 = {
3338
3485
  * Presentation preset for the value.
3339
3486
  *
3340
3487
  * Use this to control how the UI renders the cell and infer table behavior,
3341
- * for example `number`, `boolean`, `duration`, `markdown`, `json`, or
3342
- * file/media previews.
3488
+ * for example `number`, `boolean`, `duration`, `markdown`, `json`,
3489
+ * `image`, `html`, `pdf`, or file/media previews.
3343
3490
  */
3344
3491
  format?: ColumnFormat;
3345
3492
  /**
@@ -3793,6 +3940,15 @@ type AgentEvalsConfig$1 = {
3793
3940
  * appended last unless removed with `removeDefaultConfig`.
3794
3941
  */
3795
3942
  stats?: EvalStatsConfig;
3943
+ /**
3944
+ * Initial aggregate mode used for duration and column stats on every eval
3945
+ * card.
3946
+ *
3947
+ * Per-eval `defaultStatAggregate` overrides this value. Individual stat
3948
+ * `aggregate` values still define their authored reducer and remain the
3949
+ * fallback when no default aggregate is configured.
3950
+ */
3951
+ defaultStatAggregate?: EvalStatAggregate;
3796
3952
  /**
3797
3953
  * Configuration for the "LLM calls" tab in the case-run drawer.
3798
3954
  *
@@ -4093,9 +4249,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4093
4249
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4094
4250
  */
4095
4251
  declare const cacheModeSchema: z$1.ZodEnum<{
4096
- refresh: "refresh";
4097
- bypass: "bypass";
4098
4252
  use: "use";
4253
+ bypass: "bypass";
4254
+ refresh: "refresh";
4099
4255
  }>;
4100
4256
  /** Mode controlling how cached spans behave during a run. */
4101
4257
  type CacheMode = z$1.infer<typeof cacheModeSchema>;
@@ -4116,10 +4272,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
4116
4272
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
4117
4273
  /** Status of a cache lookup recorded on a span or case scope. */
4118
4274
  declare const cacheStatusSchema: z$1.ZodEnum<{
4275
+ bypass: "bypass";
4276
+ refresh: "refresh";
4119
4277
  hit: "hit";
4120
4278
  miss: "miss";
4121
- refresh: "refresh";
4122
- bypass: "bypass";
4123
4279
  }>;
4124
4280
  /** Status of a cache lookup recorded on a span or case scope. */
4125
4281
  type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
@@ -4136,10 +4292,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
4136
4292
  namespace: z$1.ZodString;
4137
4293
  key: z$1.ZodString;
4138
4294
  status: z$1.ZodEnum<{
4295
+ bypass: "bypass";
4296
+ refresh: "refresh";
4139
4297
  hit: "hit";
4140
4298
  miss: "miss";
4141
- refresh: "refresh";
4142
- bypass: "bypass";
4143
4299
  }>;
4144
4300
  read: z$1.ZodOptional<z$1.ZodBoolean>;
4145
4301
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -4214,10 +4370,10 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
4214
4370
  returnValue: z$1.ZodUnknown;
4215
4371
  finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
4216
4372
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4217
- error: "error";
4218
4373
  running: "running";
4219
- ok: "ok";
4220
4374
  cancelled: "cancelled";
4375
+ error: "error";
4376
+ ok: "ok";
4221
4377
  }>>;
4222
4378
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4223
4379
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4287,10 +4443,10 @@ declare const cacheEntrySchema: z$1.ZodObject<{
4287
4443
  returnValue: z$1.ZodUnknown;
4288
4444
  finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
4289
4445
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4290
- error: "error";
4291
4446
  running: "running";
4292
- ok: "ok";
4293
4447
  cancelled: "cancelled";
4448
+ error: "error";
4449
+ ok: "ok";
4294
4450
  }>>;
4295
4451
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4296
4452
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4377,10 +4533,10 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4377
4533
  returnValue: z$1.ZodUnknown;
4378
4534
  finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
4379
4535
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4380
- error: "error";
4381
4536
  running: "running";
4382
- ok: "ok";
4383
4537
  cancelled: "cancelled";
4538
+ error: "error";
4539
+ ok: "ok";
4384
4540
  }>>;
4385
4541
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4386
4542
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4456,10 +4612,10 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4456
4612
  returnValue: z$1.ZodUnknown;
4457
4613
  finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
4458
4614
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4459
- error: "error";
4460
4615
  running: "running";
4461
- ok: "ok";
4462
4616
  cancelled: "cancelled";
4617
+ error: "error";
4618
+ ok: "ok";
4463
4619
  }>>;
4464
4620
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4465
4621
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4537,10 +4693,10 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4537
4693
  returnValue: z$1.ZodUnknown;
4538
4694
  finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
4539
4695
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4540
- error: "error";
4541
4696
  running: "running";
4542
- ok: "ok";
4543
4697
  cancelled: "cancelled";
4698
+ error: "error";
4699
+ ok: "ok";
4544
4700
  }>>;
4545
4701
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4546
4702
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4616,10 +4772,10 @@ declare const cacheFileSchema: z$1.ZodObject<{
4616
4772
  returnValue: z$1.ZodUnknown;
4617
4773
  finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
4618
4774
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4619
- error: "error";
4620
4775
  running: "running";
4621
- ok: "ok";
4622
4776
  cancelled: "cancelled";
4777
+ error: "error";
4778
+ ok: "ok";
4623
4779
  }>>;
4624
4780
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4625
4781
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4705,10 +4861,10 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
4705
4861
  returnValue: z$1.ZodUnknown;
4706
4862
  finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
4707
4863
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4708
- error: "error";
4709
4864
  running: "running";
4710
- ok: "ok";
4711
4865
  cancelled: "cancelled";
4866
+ error: "error";
4867
+ ok: "ok";
4712
4868
  }>>;
4713
4869
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4714
4870
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4865,8 +5021,8 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
4865
5021
  target: z$1.ZodObject<{
4866
5022
  mode: z$1.ZodEnum<{
4867
5023
  all: "all";
4868
- caseIds: "caseIds";
4869
5024
  evalIds: "evalIds";
5025
+ caseIds: "caseIds";
4870
5026
  }>;
4871
5027
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
4872
5028
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -4878,9 +5034,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
4878
5034
  temporary: z$1.ZodOptional<z$1.ZodBoolean>;
4879
5035
  cache: z$1.ZodOptional<z$1.ZodObject<{
4880
5036
  mode: z$1.ZodDefault<z$1.ZodEnum<{
4881
- refresh: "refresh";
4882
- bypass: "bypass";
4883
5037
  use: "use";
5038
+ bypass: "bypass";
5039
+ refresh: "refresh";
4884
5040
  }>>;
4885
5041
  }, z$1.core.$strip>>;
4886
5042
  manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
@@ -5176,6 +5332,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
5176
5332
  markdown: "markdown";
5177
5333
  json: "json";
5178
5334
  image: "image";
5335
+ html: "html";
5336
+ pdf: "pdf";
5179
5337
  audio: "audio";
5180
5338
  video: "video";
5181
5339
  percent: "percent";
@@ -5215,9 +5373,25 @@ declare const evalSummarySchema: z$1.ZodObject<{
5215
5373
  }, z$1.core.$strip>, z$1.ZodObject<{
5216
5374
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
5217
5375
  kind: z$1.ZodLiteral<"duration">;
5376
+ aggregate: z$1.ZodOptional<z$1.ZodEnum<{
5377
+ avg: "avg";
5378
+ min: "min";
5379
+ max: "max";
5380
+ sum: "sum";
5381
+ best: "best";
5382
+ worst: "worst";
5383
+ }>>;
5218
5384
  }, z$1.core.$strip>, z$1.ZodObject<{
5219
5385
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
5220
5386
  kind: z$1.ZodLiteral<"cacheHits">;
5387
+ aggregate: z$1.ZodOptional<z$1.ZodEnum<{
5388
+ avg: "avg";
5389
+ min: "min";
5390
+ max: "max";
5391
+ sum: "sum";
5392
+ best: "best";
5393
+ worst: "worst";
5394
+ }>>;
5221
5395
  }, z$1.core.$strip>, z$1.ZodObject<{
5222
5396
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
5223
5397
  kind: z$1.ZodLiteral<"column">;
@@ -5228,7 +5402,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
5228
5402
  min: "min";
5229
5403
  max: "max";
5230
5404
  sum: "sum";
5231
- last: "last";
5405
+ best: "best";
5406
+ worst: "worst";
5232
5407
  }>;
5233
5408
  format: z$1.ZodOptional<z$1.ZodEnum<{
5234
5409
  number: "number";
@@ -5238,6 +5413,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
5238
5413
  markdown: "markdown";
5239
5414
  json: "json";
5240
5415
  image: "image";
5416
+ html: "html";
5417
+ pdf: "pdf";
5241
5418
  audio: "audio";
5242
5419
  video: "video";
5243
5420
  percent: "percent";
@@ -5247,6 +5424,14 @@ declare const evalSummarySchema: z$1.ZodObject<{
5247
5424
  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions$1, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions$1, unknown>>>;
5248
5425
  accent: z$1.ZodOptional<z$1.ZodBoolean>;
5249
5426
  }, z$1.core.$strip>], "kind">>>;
5427
+ defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
5428
+ avg: "avg";
5429
+ min: "min";
5430
+ max: "max";
5431
+ sum: "sum";
5432
+ best: "best";
5433
+ worst: "worst";
5434
+ }>>;
5250
5435
  charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
5251
5436
  heading: z$1.ZodOptional<z$1.ZodString>;
5252
5437
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -5435,11 +5620,13 @@ declare const caseRowSchema: z$1.ZodObject<{
5435
5620
  source: z$1.ZodLiteral<"repo">;
5436
5621
  path: z$1.ZodString;
5437
5622
  mimeType: z$1.ZodOptional<z$1.ZodString>;
5623
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
5438
5624
  }, z$1.core.$strip>, z$1.ZodObject<{
5439
5625
  source: z$1.ZodLiteral<"run">;
5440
5626
  artifactId: z$1.ZodString;
5441
5627
  mimeType: z$1.ZodString;
5442
5628
  fileName: z$1.ZodOptional<z$1.ZodString>;
5629
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
5443
5630
  }, z$1.core.$strip>]>]>>;
5444
5631
  trial: z$1.ZodNumber;
5445
5632
  }, z$1.core.$strip>;
@@ -5524,8 +5711,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
5524
5711
  }>>;
5525
5712
  mode: z$1.ZodOptional<z$1.ZodEnum<{
5526
5713
  sum: "sum";
5527
- last: "last";
5528
5714
  all: "all";
5715
+ last: "last";
5529
5716
  }>>;
5530
5717
  }, z$1.core.$strip>>>;
5531
5718
  }, z$1.core.$strip>;
@@ -5593,8 +5780,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
5593
5780
  }>>;
5594
5781
  mode: z$1.ZodOptional<z$1.ZodEnum<{
5595
5782
  sum: "sum";
5596
- last: "last";
5597
5783
  all: "all";
5784
+ last: "last";
5598
5785
  }>>;
5599
5786
  }, z$1.core.$strip>>>;
5600
5787
  }, z$1.core.$strip>;
@@ -5619,11 +5806,13 @@ declare const caseDetailSchema: z$1.ZodObject<{
5619
5806
  source: z$1.ZodLiteral<"repo">;
5620
5807
  path: z$1.ZodString;
5621
5808
  mimeType: z$1.ZodOptional<z$1.ZodString>;
5809
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
5622
5810
  }, z$1.core.$strip>, z$1.ZodObject<{
5623
5811
  source: z$1.ZodLiteral<"run">;
5624
5812
  artifactId: z$1.ZodString;
5625
5813
  mimeType: z$1.ZodString;
5626
5814
  fileName: z$1.ZodOptional<z$1.ZodString>;
5815
+ sizeBytes: z$1.ZodOptional<z$1.ZodNumber>;
5627
5816
  }, z$1.core.$strip>]>]>>;
5628
5817
  assertionFailures: z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
5629
5818
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -6310,6 +6499,22 @@ type EvalRunner = {
6310
6499
  deleteRun(runId: string): Promise<{
6311
6500
  deleted: boolean;
6312
6501
  }>;
6502
+ /**
6503
+ * Convert a temporary persisted run into durable run history.
6504
+ *
6505
+ * Returns the updated run when found. Already-durable runs are treated as a
6506
+ * no-op success so UI callers can refresh their cached copy idempotently.
6507
+ */
6508
+ promoteRun(runId: string): Promise<{
6509
+ promoted: boolean;
6510
+ run: {
6511
+ manifest: RunManifest$1;
6512
+ summary: RunSummary$1;
6513
+ cases: CaseRow$1[];
6514
+ };
6515
+ } | {
6516
+ promoted: false;
6517
+ }>;
6313
6518
  /**
6314
6519
  * Validate a `CreateRunRequest`'s `manualInputs` map against each targeted
6315
6520
  * eval's authored `manualInput.schema`. Returns `ok: true` with the parsed