@ls-stack/agent-eval 0.53.0 → 0.54.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-D0a57pVo.mjs → app-CMJtftUB.mjs} +12 -4
- package/dist/apps/web/dist/assets/index-BcLH_Wme.css +1 -0
- package/dist/apps/web/dist/assets/index-Bx1VZctJ.js +377 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-3FrKBc9l.mjs → cli-BEtk5skO.mjs} +51 -11
- package/dist/index.d.mts +283 -111
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -1
- package/dist/{runOrchestration-Cn6fGL2s.mjs → runOrchestration-C78U4Ir0.mjs} +228 -134
- package/dist/{runner-C0qdoRSi.mjs → runner-5y6nEBZM.mjs} +2 -2
- package/dist/{runner-Dsqj431i.mjs → runner-pmIYculz.mjs} +1 -1
- package/dist/{src-BNmtaqeC.mjs → src-BM6LW4ou.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +14 -3
- package/dist/apps/web/dist/assets/index-RNejIyap.js +0 -375
- package/dist/apps/web/dist/assets/index-vaLgWG8j.css +0 -1
package/dist/index.d.mts
CHANGED
|
@@ -144,6 +144,25 @@ declare const traceSpanSchema$2: z$1.ZodObject<{
|
|
|
144
144
|
/** Persisted trace span shape stored for each eval case run. */
|
|
145
145
|
type EvalTraceSpan$2 = z$1.infer<typeof traceSpanSchema$2>; //#endregion
|
|
146
146
|
//#region ../shared/src/schemas/eval.d.ts
|
|
147
|
+
/**
|
|
148
|
+
* Reducer used to collapse per-case values into a single duration or column
|
|
149
|
+
* stat.
|
|
150
|
+
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
151
|
+
*/
|
|
152
|
+
declare const evalStatAggregateSchema$1: z$1.ZodEnum<{
|
|
153
|
+
avg: "avg";
|
|
154
|
+
min: "min";
|
|
155
|
+
max: "max";
|
|
156
|
+
sum: "sum";
|
|
157
|
+
best: "best";
|
|
158
|
+
worst: "worst";
|
|
159
|
+
}>;
|
|
160
|
+
/**
|
|
161
|
+
* Reducer used to collapse per-case values into a single duration or column
|
|
162
|
+
* stat.
|
|
163
|
+
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
164
|
+
*/
|
|
165
|
+
type EvalStatAggregate$1 = z$1.infer<typeof evalStatAggregateSchema$1>;
|
|
147
166
|
/** Ordered list of stats rendered in the EvalCard stats row. */
|
|
148
167
|
declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
149
168
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -155,9 +174,25 @@ declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z
|
|
|
155
174
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
156
175
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
157
176
|
kind: z$1.ZodLiteral<"duration">;
|
|
177
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
178
|
+
avg: "avg";
|
|
179
|
+
min: "min";
|
|
180
|
+
max: "max";
|
|
181
|
+
sum: "sum";
|
|
182
|
+
best: "best";
|
|
183
|
+
worst: "worst";
|
|
184
|
+
}>>;
|
|
158
185
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
159
186
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
160
187
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
188
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
189
|
+
avg: "avg";
|
|
190
|
+
min: "min";
|
|
191
|
+
max: "max";
|
|
192
|
+
sum: "sum";
|
|
193
|
+
best: "best";
|
|
194
|
+
worst: "worst";
|
|
195
|
+
}>>;
|
|
161
196
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
162
197
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
163
198
|
kind: z$1.ZodLiteral<"column">;
|
|
@@ -168,7 +203,8 @@ declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z
|
|
|
168
203
|
min: "min";
|
|
169
204
|
max: "max";
|
|
170
205
|
sum: "sum";
|
|
171
|
-
|
|
206
|
+
best: "best";
|
|
207
|
+
worst: "worst";
|
|
172
208
|
}>;
|
|
173
209
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
174
210
|
number: "number";
|
|
@@ -1022,16 +1058,28 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
|
|
|
1022
1058
|
* When provided, the stats render in order, left to right.
|
|
1023
1059
|
*
|
|
1024
1060
|
* Built-in kinds (`cases`, `passRate`, `duration`, `cacheHits`) read from
|
|
1025
|
-
* the latest run
|
|
1026
|
-
*
|
|
1027
|
-
*
|
|
1028
|
-
*
|
|
1029
|
-
*
|
|
1030
|
-
*
|
|
1031
|
-
*
|
|
1032
|
-
*
|
|
1061
|
+
* the latest run. `duration` aggregates finite per-case durations using the
|
|
1062
|
+
* same modes as column stats. `cacheHits` counts Agent Eval operation-level
|
|
1063
|
+
* cache hits over total cache operations, not LLM provider prompt-cache read
|
|
1064
|
+
* tokens. Cache-hit stats have their own aggregate mode and default to `sum`;
|
|
1065
|
+
* `avg` is average per-case hit rate, and min/max/best/worst select cases by
|
|
1066
|
+
* hit rate. `kind: 'column'` aggregates a score or numeric output column
|
|
1067
|
+
* across the latest run's cases — `key` must match one of the eval's score or
|
|
1068
|
+
* column keys, and only finite numeric values participate in the reduction.
|
|
1069
|
+
* When no case has a numeric value for the key the stat renders an em dash, or
|
|
1070
|
+
* hides when `hideIfNoValue` is true. `label`, `format`, and `numberFormat`
|
|
1071
|
+
* default to the matching `ColumnDef`.
|
|
1033
1072
|
*/
|
|
1034
1073
|
stats?: EvalStatsConfig$1;
|
|
1074
|
+
/**
|
|
1075
|
+
* Initial aggregate mode used for this eval's duration and column stats in
|
|
1076
|
+
* the web UI.
|
|
1077
|
+
*
|
|
1078
|
+
* Overrides `AgentEvalsConfig.defaultStatAggregate`. Individual stat
|
|
1079
|
+
* `aggregate` values still define their authored reducer and remain the
|
|
1080
|
+
* fallback when neither default is configured.
|
|
1081
|
+
*/
|
|
1082
|
+
defaultStatAggregate?: EvalStatAggregate$1;
|
|
1035
1083
|
/**
|
|
1036
1084
|
* Optional history chart configuration for the EvalCard in the web UI.
|
|
1037
1085
|
*
|
|
@@ -1083,8 +1131,12 @@ type EvalRegistryEntry = {
|
|
|
1083
1131
|
/** Return the in-memory registry of evals defined in the current process. */
|
|
1084
1132
|
declare function getEvalRegistry(): Map<string, EvalRegistryEntry>;
|
|
1085
1133
|
/**
|
|
1086
|
-
*
|
|
1087
|
-
*
|
|
1134
|
+
* Execute a callback with an empty async-local eval registry.
|
|
1135
|
+
*
|
|
1136
|
+
* Runner internals use this when importing eval modules concurrently so
|
|
1137
|
+
* `defineEval(...)` calls from one import cannot overwrite another import's
|
|
1138
|
+
* registered definitions. The callback receives the scoped registry populated
|
|
1139
|
+
* during its async execution.
|
|
1088
1140
|
*/
|
|
1089
1141
|
//#endregion
|
|
1090
1142
|
//#region src/evalExpect.d.ts
|
|
@@ -1729,14 +1781,14 @@ declare const columnFormatSchema: z$1.ZodEnum<{
|
|
|
1729
1781
|
number: "number";
|
|
1730
1782
|
boolean: "boolean";
|
|
1731
1783
|
file: "file";
|
|
1732
|
-
markdown: "markdown";
|
|
1733
1784
|
json: "json";
|
|
1785
|
+
duration: "duration";
|
|
1786
|
+
markdown: "markdown";
|
|
1734
1787
|
image: "image";
|
|
1735
1788
|
html: "html";
|
|
1736
1789
|
pdf: "pdf";
|
|
1737
1790
|
audio: "audio";
|
|
1738
1791
|
video: "video";
|
|
1739
|
-
duration: "duration";
|
|
1740
1792
|
percent: "percent";
|
|
1741
1793
|
passFail: "passFail";
|
|
1742
1794
|
stars: "stars";
|
|
@@ -1756,14 +1808,14 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
1756
1808
|
number: "number";
|
|
1757
1809
|
boolean: "boolean";
|
|
1758
1810
|
file: "file";
|
|
1759
|
-
markdown: "markdown";
|
|
1760
1811
|
json: "json";
|
|
1812
|
+
duration: "duration";
|
|
1813
|
+
markdown: "markdown";
|
|
1761
1814
|
image: "image";
|
|
1762
1815
|
html: "html";
|
|
1763
1816
|
pdf: "pdf";
|
|
1764
1817
|
audio: "audio";
|
|
1765
1818
|
video: "video";
|
|
1766
|
-
duration: "duration";
|
|
1767
1819
|
percent: "percent";
|
|
1768
1820
|
passFail: "passFail";
|
|
1769
1821
|
stars: "stars";
|
|
@@ -1777,8 +1829,8 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
1777
1829
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1778
1830
|
align: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1779
1831
|
left: "left";
|
|
1780
|
-
center: "center";
|
|
1781
1832
|
right: "right";
|
|
1833
|
+
center: "center";
|
|
1782
1834
|
}>>;
|
|
1783
1835
|
}, z$1.core.$strip>;
|
|
1784
1836
|
/** Column definition exposed to the UI for eval and case tables. */
|
|
@@ -1848,9 +1900,9 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
1848
1900
|
subtree: "subtree";
|
|
1849
1901
|
}>>;
|
|
1850
1902
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1903
|
+
all: "all";
|
|
1851
1904
|
sum: "sum";
|
|
1852
1905
|
last: "last";
|
|
1853
|
-
all: "all";
|
|
1854
1906
|
}>>;
|
|
1855
1907
|
}, z$1.core.$strip>;
|
|
1856
1908
|
/**
|
|
@@ -1884,9 +1936,9 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
1884
1936
|
subtree: "subtree";
|
|
1885
1937
|
}>>;
|
|
1886
1938
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1939
|
+
all: "all";
|
|
1887
1940
|
sum: "sum";
|
|
1888
1941
|
last: "last";
|
|
1889
|
-
all: "all";
|
|
1890
1942
|
}>>;
|
|
1891
1943
|
}, z$1.core.$strip>>>;
|
|
1892
1944
|
}, z$1.core.$strip>;
|
|
@@ -1924,9 +1976,9 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
1924
1976
|
subtree: "subtree";
|
|
1925
1977
|
}>>;
|
|
1926
1978
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1979
|
+
all: "all";
|
|
1927
1980
|
sum: "sum";
|
|
1928
1981
|
last: "last";
|
|
1929
|
-
all: "all";
|
|
1930
1982
|
}>>;
|
|
1931
1983
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
1932
1984
|
}, z$1.core.$strip>;
|
|
@@ -1962,9 +2014,9 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
1962
2014
|
subtree: "subtree";
|
|
1963
2015
|
}>>;
|
|
1964
2016
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2017
|
+
all: "all";
|
|
1965
2018
|
sum: "sum";
|
|
1966
2019
|
last: "last";
|
|
1967
|
-
all: "all";
|
|
1968
2020
|
}>>;
|
|
1969
2021
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
1970
2022
|
}, z$1.core.$strip>>>;
|
|
@@ -1999,9 +2051,9 @@ declare const traceSpanSchema$1: z$1.ZodObject<{
|
|
|
1999
2051
|
startedAt: z$1.ZodString;
|
|
2000
2052
|
endedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
2001
2053
|
status: z$1.ZodEnum<{
|
|
2002
|
-
error: "error";
|
|
2003
2054
|
running: "running";
|
|
2004
2055
|
cancelled: "cancelled";
|
|
2056
|
+
error: "error";
|
|
2005
2057
|
ok: "ok";
|
|
2006
2058
|
}>;
|
|
2007
2059
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -2041,21 +2093,32 @@ declare const evalFreshnessStatusSchema: z$1.ZodEnum<{
|
|
|
2041
2093
|
}>;
|
|
2042
2094
|
/** Freshness signal derived from the latest relevant run plus git state. */
|
|
2043
2095
|
type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
|
|
2044
|
-
/**
|
|
2096
|
+
/**
|
|
2097
|
+
* Reducer used to collapse per-case values into a single duration or column
|
|
2098
|
+
* stat.
|
|
2099
|
+
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
2100
|
+
*/
|
|
2045
2101
|
declare const evalStatAggregateSchema: z$1.ZodEnum<{
|
|
2046
2102
|
avg: "avg";
|
|
2103
|
+
sum: "sum";
|
|
2047
2104
|
min: "min";
|
|
2048
2105
|
max: "max";
|
|
2049
|
-
|
|
2050
|
-
|
|
2106
|
+
best: "best";
|
|
2107
|
+
worst: "worst";
|
|
2051
2108
|
}>;
|
|
2052
|
-
/**
|
|
2109
|
+
/**
|
|
2110
|
+
* Reducer used to collapse per-case values into a single duration or column
|
|
2111
|
+
* stat.
|
|
2112
|
+
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
2113
|
+
*/
|
|
2053
2114
|
type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
|
|
2054
2115
|
/**
|
|
2055
|
-
* One entry in the EvalCard stats row. Built-in kinds
|
|
2056
|
-
* `
|
|
2057
|
-
* `evalTracer.cache(...)` refs, not
|
|
2058
|
-
*
|
|
2116
|
+
* One entry in the EvalCard stats row. Built-in kinds read from the latest run;
|
|
2117
|
+
* `duration` aggregates per-case durations, `cacheHits` counts Agent Eval
|
|
2118
|
+
* operation-level cache hits from spans and `evalTracer.cache(...)` refs, not
|
|
2119
|
+
* LLM provider prompt-cache read tokens. Cache hits use an independent
|
|
2120
|
+
* aggregate mode and default to `sum`. `column` aggregates a score or numeric
|
|
2121
|
+
* output column across the latest run.
|
|
2059
2122
|
*/
|
|
2060
2123
|
declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
2061
2124
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2067,9 +2130,25 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2067
2130
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2068
2131
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2069
2132
|
kind: z$1.ZodLiteral<"duration">;
|
|
2133
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2134
|
+
avg: "avg";
|
|
2135
|
+
sum: "sum";
|
|
2136
|
+
min: "min";
|
|
2137
|
+
max: "max";
|
|
2138
|
+
best: "best";
|
|
2139
|
+
worst: "worst";
|
|
2140
|
+
}>>;
|
|
2070
2141
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2071
2142
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2072
2143
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2144
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2145
|
+
avg: "avg";
|
|
2146
|
+
sum: "sum";
|
|
2147
|
+
min: "min";
|
|
2148
|
+
max: "max";
|
|
2149
|
+
best: "best";
|
|
2150
|
+
worst: "worst";
|
|
2151
|
+
}>>;
|
|
2073
2152
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2074
2153
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2075
2154
|
kind: z$1.ZodLiteral<"column">;
|
|
@@ -2077,23 +2156,24 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2077
2156
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2078
2157
|
aggregate: z$1.ZodEnum<{
|
|
2079
2158
|
avg: "avg";
|
|
2159
|
+
sum: "sum";
|
|
2080
2160
|
min: "min";
|
|
2081
2161
|
max: "max";
|
|
2082
|
-
|
|
2083
|
-
|
|
2162
|
+
best: "best";
|
|
2163
|
+
worst: "worst";
|
|
2084
2164
|
}>;
|
|
2085
2165
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2086
2166
|
number: "number";
|
|
2087
2167
|
boolean: "boolean";
|
|
2088
2168
|
file: "file";
|
|
2089
|
-
markdown: "markdown";
|
|
2090
2169
|
json: "json";
|
|
2170
|
+
duration: "duration";
|
|
2171
|
+
markdown: "markdown";
|
|
2091
2172
|
image: "image";
|
|
2092
2173
|
html: "html";
|
|
2093
2174
|
pdf: "pdf";
|
|
2094
2175
|
audio: "audio";
|
|
2095
2176
|
video: "video";
|
|
2096
|
-
duration: "duration";
|
|
2097
2177
|
percent: "percent";
|
|
2098
2178
|
passFail: "passFail";
|
|
2099
2179
|
stars: "stars";
|
|
@@ -2114,9 +2194,25 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2114
2194
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2115
2195
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2116
2196
|
kind: z$1.ZodLiteral<"duration">;
|
|
2197
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2198
|
+
avg: "avg";
|
|
2199
|
+
sum: "sum";
|
|
2200
|
+
min: "min";
|
|
2201
|
+
max: "max";
|
|
2202
|
+
best: "best";
|
|
2203
|
+
worst: "worst";
|
|
2204
|
+
}>>;
|
|
2117
2205
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2118
2206
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2119
2207
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2208
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2209
|
+
avg: "avg";
|
|
2210
|
+
sum: "sum";
|
|
2211
|
+
min: "min";
|
|
2212
|
+
max: "max";
|
|
2213
|
+
best: "best";
|
|
2214
|
+
worst: "worst";
|
|
2215
|
+
}>>;
|
|
2120
2216
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2121
2217
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2122
2218
|
kind: z$1.ZodLiteral<"column">;
|
|
@@ -2124,23 +2220,24 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2124
2220
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2125
2221
|
aggregate: z$1.ZodEnum<{
|
|
2126
2222
|
avg: "avg";
|
|
2223
|
+
sum: "sum";
|
|
2127
2224
|
min: "min";
|
|
2128
2225
|
max: "max";
|
|
2129
|
-
|
|
2130
|
-
|
|
2226
|
+
best: "best";
|
|
2227
|
+
worst: "worst";
|
|
2131
2228
|
}>;
|
|
2132
2229
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2133
2230
|
number: "number";
|
|
2134
2231
|
boolean: "boolean";
|
|
2135
2232
|
file: "file";
|
|
2136
|
-
markdown: "markdown";
|
|
2137
2233
|
json: "json";
|
|
2234
|
+
duration: "duration";
|
|
2235
|
+
markdown: "markdown";
|
|
2138
2236
|
image: "image";
|
|
2139
2237
|
html: "html";
|
|
2140
2238
|
pdf: "pdf";
|
|
2141
2239
|
audio: "audio";
|
|
2142
2240
|
video: "video";
|
|
2143
|
-
duration: "duration";
|
|
2144
2241
|
percent: "percent";
|
|
2145
2242
|
passFail: "passFail";
|
|
2146
2243
|
stars: "stars";
|
|
@@ -2179,14 +2276,14 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2179
2276
|
number: "number";
|
|
2180
2277
|
boolean: "boolean";
|
|
2181
2278
|
file: "file";
|
|
2182
|
-
markdown: "markdown";
|
|
2183
2279
|
json: "json";
|
|
2280
|
+
duration: "duration";
|
|
2281
|
+
markdown: "markdown";
|
|
2184
2282
|
image: "image";
|
|
2185
2283
|
html: "html";
|
|
2186
2284
|
pdf: "pdf";
|
|
2187
2285
|
audio: "audio";
|
|
2188
2286
|
video: "video";
|
|
2189
|
-
duration: "duration";
|
|
2190
2287
|
percent: "percent";
|
|
2191
2288
|
passFail: "passFail";
|
|
2192
2289
|
stars: "stars";
|
|
@@ -2200,18 +2297,18 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2200
2297
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2201
2298
|
align: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2202
2299
|
left: "left";
|
|
2203
|
-
center: "center";
|
|
2204
2300
|
right: "right";
|
|
2301
|
+
center: "center";
|
|
2205
2302
|
}>>;
|
|
2206
2303
|
}, z$1.core.$strip>>;
|
|
2207
2304
|
caseCount: z$1.ZodNullable<z$1.ZodNumber>;
|
|
2208
2305
|
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2209
2306
|
lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
|
|
2307
|
+
running: "running";
|
|
2308
|
+
cancelled: "cancelled";
|
|
2210
2309
|
error: "error";
|
|
2211
2310
|
pass: "pass";
|
|
2212
2311
|
fail: "fail";
|
|
2213
|
-
running: "running";
|
|
2214
|
-
cancelled: "cancelled";
|
|
2215
2312
|
unscored: "unscored";
|
|
2216
2313
|
}>>;
|
|
2217
2314
|
stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
@@ -2224,9 +2321,25 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2224
2321
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2225
2322
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2226
2323
|
kind: z$1.ZodLiteral<"duration">;
|
|
2324
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2325
|
+
avg: "avg";
|
|
2326
|
+
sum: "sum";
|
|
2327
|
+
min: "min";
|
|
2328
|
+
max: "max";
|
|
2329
|
+
best: "best";
|
|
2330
|
+
worst: "worst";
|
|
2331
|
+
}>>;
|
|
2227
2332
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2228
2333
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2229
2334
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2335
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2336
|
+
avg: "avg";
|
|
2337
|
+
sum: "sum";
|
|
2338
|
+
min: "min";
|
|
2339
|
+
max: "max";
|
|
2340
|
+
best: "best";
|
|
2341
|
+
worst: "worst";
|
|
2342
|
+
}>>;
|
|
2230
2343
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2231
2344
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2232
2345
|
kind: z$1.ZodLiteral<"column">;
|
|
@@ -2234,23 +2347,24 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2234
2347
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2235
2348
|
aggregate: z$1.ZodEnum<{
|
|
2236
2349
|
avg: "avg";
|
|
2350
|
+
sum: "sum";
|
|
2237
2351
|
min: "min";
|
|
2238
2352
|
max: "max";
|
|
2239
|
-
|
|
2240
|
-
|
|
2353
|
+
best: "best";
|
|
2354
|
+
worst: "worst";
|
|
2241
2355
|
}>;
|
|
2242
2356
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2243
2357
|
number: "number";
|
|
2244
2358
|
boolean: "boolean";
|
|
2245
2359
|
file: "file";
|
|
2246
|
-
markdown: "markdown";
|
|
2247
2360
|
json: "json";
|
|
2361
|
+
duration: "duration";
|
|
2362
|
+
markdown: "markdown";
|
|
2248
2363
|
image: "image";
|
|
2249
2364
|
html: "html";
|
|
2250
2365
|
pdf: "pdf";
|
|
2251
2366
|
audio: "audio";
|
|
2252
2367
|
video: "video";
|
|
2253
|
-
duration: "duration";
|
|
2254
2368
|
percent: "percent";
|
|
2255
2369
|
passFail: "passFail";
|
|
2256
2370
|
stars: "stars";
|
|
@@ -2258,6 +2372,14 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2258
2372
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2259
2373
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2260
2374
|
}, z$1.core.$strip>], "kind">>>;
|
|
2375
|
+
defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2376
|
+
avg: "avg";
|
|
2377
|
+
sum: "sum";
|
|
2378
|
+
min: "min";
|
|
2379
|
+
max: "max";
|
|
2380
|
+
best: "best";
|
|
2381
|
+
worst: "worst";
|
|
2382
|
+
}>>;
|
|
2261
2383
|
charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
2262
2384
|
heading: z$1.ZodOptional<z$1.ZodString>;
|
|
2263
2385
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2275,8 +2397,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2275
2397
|
}>;
|
|
2276
2398
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2277
2399
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2278
|
-
success: "success";
|
|
2279
2400
|
error: "error";
|
|
2401
|
+
success: "success";
|
|
2280
2402
|
accent: "accent";
|
|
2281
2403
|
accentDim: "accentDim";
|
|
2282
2404
|
warning: "warning";
|
|
@@ -2291,16 +2413,16 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2291
2413
|
key: z$1.ZodString;
|
|
2292
2414
|
aggregate: z$1.ZodEnum<{
|
|
2293
2415
|
avg: "avg";
|
|
2416
|
+
sum: "sum";
|
|
2294
2417
|
min: "min";
|
|
2295
2418
|
max: "max";
|
|
2296
|
-
sum: "sum";
|
|
2297
2419
|
latest: "latest";
|
|
2298
2420
|
passThresholdRate: "passThresholdRate";
|
|
2299
2421
|
}>;
|
|
2300
2422
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2301
2423
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2302
|
-
success: "success";
|
|
2303
2424
|
error: "error";
|
|
2425
|
+
success: "success";
|
|
2304
2426
|
accent: "accent";
|
|
2305
2427
|
accentDim: "accentDim";
|
|
2306
2428
|
warning: "warning";
|
|
@@ -2333,9 +2455,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2333
2455
|
key: z$1.ZodString;
|
|
2334
2456
|
aggregate: z$1.ZodEnum<{
|
|
2335
2457
|
avg: "avg";
|
|
2458
|
+
sum: "sum";
|
|
2336
2459
|
min: "min";
|
|
2337
2460
|
max: "max";
|
|
2338
|
-
sum: "sum";
|
|
2339
2461
|
latest: "latest";
|
|
2340
2462
|
passThresholdRate: "passThresholdRate";
|
|
2341
2463
|
}>;
|
|
@@ -2431,12 +2553,12 @@ declare const caseRowSchema$1: z$1.ZodObject<{
|
|
|
2431
2553
|
evalId: z$1.ZodString;
|
|
2432
2554
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2433
2555
|
status: z$1.ZodEnum<{
|
|
2556
|
+
pending: "pending";
|
|
2557
|
+
running: "running";
|
|
2558
|
+
cancelled: "cancelled";
|
|
2434
2559
|
error: "error";
|
|
2435
2560
|
pass: "pass";
|
|
2436
2561
|
fail: "fail";
|
|
2437
|
-
running: "running";
|
|
2438
|
-
cancelled: "cancelled";
|
|
2439
|
-
pending: "pending";
|
|
2440
2562
|
}>;
|
|
2441
2563
|
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
2442
2564
|
cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
|
|
@@ -2532,9 +2654,9 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2532
2654
|
startedAt: z$1.ZodString;
|
|
2533
2655
|
endedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
2534
2656
|
status: z$1.ZodEnum<{
|
|
2535
|
-
error: "error";
|
|
2536
2657
|
running: "running";
|
|
2537
2658
|
cancelled: "cancelled";
|
|
2659
|
+
error: "error";
|
|
2538
2660
|
ok: "ok";
|
|
2539
2661
|
}>;
|
|
2540
2662
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -2585,9 +2707,9 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2585
2707
|
subtree: "subtree";
|
|
2586
2708
|
}>>;
|
|
2587
2709
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2710
|
+
all: "all";
|
|
2588
2711
|
sum: "sum";
|
|
2589
2712
|
last: "last";
|
|
2590
|
-
all: "all";
|
|
2591
2713
|
}>>;
|
|
2592
2714
|
}, z$1.core.$strip>>>;
|
|
2593
2715
|
}, z$1.core.$strip>;
|
|
@@ -2597,10 +2719,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2597
2719
|
namespace: z$1.ZodString;
|
|
2598
2720
|
key: z$1.ZodString;
|
|
2599
2721
|
status: z$1.ZodEnum<{
|
|
2722
|
+
bypass: "bypass";
|
|
2723
|
+
refresh: "refresh";
|
|
2600
2724
|
hit: "hit";
|
|
2601
2725
|
miss: "miss";
|
|
2602
|
-
refresh: "refresh";
|
|
2603
|
-
bypass: "bypass";
|
|
2604
2726
|
}>;
|
|
2605
2727
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2606
2728
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2618,12 +2740,12 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2618
2740
|
evalId: z$1.ZodString;
|
|
2619
2741
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2620
2742
|
status: z$1.ZodEnum<{
|
|
2743
|
+
pending: "pending";
|
|
2744
|
+
running: "running";
|
|
2745
|
+
cancelled: "cancelled";
|
|
2621
2746
|
error: "error";
|
|
2622
2747
|
pass: "pass";
|
|
2623
2748
|
fail: "fail";
|
|
2624
|
-
running: "running";
|
|
2625
|
-
cancelled: "cancelled";
|
|
2626
|
-
pending: "pending";
|
|
2627
2749
|
}>;
|
|
2628
2750
|
input: z$1.ZodUnknown;
|
|
2629
2751
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -2635,9 +2757,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2635
2757
|
startedAt: z$1.ZodString;
|
|
2636
2758
|
endedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
2637
2759
|
status: z$1.ZodEnum<{
|
|
2638
|
-
error: "error";
|
|
2639
2760
|
running: "running";
|
|
2640
2761
|
cancelled: "cancelled";
|
|
2762
|
+
error: "error";
|
|
2641
2763
|
ok: "ok";
|
|
2642
2764
|
}>;
|
|
2643
2765
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -2688,9 +2810,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2688
2810
|
subtree: "subtree";
|
|
2689
2811
|
}>>;
|
|
2690
2812
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2813
|
+
all: "all";
|
|
2691
2814
|
sum: "sum";
|
|
2692
2815
|
last: "last";
|
|
2693
|
-
all: "all";
|
|
2694
2816
|
}>>;
|
|
2695
2817
|
}, z$1.core.$strip>>>;
|
|
2696
2818
|
}, z$1.core.$strip>;
|
|
@@ -2704,9 +2826,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2704
2826
|
startedAt: z$1.ZodString;
|
|
2705
2827
|
endedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
2706
2828
|
status: z$1.ZodEnum<{
|
|
2707
|
-
error: "error";
|
|
2708
2829
|
running: "running";
|
|
2709
2830
|
cancelled: "cancelled";
|
|
2831
|
+
error: "error";
|
|
2710
2832
|
ok: "ok";
|
|
2711
2833
|
}>;
|
|
2712
2834
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -2757,9 +2879,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2757
2879
|
subtree: "subtree";
|
|
2758
2880
|
}>>;
|
|
2759
2881
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2882
|
+
all: "all";
|
|
2760
2883
|
sum: "sum";
|
|
2761
2884
|
last: "last";
|
|
2762
|
-
all: "all";
|
|
2763
2885
|
}>>;
|
|
2764
2886
|
}, z$1.core.$strip>>>;
|
|
2765
2887
|
}, z$1.core.$strip>;
|
|
@@ -2769,10 +2891,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2769
2891
|
namespace: z$1.ZodString;
|
|
2770
2892
|
key: z$1.ZodString;
|
|
2771
2893
|
status: z$1.ZodEnum<{
|
|
2894
|
+
bypass: "bypass";
|
|
2895
|
+
refresh: "refresh";
|
|
2772
2896
|
hit: "hit";
|
|
2773
2897
|
miss: "miss";
|
|
2774
|
-
refresh: "refresh";
|
|
2775
|
-
bypass: "bypass";
|
|
2776
2898
|
}>;
|
|
2777
2899
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2778
2900
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2838,10 +2960,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2838
2960
|
namespace: z$1.ZodString;
|
|
2839
2961
|
key: z$1.ZodString;
|
|
2840
2962
|
status: z$1.ZodEnum<{
|
|
2963
|
+
bypass: "bypass";
|
|
2964
|
+
refresh: "refresh";
|
|
2841
2965
|
hit: "hit";
|
|
2842
2966
|
miss: "miss";
|
|
2843
|
-
refresh: "refresh";
|
|
2844
|
-
bypass: "bypass";
|
|
2845
2967
|
}>;
|
|
2846
2968
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2847
2969
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2895,9 +3017,9 @@ type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
|
|
|
2895
3017
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
2896
3018
|
declare const evalChartAggregateSchema: z$1.ZodEnum<{
|
|
2897
3019
|
avg: "avg";
|
|
3020
|
+
sum: "sum";
|
|
2898
3021
|
min: "min";
|
|
2899
3022
|
max: "max";
|
|
2900
|
-
sum: "sum";
|
|
2901
3023
|
latest: "latest";
|
|
2902
3024
|
passThresholdRate: "passThresholdRate";
|
|
2903
3025
|
}>;
|
|
@@ -2908,8 +3030,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
2908
3030
|
* not emit raw hex so authored evals stay decoupled from the web theme.
|
|
2909
3031
|
*/
|
|
2910
3032
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
2911
|
-
success: "success";
|
|
2912
3033
|
error: "error";
|
|
3034
|
+
success: "success";
|
|
2913
3035
|
accent: "accent";
|
|
2914
3036
|
accentDim: "accentDim";
|
|
2915
3037
|
warning: "warning";
|
|
@@ -2937,8 +3059,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2937
3059
|
}>;
|
|
2938
3060
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2939
3061
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2940
|
-
success: "success";
|
|
2941
3062
|
error: "error";
|
|
3063
|
+
success: "success";
|
|
2942
3064
|
accent: "accent";
|
|
2943
3065
|
accentDim: "accentDim";
|
|
2944
3066
|
warning: "warning";
|
|
@@ -2953,16 +3075,16 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2953
3075
|
key: z$1.ZodString;
|
|
2954
3076
|
aggregate: z$1.ZodEnum<{
|
|
2955
3077
|
avg: "avg";
|
|
3078
|
+
sum: "sum";
|
|
2956
3079
|
min: "min";
|
|
2957
3080
|
max: "max";
|
|
2958
|
-
sum: "sum";
|
|
2959
3081
|
latest: "latest";
|
|
2960
3082
|
passThresholdRate: "passThresholdRate";
|
|
2961
3083
|
}>;
|
|
2962
3084
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2963
3085
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2964
|
-
success: "success";
|
|
2965
3086
|
error: "error";
|
|
3087
|
+
success: "success";
|
|
2966
3088
|
accent: "accent";
|
|
2967
3089
|
accentDim: "accentDim";
|
|
2968
3090
|
warning: "warning";
|
|
@@ -2988,9 +3110,9 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
|
|
|
2988
3110
|
key: z$1.ZodString;
|
|
2989
3111
|
aggregate: z$1.ZodEnum<{
|
|
2990
3112
|
avg: "avg";
|
|
3113
|
+
sum: "sum";
|
|
2991
3114
|
min: "min";
|
|
2992
3115
|
max: "max";
|
|
2993
|
-
sum: "sum";
|
|
2994
3116
|
latest: "latest";
|
|
2995
3117
|
passThresholdRate: "passThresholdRate";
|
|
2996
3118
|
}>;
|
|
@@ -3020,8 +3142,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3020
3142
|
}>;
|
|
3021
3143
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3022
3144
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3023
|
-
success: "success";
|
|
3024
3145
|
error: "error";
|
|
3146
|
+
success: "success";
|
|
3025
3147
|
accent: "accent";
|
|
3026
3148
|
accentDim: "accentDim";
|
|
3027
3149
|
warning: "warning";
|
|
@@ -3036,16 +3158,16 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3036
3158
|
key: z$1.ZodString;
|
|
3037
3159
|
aggregate: z$1.ZodEnum<{
|
|
3038
3160
|
avg: "avg";
|
|
3161
|
+
sum: "sum";
|
|
3039
3162
|
min: "min";
|
|
3040
3163
|
max: "max";
|
|
3041
|
-
sum: "sum";
|
|
3042
3164
|
latest: "latest";
|
|
3043
3165
|
passThresholdRate: "passThresholdRate";
|
|
3044
3166
|
}>;
|
|
3045
3167
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3046
3168
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3047
|
-
success: "success";
|
|
3048
3169
|
error: "error";
|
|
3170
|
+
success: "success";
|
|
3049
3171
|
accent: "accent";
|
|
3050
3172
|
accentDim: "accentDim";
|
|
3051
3173
|
warning: "warning";
|
|
@@ -3078,9 +3200,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3078
3200
|
key: z$1.ZodString;
|
|
3079
3201
|
aggregate: z$1.ZodEnum<{
|
|
3080
3202
|
avg: "avg";
|
|
3203
|
+
sum: "sum";
|
|
3081
3204
|
min: "min";
|
|
3082
3205
|
max: "max";
|
|
3083
|
-
sum: "sum";
|
|
3084
3206
|
latest: "latest";
|
|
3085
3207
|
passThresholdRate: "passThresholdRate";
|
|
3086
3208
|
}>;
|
|
@@ -3110,8 +3232,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3110
3232
|
}>;
|
|
3111
3233
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3112
3234
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3113
|
-
success: "success";
|
|
3114
3235
|
error: "error";
|
|
3236
|
+
success: "success";
|
|
3115
3237
|
accent: "accent";
|
|
3116
3238
|
accentDim: "accentDim";
|
|
3117
3239
|
warning: "warning";
|
|
@@ -3126,16 +3248,16 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3126
3248
|
key: z$1.ZodString;
|
|
3127
3249
|
aggregate: z$1.ZodEnum<{
|
|
3128
3250
|
avg: "avg";
|
|
3251
|
+
sum: "sum";
|
|
3129
3252
|
min: "min";
|
|
3130
3253
|
max: "max";
|
|
3131
|
-
sum: "sum";
|
|
3132
3254
|
latest: "latest";
|
|
3133
3255
|
passThresholdRate: "passThresholdRate";
|
|
3134
3256
|
}>;
|
|
3135
3257
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3136
3258
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3137
|
-
success: "success";
|
|
3138
3259
|
error: "error";
|
|
3260
|
+
success: "success";
|
|
3139
3261
|
accent: "accent";
|
|
3140
3262
|
accentDim: "accentDim";
|
|
3141
3263
|
warning: "warning";
|
|
@@ -3168,9 +3290,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3168
3290
|
key: z$1.ZodString;
|
|
3169
3291
|
aggregate: z$1.ZodEnum<{
|
|
3170
3292
|
avg: "avg";
|
|
3293
|
+
sum: "sum";
|
|
3171
3294
|
min: "min";
|
|
3172
3295
|
max: "max";
|
|
3173
|
-
sum: "sum";
|
|
3174
3296
|
latest: "latest";
|
|
3175
3297
|
passThresholdRate: "passThresholdRate";
|
|
3176
3298
|
}>;
|
|
@@ -3185,11 +3307,11 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3185
3307
|
id: z$1.ZodString;
|
|
3186
3308
|
shortId: z$1.ZodString;
|
|
3187
3309
|
status: z$1.ZodEnum<{
|
|
3188
|
-
error: "error";
|
|
3189
|
-
running: "running";
|
|
3190
|
-
cancelled: "cancelled";
|
|
3191
3310
|
pending: "pending";
|
|
3311
|
+
running: "running";
|
|
3192
3312
|
completed: "completed";
|
|
3313
|
+
cancelled: "cancelled";
|
|
3314
|
+
error: "error";
|
|
3193
3315
|
}>;
|
|
3194
3316
|
temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
|
|
3195
3317
|
startedAt: z$1.ZodString;
|
|
@@ -3198,9 +3320,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3198
3320
|
evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
|
|
3199
3321
|
target: z$1.ZodObject<{
|
|
3200
3322
|
mode: z$1.ZodEnum<{
|
|
3201
|
-
caseIds: "caseIds";
|
|
3202
3323
|
all: "all";
|
|
3203
3324
|
evalIds: "evalIds";
|
|
3325
|
+
caseIds: "caseIds";
|
|
3204
3326
|
}>;
|
|
3205
3327
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
3206
3328
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -3214,9 +3336,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3214
3336
|
median: "median";
|
|
3215
3337
|
}>>>;
|
|
3216
3338
|
cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3217
|
-
refresh: "refresh";
|
|
3218
|
-
bypass: "bypass";
|
|
3219
3339
|
use: "use";
|
|
3340
|
+
bypass: "bypass";
|
|
3341
|
+
refresh: "refresh";
|
|
3220
3342
|
}>>;
|
|
3221
3343
|
}, z$1.core.$strip>;
|
|
3222
3344
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -3225,11 +3347,11 @@ type RunManifest = z$1.infer<typeof runManifestSchema$1>;
|
|
|
3225
3347
|
declare const runSummarySchema$1: z$1.ZodObject<{
|
|
3226
3348
|
runId: z$1.ZodString;
|
|
3227
3349
|
status: z$1.ZodEnum<{
|
|
3228
|
-
error: "error";
|
|
3229
|
-
running: "running";
|
|
3230
|
-
cancelled: "cancelled";
|
|
3231
3350
|
pending: "pending";
|
|
3351
|
+
running: "running";
|
|
3232
3352
|
completed: "completed";
|
|
3353
|
+
cancelled: "cancelled";
|
|
3354
|
+
error: "error";
|
|
3233
3355
|
}>;
|
|
3234
3356
|
totalCases: z$1.ZodNumber;
|
|
3235
3357
|
passedCases: z$1.ZodNumber;
|
|
@@ -3312,8 +3434,8 @@ type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
|
|
|
3312
3434
|
/** Built-in eval-level output/column keys. */
|
|
3313
3435
|
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
3314
3436
|
declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
|
|
3315
|
-
costUsd: "costUsd";
|
|
3316
3437
|
apiCalls: "apiCalls";
|
|
3438
|
+
costUsd: "costUsd";
|
|
3317
3439
|
llmTurns: "llmTurns";
|
|
3318
3440
|
inputTokens: "inputTokens";
|
|
3319
3441
|
outputTokens: "outputTokens";
|
|
@@ -3818,6 +3940,15 @@ type AgentEvalsConfig$1 = {
|
|
|
3818
3940
|
* appended last unless removed with `removeDefaultConfig`.
|
|
3819
3941
|
*/
|
|
3820
3942
|
stats?: EvalStatsConfig;
|
|
3943
|
+
/**
|
|
3944
|
+
* Initial aggregate mode used for duration and column stats on every eval
|
|
3945
|
+
* card.
|
|
3946
|
+
*
|
|
3947
|
+
* Per-eval `defaultStatAggregate` overrides this value. Individual stat
|
|
3948
|
+
* `aggregate` values still define their authored reducer and remain the
|
|
3949
|
+
* fallback when no default aggregate is configured.
|
|
3950
|
+
*/
|
|
3951
|
+
defaultStatAggregate?: EvalStatAggregate;
|
|
3821
3952
|
/**
|
|
3822
3953
|
* Configuration for the "LLM calls" tab in the case-run drawer.
|
|
3823
3954
|
*
|
|
@@ -4118,9 +4249,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
|
|
|
4118
4249
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
4119
4250
|
*/
|
|
4120
4251
|
declare const cacheModeSchema: z$1.ZodEnum<{
|
|
4121
|
-
refresh: "refresh";
|
|
4122
|
-
bypass: "bypass";
|
|
4123
4252
|
use: "use";
|
|
4253
|
+
bypass: "bypass";
|
|
4254
|
+
refresh: "refresh";
|
|
4124
4255
|
}>;
|
|
4125
4256
|
/** Mode controlling how cached spans behave during a run. */
|
|
4126
4257
|
type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
@@ -4141,10 +4272,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
|
4141
4272
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
4142
4273
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4143
4274
|
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
4275
|
+
bypass: "bypass";
|
|
4276
|
+
refresh: "refresh";
|
|
4144
4277
|
hit: "hit";
|
|
4145
4278
|
miss: "miss";
|
|
4146
|
-
refresh: "refresh";
|
|
4147
|
-
bypass: "bypass";
|
|
4148
4279
|
}>;
|
|
4149
4280
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4150
4281
|
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
@@ -4161,10 +4292,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
4161
4292
|
namespace: z$1.ZodString;
|
|
4162
4293
|
key: z$1.ZodString;
|
|
4163
4294
|
status: z$1.ZodEnum<{
|
|
4295
|
+
bypass: "bypass";
|
|
4296
|
+
refresh: "refresh";
|
|
4164
4297
|
hit: "hit";
|
|
4165
4298
|
miss: "miss";
|
|
4166
|
-
refresh: "refresh";
|
|
4167
|
-
bypass: "bypass";
|
|
4168
4299
|
}>;
|
|
4169
4300
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
4170
4301
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -4239,9 +4370,9 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
|
|
|
4239
4370
|
returnValue: z$1.ZodUnknown;
|
|
4240
4371
|
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
4241
4372
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4242
|
-
error: "error";
|
|
4243
4373
|
running: "running";
|
|
4244
4374
|
cancelled: "cancelled";
|
|
4375
|
+
error: "error";
|
|
4245
4376
|
ok: "ok";
|
|
4246
4377
|
}>>;
|
|
4247
4378
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -4312,9 +4443,9 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
4312
4443
|
returnValue: z$1.ZodUnknown;
|
|
4313
4444
|
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
4314
4445
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4315
|
-
error: "error";
|
|
4316
4446
|
running: "running";
|
|
4317
4447
|
cancelled: "cancelled";
|
|
4448
|
+
error: "error";
|
|
4318
4449
|
ok: "ok";
|
|
4319
4450
|
}>>;
|
|
4320
4451
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -4402,9 +4533,9 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4402
4533
|
returnValue: z$1.ZodUnknown;
|
|
4403
4534
|
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
4404
4535
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4405
|
-
error: "error";
|
|
4406
4536
|
running: "running";
|
|
4407
4537
|
cancelled: "cancelled";
|
|
4538
|
+
error: "error";
|
|
4408
4539
|
ok: "ok";
|
|
4409
4540
|
}>>;
|
|
4410
4541
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -4481,9 +4612,9 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4481
4612
|
returnValue: z$1.ZodUnknown;
|
|
4482
4613
|
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
4483
4614
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4484
|
-
error: "error";
|
|
4485
4615
|
running: "running";
|
|
4486
4616
|
cancelled: "cancelled";
|
|
4617
|
+
error: "error";
|
|
4487
4618
|
ok: "ok";
|
|
4488
4619
|
}>>;
|
|
4489
4620
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -4562,9 +4693,9 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4562
4693
|
returnValue: z$1.ZodUnknown;
|
|
4563
4694
|
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
4564
4695
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4565
|
-
error: "error";
|
|
4566
4696
|
running: "running";
|
|
4567
4697
|
cancelled: "cancelled";
|
|
4698
|
+
error: "error";
|
|
4568
4699
|
ok: "ok";
|
|
4569
4700
|
}>>;
|
|
4570
4701
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -4641,9 +4772,9 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
4641
4772
|
returnValue: z$1.ZodUnknown;
|
|
4642
4773
|
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
4643
4774
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4644
|
-
error: "error";
|
|
4645
4775
|
running: "running";
|
|
4646
4776
|
cancelled: "cancelled";
|
|
4777
|
+
error: "error";
|
|
4647
4778
|
ok: "ok";
|
|
4648
4779
|
}>>;
|
|
4649
4780
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -4730,9 +4861,9 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
4730
4861
|
returnValue: z$1.ZodUnknown;
|
|
4731
4862
|
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
4732
4863
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4733
|
-
error: "error";
|
|
4734
4864
|
running: "running";
|
|
4735
4865
|
cancelled: "cancelled";
|
|
4866
|
+
error: "error";
|
|
4736
4867
|
ok: "ok";
|
|
4737
4868
|
}>>;
|
|
4738
4869
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -4889,9 +5020,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
|
|
|
4889
5020
|
declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
4890
5021
|
target: z$1.ZodObject<{
|
|
4891
5022
|
mode: z$1.ZodEnum<{
|
|
4892
|
-
caseIds: "caseIds";
|
|
4893
5023
|
all: "all";
|
|
4894
5024
|
evalIds: "evalIds";
|
|
5025
|
+
caseIds: "caseIds";
|
|
4895
5026
|
}>;
|
|
4896
5027
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
4897
5028
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -4903,9 +5034,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
|
4903
5034
|
temporary: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
4904
5035
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
4905
5036
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
4906
|
-
refresh: "refresh";
|
|
4907
|
-
bypass: "bypass";
|
|
4908
5037
|
use: "use";
|
|
5038
|
+
bypass: "bypass";
|
|
5039
|
+
refresh: "refresh";
|
|
4909
5040
|
}>>;
|
|
4910
5041
|
}, z$1.core.$strip>>;
|
|
4911
5042
|
manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -5242,9 +5373,25 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
5242
5373
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
5243
5374
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5244
5375
|
kind: z$1.ZodLiteral<"duration">;
|
|
5376
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5377
|
+
avg: "avg";
|
|
5378
|
+
min: "min";
|
|
5379
|
+
max: "max";
|
|
5380
|
+
sum: "sum";
|
|
5381
|
+
best: "best";
|
|
5382
|
+
worst: "worst";
|
|
5383
|
+
}>>;
|
|
5245
5384
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
5246
5385
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5247
5386
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
5387
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5388
|
+
avg: "avg";
|
|
5389
|
+
min: "min";
|
|
5390
|
+
max: "max";
|
|
5391
|
+
sum: "sum";
|
|
5392
|
+
best: "best";
|
|
5393
|
+
worst: "worst";
|
|
5394
|
+
}>>;
|
|
5248
5395
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
5249
5396
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5250
5397
|
kind: z$1.ZodLiteral<"column">;
|
|
@@ -5255,7 +5402,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
5255
5402
|
min: "min";
|
|
5256
5403
|
max: "max";
|
|
5257
5404
|
sum: "sum";
|
|
5258
|
-
|
|
5405
|
+
best: "best";
|
|
5406
|
+
worst: "worst";
|
|
5259
5407
|
}>;
|
|
5260
5408
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5261
5409
|
number: "number";
|
|
@@ -5276,6 +5424,14 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
5276
5424
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions$1, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions$1, unknown>>>;
|
|
5277
5425
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5278
5426
|
}, z$1.core.$strip>], "kind">>>;
|
|
5427
|
+
defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5428
|
+
avg: "avg";
|
|
5429
|
+
min: "min";
|
|
5430
|
+
max: "max";
|
|
5431
|
+
sum: "sum";
|
|
5432
|
+
best: "best";
|
|
5433
|
+
worst: "worst";
|
|
5434
|
+
}>>;
|
|
5279
5435
|
charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
5280
5436
|
heading: z$1.ZodOptional<z$1.ZodString>;
|
|
5281
5437
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -5555,8 +5711,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
5555
5711
|
}>>;
|
|
5556
5712
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5557
5713
|
sum: "sum";
|
|
5558
|
-
last: "last";
|
|
5559
5714
|
all: "all";
|
|
5715
|
+
last: "last";
|
|
5560
5716
|
}>>;
|
|
5561
5717
|
}, z$1.core.$strip>>>;
|
|
5562
5718
|
}, z$1.core.$strip>;
|
|
@@ -5624,8 +5780,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
5624
5780
|
}>>;
|
|
5625
5781
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5626
5782
|
sum: "sum";
|
|
5627
|
-
last: "last";
|
|
5628
5783
|
all: "all";
|
|
5784
|
+
last: "last";
|
|
5629
5785
|
}>>;
|
|
5630
5786
|
}, z$1.core.$strip>>>;
|
|
5631
5787
|
}, z$1.core.$strip>;
|
|
@@ -6343,6 +6499,22 @@ type EvalRunner = {
|
|
|
6343
6499
|
deleteRun(runId: string): Promise<{
|
|
6344
6500
|
deleted: boolean;
|
|
6345
6501
|
}>;
|
|
6502
|
+
/**
|
|
6503
|
+
* Convert a temporary persisted run into durable run history.
|
|
6504
|
+
*
|
|
6505
|
+
* Returns the updated run when found. Already-durable runs are treated as a
|
|
6506
|
+
* no-op success so UI callers can refresh their cached copy idempotently.
|
|
6507
|
+
*/
|
|
6508
|
+
promoteRun(runId: string): Promise<{
|
|
6509
|
+
promoted: boolean;
|
|
6510
|
+
run: {
|
|
6511
|
+
manifest: RunManifest$1;
|
|
6512
|
+
summary: RunSummary$1;
|
|
6513
|
+
cases: CaseRow$1[];
|
|
6514
|
+
};
|
|
6515
|
+
} | {
|
|
6516
|
+
promoted: false;
|
|
6517
|
+
}>;
|
|
6346
6518
|
/**
|
|
6347
6519
|
* Validate a `CreateRunRequest`'s `manualInputs` map against each targeted
|
|
6348
6520
|
* eval's authored `manualInput.schema`. Returns `ok: true` with the parsed
|