@ls-stack/agent-eval 0.53.0 → 0.55.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-D0a57pVo.mjs → app-CunZ8Dku.mjs} +12 -4
- package/dist/apps/web/dist/assets/index-2I-eWzVL.css +1 -0
- package/dist/apps/web/dist/assets/index-CvsPmlHl.js +377 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-3FrKBc9l.mjs → cli-rvPrUj6S.mjs} +51 -11
- package/dist/index.d.mts +305 -133
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -1
- package/dist/{runOrchestration-Cn6fGL2s.mjs → runOrchestration-BWyE5lRX.mjs} +236 -134
- package/dist/{runner-Dsqj431i.mjs → runner-C2fvjKZP.mjs} +1 -1
- package/dist/{runner-C0qdoRSi.mjs → runner-CFQ8LZmY.mjs} +2 -2
- package/dist/{src-BNmtaqeC.mjs → src-DEENkbkn.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +14 -3
- package/dist/apps/web/dist/assets/index-RNejIyap.js +0 -375
- package/dist/apps/web/dist/assets/index-vaLgWG8j.css +0 -1
package/dist/index.d.mts
CHANGED
|
@@ -144,6 +144,25 @@ declare const traceSpanSchema$2: z$1.ZodObject<{
|
|
|
144
144
|
/** Persisted trace span shape stored for each eval case run. */
|
|
145
145
|
type EvalTraceSpan$2 = z$1.infer<typeof traceSpanSchema$2>; //#endregion
|
|
146
146
|
//#region ../shared/src/schemas/eval.d.ts
|
|
147
|
+
/**
|
|
148
|
+
* Reducer used to collapse per-case values into a single duration or column
|
|
149
|
+
* stat.
|
|
150
|
+
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
151
|
+
*/
|
|
152
|
+
declare const evalStatAggregateSchema$1: z$1.ZodEnum<{
|
|
153
|
+
avg: "avg";
|
|
154
|
+
min: "min";
|
|
155
|
+
max: "max";
|
|
156
|
+
sum: "sum";
|
|
157
|
+
best: "best";
|
|
158
|
+
worst: "worst";
|
|
159
|
+
}>;
|
|
160
|
+
/**
|
|
161
|
+
* Reducer used to collapse per-case values into a single duration or column
|
|
162
|
+
* stat.
|
|
163
|
+
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
164
|
+
*/
|
|
165
|
+
type EvalStatAggregate$1 = z$1.infer<typeof evalStatAggregateSchema$1>;
|
|
147
166
|
/** Ordered list of stats rendered in the EvalCard stats row. */
|
|
148
167
|
declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
149
168
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -155,9 +174,25 @@ declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z
|
|
|
155
174
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
156
175
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
157
176
|
kind: z$1.ZodLiteral<"duration">;
|
|
177
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
178
|
+
avg: "avg";
|
|
179
|
+
min: "min";
|
|
180
|
+
max: "max";
|
|
181
|
+
sum: "sum";
|
|
182
|
+
best: "best";
|
|
183
|
+
worst: "worst";
|
|
184
|
+
}>>;
|
|
158
185
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
159
186
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
160
187
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
188
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
189
|
+
avg: "avg";
|
|
190
|
+
min: "min";
|
|
191
|
+
max: "max";
|
|
192
|
+
sum: "sum";
|
|
193
|
+
best: "best";
|
|
194
|
+
worst: "worst";
|
|
195
|
+
}>>;
|
|
161
196
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
162
197
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
163
198
|
kind: z$1.ZodLiteral<"column">;
|
|
@@ -168,7 +203,8 @@ declare const evalStatsConfigSchema$1: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z
|
|
|
168
203
|
min: "min";
|
|
169
204
|
max: "max";
|
|
170
205
|
sum: "sum";
|
|
171
|
-
|
|
206
|
+
best: "best";
|
|
207
|
+
worst: "worst";
|
|
172
208
|
}>;
|
|
173
209
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
174
210
|
number: "number";
|
|
@@ -1022,16 +1058,28 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
|
|
|
1022
1058
|
* When provided, the stats render in order, left to right.
|
|
1023
1059
|
*
|
|
1024
1060
|
* Built-in kinds (`cases`, `passRate`, `duration`, `cacheHits`) read from
|
|
1025
|
-
* the latest run
|
|
1026
|
-
*
|
|
1027
|
-
*
|
|
1028
|
-
*
|
|
1029
|
-
*
|
|
1030
|
-
*
|
|
1031
|
-
*
|
|
1032
|
-
*
|
|
1061
|
+
* the latest run. `duration` aggregates finite per-case durations using the
|
|
1062
|
+
* same modes as column stats. `cacheHits` counts Agent Eval operation-level
|
|
1063
|
+
* cache hits over total cache operations, not LLM provider prompt-cache read
|
|
1064
|
+
* tokens. Cache-hit stats have their own aggregate mode and default to `sum`;
|
|
1065
|
+
* `avg` is average per-case hit rate, and min/max/best/worst select cases by
|
|
1066
|
+
* hit rate. `kind: 'column'` aggregates a score or numeric output column
|
|
1067
|
+
* across the latest run's cases — `key` must match one of the eval's score or
|
|
1068
|
+
* column keys, and only finite numeric values participate in the reduction.
|
|
1069
|
+
* When no case has a numeric value for the key the stat renders an em dash, or
|
|
1070
|
+
* hides when `hideIfNoValue` is true. `label`, `format`, and `numberFormat`
|
|
1071
|
+
* default to the matching `ColumnDef`.
|
|
1033
1072
|
*/
|
|
1034
1073
|
stats?: EvalStatsConfig$1;
|
|
1074
|
+
/**
|
|
1075
|
+
* Initial aggregate mode used for this eval's duration and column stats in
|
|
1076
|
+
* the web UI.
|
|
1077
|
+
*
|
|
1078
|
+
* Overrides `AgentEvalsConfig.defaultStatAggregate`. Individual stat
|
|
1079
|
+
* `aggregate` values still define their authored reducer and remain the
|
|
1080
|
+
* fallback when neither default is configured.
|
|
1081
|
+
*/
|
|
1082
|
+
defaultStatAggregate?: EvalStatAggregate$1;
|
|
1035
1083
|
/**
|
|
1036
1084
|
* Optional history chart configuration for the EvalCard in the web UI.
|
|
1037
1085
|
*
|
|
@@ -1083,8 +1131,12 @@ type EvalRegistryEntry = {
|
|
|
1083
1131
|
/** Return the in-memory registry of evals defined in the current process. */
|
|
1084
1132
|
declare function getEvalRegistry(): Map<string, EvalRegistryEntry>;
|
|
1085
1133
|
/**
|
|
1086
|
-
*
|
|
1087
|
-
*
|
|
1134
|
+
* Execute a callback with an empty async-local eval registry.
|
|
1135
|
+
*
|
|
1136
|
+
* Runner internals use this when importing eval modules concurrently so
|
|
1137
|
+
* `defineEval(...)` calls from one import cannot overwrite another import's
|
|
1138
|
+
* registered definitions. The callback receives the scoped registry populated
|
|
1139
|
+
* during its async execution.
|
|
1088
1140
|
*/
|
|
1089
1141
|
//#endregion
|
|
1090
1142
|
//#region src/evalExpect.d.ts
|
|
@@ -1729,14 +1781,14 @@ declare const columnFormatSchema: z$1.ZodEnum<{
|
|
|
1729
1781
|
number: "number";
|
|
1730
1782
|
boolean: "boolean";
|
|
1731
1783
|
file: "file";
|
|
1732
|
-
markdown: "markdown";
|
|
1733
1784
|
json: "json";
|
|
1785
|
+
duration: "duration";
|
|
1786
|
+
markdown: "markdown";
|
|
1734
1787
|
image: "image";
|
|
1735
1788
|
html: "html";
|
|
1736
1789
|
pdf: "pdf";
|
|
1737
1790
|
audio: "audio";
|
|
1738
1791
|
video: "video";
|
|
1739
|
-
duration: "duration";
|
|
1740
1792
|
percent: "percent";
|
|
1741
1793
|
passFail: "passFail";
|
|
1742
1794
|
stars: "stars";
|
|
@@ -1756,14 +1808,14 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
1756
1808
|
number: "number";
|
|
1757
1809
|
boolean: "boolean";
|
|
1758
1810
|
file: "file";
|
|
1759
|
-
markdown: "markdown";
|
|
1760
1811
|
json: "json";
|
|
1812
|
+
duration: "duration";
|
|
1813
|
+
markdown: "markdown";
|
|
1761
1814
|
image: "image";
|
|
1762
1815
|
html: "html";
|
|
1763
1816
|
pdf: "pdf";
|
|
1764
1817
|
audio: "audio";
|
|
1765
1818
|
video: "video";
|
|
1766
|
-
duration: "duration";
|
|
1767
1819
|
percent: "percent";
|
|
1768
1820
|
passFail: "passFail";
|
|
1769
1821
|
stars: "stars";
|
|
@@ -1777,8 +1829,8 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
1777
1829
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1778
1830
|
align: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1779
1831
|
left: "left";
|
|
1780
|
-
center: "center";
|
|
1781
1832
|
right: "right";
|
|
1833
|
+
center: "center";
|
|
1782
1834
|
}>>;
|
|
1783
1835
|
}, z$1.core.$strip>;
|
|
1784
1836
|
/** Column definition exposed to the UI for eval and case tables. */
|
|
@@ -1848,9 +1900,9 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
1848
1900
|
subtree: "subtree";
|
|
1849
1901
|
}>>;
|
|
1850
1902
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1851
|
-
sum: "sum";
|
|
1852
|
-
last: "last";
|
|
1853
1903
|
all: "all";
|
|
1904
|
+
last: "last";
|
|
1905
|
+
sum: "sum";
|
|
1854
1906
|
}>>;
|
|
1855
1907
|
}, z$1.core.$strip>;
|
|
1856
1908
|
/**
|
|
@@ -1884,9 +1936,9 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
1884
1936
|
subtree: "subtree";
|
|
1885
1937
|
}>>;
|
|
1886
1938
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1887
|
-
sum: "sum";
|
|
1888
|
-
last: "last";
|
|
1889
1939
|
all: "all";
|
|
1940
|
+
last: "last";
|
|
1941
|
+
sum: "sum";
|
|
1890
1942
|
}>>;
|
|
1891
1943
|
}, z$1.core.$strip>>>;
|
|
1892
1944
|
}, z$1.core.$strip>;
|
|
@@ -1924,9 +1976,9 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
1924
1976
|
subtree: "subtree";
|
|
1925
1977
|
}>>;
|
|
1926
1978
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1927
|
-
sum: "sum";
|
|
1928
|
-
last: "last";
|
|
1929
1979
|
all: "all";
|
|
1980
|
+
last: "last";
|
|
1981
|
+
sum: "sum";
|
|
1930
1982
|
}>>;
|
|
1931
1983
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
1932
1984
|
}, z$1.core.$strip>;
|
|
@@ -1962,9 +2014,9 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
1962
2014
|
subtree: "subtree";
|
|
1963
2015
|
}>>;
|
|
1964
2016
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
1965
|
-
sum: "sum";
|
|
1966
|
-
last: "last";
|
|
1967
2017
|
all: "all";
|
|
2018
|
+
last: "last";
|
|
2019
|
+
sum: "sum";
|
|
1968
2020
|
}>>;
|
|
1969
2021
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
1970
2022
|
}, z$1.core.$strip>>>;
|
|
@@ -2001,8 +2053,8 @@ declare const traceSpanSchema$1: z$1.ZodObject<{
|
|
|
2001
2053
|
status: z$1.ZodEnum<{
|
|
2002
2054
|
error: "error";
|
|
2003
2055
|
running: "running";
|
|
2004
|
-
cancelled: "cancelled";
|
|
2005
2056
|
ok: "ok";
|
|
2057
|
+
cancelled: "cancelled";
|
|
2006
2058
|
}>;
|
|
2007
2059
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2008
2060
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2041,21 +2093,32 @@ declare const evalFreshnessStatusSchema: z$1.ZodEnum<{
|
|
|
2041
2093
|
}>;
|
|
2042
2094
|
/** Freshness signal derived from the latest relevant run plus git state. */
|
|
2043
2095
|
type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
|
|
2044
|
-
/**
|
|
2096
|
+
/**
|
|
2097
|
+
* Reducer used to collapse per-case values into a single duration or column
|
|
2098
|
+
* stat.
|
|
2099
|
+
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
2100
|
+
*/
|
|
2045
2101
|
declare const evalStatAggregateSchema: z$1.ZodEnum<{
|
|
2046
|
-
avg: "avg";
|
|
2047
2102
|
min: "min";
|
|
2048
2103
|
max: "max";
|
|
2049
2104
|
sum: "sum";
|
|
2050
|
-
|
|
2105
|
+
avg: "avg";
|
|
2106
|
+
best: "best";
|
|
2107
|
+
worst: "worst";
|
|
2051
2108
|
}>;
|
|
2052
|
-
/**
|
|
2109
|
+
/**
|
|
2110
|
+
* Reducer used to collapse per-case values into a single duration or column
|
|
2111
|
+
* stat.
|
|
2112
|
+
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
2113
|
+
*/
|
|
2053
2114
|
type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
|
|
2054
2115
|
/**
|
|
2055
|
-
* One entry in the EvalCard stats row. Built-in kinds
|
|
2056
|
-
* `
|
|
2057
|
-
* `evalTracer.cache(...)` refs, not
|
|
2058
|
-
*
|
|
2116
|
+
* One entry in the EvalCard stats row. Built-in kinds read from the latest run;
|
|
2117
|
+
* `duration` aggregates per-case durations, `cacheHits` counts Agent Eval
|
|
2118
|
+
* operation-level cache hits from spans and `evalTracer.cache(...)` refs, not
|
|
2119
|
+
* LLM provider prompt-cache read tokens. Cache hits use an independent
|
|
2120
|
+
* aggregate mode and default to `sum`. `column` aggregates a score or numeric
|
|
2121
|
+
* output column across the latest run.
|
|
2059
2122
|
*/
|
|
2060
2123
|
declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
2061
2124
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2067,33 +2130,50 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2067
2130
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2068
2131
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2069
2132
|
kind: z$1.ZodLiteral<"duration">;
|
|
2133
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2134
|
+
min: "min";
|
|
2135
|
+
max: "max";
|
|
2136
|
+
sum: "sum";
|
|
2137
|
+
avg: "avg";
|
|
2138
|
+
best: "best";
|
|
2139
|
+
worst: "worst";
|
|
2140
|
+
}>>;
|
|
2070
2141
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2071
2142
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2072
2143
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2144
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2145
|
+
min: "min";
|
|
2146
|
+
max: "max";
|
|
2147
|
+
sum: "sum";
|
|
2148
|
+
avg: "avg";
|
|
2149
|
+
best: "best";
|
|
2150
|
+
worst: "worst";
|
|
2151
|
+
}>>;
|
|
2073
2152
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2074
2153
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2075
2154
|
kind: z$1.ZodLiteral<"column">;
|
|
2076
2155
|
key: z$1.ZodString;
|
|
2077
2156
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2078
2157
|
aggregate: z$1.ZodEnum<{
|
|
2079
|
-
avg: "avg";
|
|
2080
2158
|
min: "min";
|
|
2081
2159
|
max: "max";
|
|
2082
2160
|
sum: "sum";
|
|
2083
|
-
|
|
2161
|
+
avg: "avg";
|
|
2162
|
+
best: "best";
|
|
2163
|
+
worst: "worst";
|
|
2084
2164
|
}>;
|
|
2085
2165
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2086
2166
|
number: "number";
|
|
2087
2167
|
boolean: "boolean";
|
|
2088
2168
|
file: "file";
|
|
2089
|
-
markdown: "markdown";
|
|
2090
2169
|
json: "json";
|
|
2170
|
+
duration: "duration";
|
|
2171
|
+
markdown: "markdown";
|
|
2091
2172
|
image: "image";
|
|
2092
2173
|
html: "html";
|
|
2093
2174
|
pdf: "pdf";
|
|
2094
2175
|
audio: "audio";
|
|
2095
2176
|
video: "video";
|
|
2096
|
-
duration: "duration";
|
|
2097
2177
|
percent: "percent";
|
|
2098
2178
|
passFail: "passFail";
|
|
2099
2179
|
stars: "stars";
|
|
@@ -2114,33 +2194,50 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2114
2194
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2115
2195
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2116
2196
|
kind: z$1.ZodLiteral<"duration">;
|
|
2197
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2198
|
+
min: "min";
|
|
2199
|
+
max: "max";
|
|
2200
|
+
sum: "sum";
|
|
2201
|
+
avg: "avg";
|
|
2202
|
+
best: "best";
|
|
2203
|
+
worst: "worst";
|
|
2204
|
+
}>>;
|
|
2117
2205
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2118
2206
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2119
2207
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2208
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2209
|
+
min: "min";
|
|
2210
|
+
max: "max";
|
|
2211
|
+
sum: "sum";
|
|
2212
|
+
avg: "avg";
|
|
2213
|
+
best: "best";
|
|
2214
|
+
worst: "worst";
|
|
2215
|
+
}>>;
|
|
2120
2216
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2121
2217
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2122
2218
|
kind: z$1.ZodLiteral<"column">;
|
|
2123
2219
|
key: z$1.ZodString;
|
|
2124
2220
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2125
2221
|
aggregate: z$1.ZodEnum<{
|
|
2126
|
-
avg: "avg";
|
|
2127
2222
|
min: "min";
|
|
2128
2223
|
max: "max";
|
|
2129
2224
|
sum: "sum";
|
|
2130
|
-
|
|
2225
|
+
avg: "avg";
|
|
2226
|
+
best: "best";
|
|
2227
|
+
worst: "worst";
|
|
2131
2228
|
}>;
|
|
2132
2229
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2133
2230
|
number: "number";
|
|
2134
2231
|
boolean: "boolean";
|
|
2135
2232
|
file: "file";
|
|
2136
|
-
markdown: "markdown";
|
|
2137
2233
|
json: "json";
|
|
2234
|
+
duration: "duration";
|
|
2235
|
+
markdown: "markdown";
|
|
2138
2236
|
image: "image";
|
|
2139
2237
|
html: "html";
|
|
2140
2238
|
pdf: "pdf";
|
|
2141
2239
|
audio: "audio";
|
|
2142
2240
|
video: "video";
|
|
2143
|
-
duration: "duration";
|
|
2144
2241
|
percent: "percent";
|
|
2145
2242
|
passFail: "passFail";
|
|
2146
2243
|
stars: "stars";
|
|
@@ -2179,14 +2276,14 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2179
2276
|
number: "number";
|
|
2180
2277
|
boolean: "boolean";
|
|
2181
2278
|
file: "file";
|
|
2182
|
-
markdown: "markdown";
|
|
2183
2279
|
json: "json";
|
|
2280
|
+
duration: "duration";
|
|
2281
|
+
markdown: "markdown";
|
|
2184
2282
|
image: "image";
|
|
2185
2283
|
html: "html";
|
|
2186
2284
|
pdf: "pdf";
|
|
2187
2285
|
audio: "audio";
|
|
2188
2286
|
video: "video";
|
|
2189
|
-
duration: "duration";
|
|
2190
2287
|
percent: "percent";
|
|
2191
2288
|
passFail: "passFail";
|
|
2192
2289
|
stars: "stars";
|
|
@@ -2200,18 +2297,18 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2200
2297
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2201
2298
|
align: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2202
2299
|
left: "left";
|
|
2203
|
-
center: "center";
|
|
2204
2300
|
right: "right";
|
|
2301
|
+
center: "center";
|
|
2205
2302
|
}>>;
|
|
2206
2303
|
}, z$1.core.$strip>>;
|
|
2207
2304
|
caseCount: z$1.ZodNullable<z$1.ZodNumber>;
|
|
2208
2305
|
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2209
2306
|
lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
|
|
2210
2307
|
error: "error";
|
|
2211
|
-
pass: "pass";
|
|
2212
|
-
fail: "fail";
|
|
2213
2308
|
running: "running";
|
|
2214
2309
|
cancelled: "cancelled";
|
|
2310
|
+
pass: "pass";
|
|
2311
|
+
fail: "fail";
|
|
2215
2312
|
unscored: "unscored";
|
|
2216
2313
|
}>>;
|
|
2217
2314
|
stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
@@ -2224,33 +2321,50 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2224
2321
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2225
2322
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2226
2323
|
kind: z$1.ZodLiteral<"duration">;
|
|
2324
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2325
|
+
min: "min";
|
|
2326
|
+
max: "max";
|
|
2327
|
+
sum: "sum";
|
|
2328
|
+
avg: "avg";
|
|
2329
|
+
best: "best";
|
|
2330
|
+
worst: "worst";
|
|
2331
|
+
}>>;
|
|
2227
2332
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2228
2333
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2229
2334
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2335
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2336
|
+
min: "min";
|
|
2337
|
+
max: "max";
|
|
2338
|
+
sum: "sum";
|
|
2339
|
+
avg: "avg";
|
|
2340
|
+
best: "best";
|
|
2341
|
+
worst: "worst";
|
|
2342
|
+
}>>;
|
|
2230
2343
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2231
2344
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2232
2345
|
kind: z$1.ZodLiteral<"column">;
|
|
2233
2346
|
key: z$1.ZodString;
|
|
2234
2347
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2235
2348
|
aggregate: z$1.ZodEnum<{
|
|
2236
|
-
avg: "avg";
|
|
2237
2349
|
min: "min";
|
|
2238
2350
|
max: "max";
|
|
2239
2351
|
sum: "sum";
|
|
2240
|
-
|
|
2352
|
+
avg: "avg";
|
|
2353
|
+
best: "best";
|
|
2354
|
+
worst: "worst";
|
|
2241
2355
|
}>;
|
|
2242
2356
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2243
2357
|
number: "number";
|
|
2244
2358
|
boolean: "boolean";
|
|
2245
2359
|
file: "file";
|
|
2246
|
-
markdown: "markdown";
|
|
2247
2360
|
json: "json";
|
|
2361
|
+
duration: "duration";
|
|
2362
|
+
markdown: "markdown";
|
|
2248
2363
|
image: "image";
|
|
2249
2364
|
html: "html";
|
|
2250
2365
|
pdf: "pdf";
|
|
2251
2366
|
audio: "audio";
|
|
2252
2367
|
video: "video";
|
|
2253
|
-
duration: "duration";
|
|
2254
2368
|
percent: "percent";
|
|
2255
2369
|
passFail: "passFail";
|
|
2256
2370
|
stars: "stars";
|
|
@@ -2258,6 +2372,14 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2258
2372
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2259
2373
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2260
2374
|
}, z$1.core.$strip>], "kind">>>;
|
|
2375
|
+
defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2376
|
+
min: "min";
|
|
2377
|
+
max: "max";
|
|
2378
|
+
sum: "sum";
|
|
2379
|
+
avg: "avg";
|
|
2380
|
+
best: "best";
|
|
2381
|
+
worst: "worst";
|
|
2382
|
+
}>>;
|
|
2261
2383
|
charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
2262
2384
|
heading: z$1.ZodOptional<z$1.ZodString>;
|
|
2263
2385
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2270,13 +2392,13 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2270
2392
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
2271
2393
|
source: z$1.ZodLiteral<"builtin">;
|
|
2272
2394
|
metric: z$1.ZodEnum<{
|
|
2273
|
-
passRate: "passRate";
|
|
2274
2395
|
durationMs: "durationMs";
|
|
2396
|
+
passRate: "passRate";
|
|
2275
2397
|
}>;
|
|
2276
2398
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2277
2399
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2278
|
-
success: "success";
|
|
2279
2400
|
error: "error";
|
|
2401
|
+
success: "success";
|
|
2280
2402
|
accent: "accent";
|
|
2281
2403
|
accentDim: "accentDim";
|
|
2282
2404
|
warning: "warning";
|
|
@@ -2290,17 +2412,17 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2290
2412
|
source: z$1.ZodLiteral<"column">;
|
|
2291
2413
|
key: z$1.ZodString;
|
|
2292
2414
|
aggregate: z$1.ZodEnum<{
|
|
2293
|
-
avg: "avg";
|
|
2294
2415
|
min: "min";
|
|
2295
2416
|
max: "max";
|
|
2296
2417
|
sum: "sum";
|
|
2418
|
+
avg: "avg";
|
|
2297
2419
|
latest: "latest";
|
|
2298
2420
|
passThresholdRate: "passThresholdRate";
|
|
2299
2421
|
}>;
|
|
2300
2422
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2301
2423
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2302
|
-
success: "success";
|
|
2303
2424
|
error: "error";
|
|
2425
|
+
success: "success";
|
|
2304
2426
|
accent: "accent";
|
|
2305
2427
|
accentDim: "accentDim";
|
|
2306
2428
|
warning: "warning";
|
|
@@ -2324,18 +2446,18 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2324
2446
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
2325
2447
|
source: z$1.ZodLiteral<"builtin">;
|
|
2326
2448
|
metric: z$1.ZodEnum<{
|
|
2327
|
-
passRate: "passRate";
|
|
2328
2449
|
durationMs: "durationMs";
|
|
2450
|
+
passRate: "passRate";
|
|
2329
2451
|
}>;
|
|
2330
2452
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2331
2453
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2332
2454
|
source: z$1.ZodLiteral<"column">;
|
|
2333
2455
|
key: z$1.ZodString;
|
|
2334
2456
|
aggregate: z$1.ZodEnum<{
|
|
2335
|
-
avg: "avg";
|
|
2336
2457
|
min: "min";
|
|
2337
2458
|
max: "max";
|
|
2338
2459
|
sum: "sum";
|
|
2460
|
+
avg: "avg";
|
|
2339
2461
|
latest: "latest";
|
|
2340
2462
|
passThresholdRate: "passThresholdRate";
|
|
2341
2463
|
}>;
|
|
@@ -2432,11 +2554,11 @@ declare const caseRowSchema$1: z$1.ZodObject<{
|
|
|
2432
2554
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2433
2555
|
status: z$1.ZodEnum<{
|
|
2434
2556
|
error: "error";
|
|
2435
|
-
pass: "pass";
|
|
2436
|
-
fail: "fail";
|
|
2437
2557
|
running: "running";
|
|
2438
2558
|
cancelled: "cancelled";
|
|
2439
2559
|
pending: "pending";
|
|
2560
|
+
pass: "pass";
|
|
2561
|
+
fail: "fail";
|
|
2440
2562
|
}>;
|
|
2441
2563
|
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
2442
2564
|
cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
|
|
@@ -2534,8 +2656,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2534
2656
|
status: z$1.ZodEnum<{
|
|
2535
2657
|
error: "error";
|
|
2536
2658
|
running: "running";
|
|
2537
|
-
cancelled: "cancelled";
|
|
2538
2659
|
ok: "ok";
|
|
2660
|
+
cancelled: "cancelled";
|
|
2539
2661
|
}>;
|
|
2540
2662
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2541
2663
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2585,9 +2707,9 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2585
2707
|
subtree: "subtree";
|
|
2586
2708
|
}>>;
|
|
2587
2709
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2588
|
-
sum: "sum";
|
|
2589
|
-
last: "last";
|
|
2590
2710
|
all: "all";
|
|
2711
|
+
last: "last";
|
|
2712
|
+
sum: "sum";
|
|
2591
2713
|
}>>;
|
|
2592
2714
|
}, z$1.core.$strip>>>;
|
|
2593
2715
|
}, z$1.core.$strip>;
|
|
@@ -2597,10 +2719,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2597
2719
|
namespace: z$1.ZodString;
|
|
2598
2720
|
key: z$1.ZodString;
|
|
2599
2721
|
status: z$1.ZodEnum<{
|
|
2722
|
+
bypass: "bypass";
|
|
2723
|
+
refresh: "refresh";
|
|
2600
2724
|
hit: "hit";
|
|
2601
2725
|
miss: "miss";
|
|
2602
|
-
refresh: "refresh";
|
|
2603
|
-
bypass: "bypass";
|
|
2604
2726
|
}>;
|
|
2605
2727
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2606
2728
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2619,11 +2741,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2619
2741
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2620
2742
|
status: z$1.ZodEnum<{
|
|
2621
2743
|
error: "error";
|
|
2622
|
-
pass: "pass";
|
|
2623
|
-
fail: "fail";
|
|
2624
2744
|
running: "running";
|
|
2625
2745
|
cancelled: "cancelled";
|
|
2626
2746
|
pending: "pending";
|
|
2747
|
+
pass: "pass";
|
|
2748
|
+
fail: "fail";
|
|
2627
2749
|
}>;
|
|
2628
2750
|
input: z$1.ZodUnknown;
|
|
2629
2751
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -2637,8 +2759,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2637
2759
|
status: z$1.ZodEnum<{
|
|
2638
2760
|
error: "error";
|
|
2639
2761
|
running: "running";
|
|
2640
|
-
cancelled: "cancelled";
|
|
2641
2762
|
ok: "ok";
|
|
2763
|
+
cancelled: "cancelled";
|
|
2642
2764
|
}>;
|
|
2643
2765
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2644
2766
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2688,9 +2810,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2688
2810
|
subtree: "subtree";
|
|
2689
2811
|
}>>;
|
|
2690
2812
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2691
|
-
sum: "sum";
|
|
2692
|
-
last: "last";
|
|
2693
2813
|
all: "all";
|
|
2814
|
+
last: "last";
|
|
2815
|
+
sum: "sum";
|
|
2694
2816
|
}>>;
|
|
2695
2817
|
}, z$1.core.$strip>>>;
|
|
2696
2818
|
}, z$1.core.$strip>;
|
|
@@ -2706,8 +2828,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2706
2828
|
status: z$1.ZodEnum<{
|
|
2707
2829
|
error: "error";
|
|
2708
2830
|
running: "running";
|
|
2709
|
-
cancelled: "cancelled";
|
|
2710
2831
|
ok: "ok";
|
|
2832
|
+
cancelled: "cancelled";
|
|
2711
2833
|
}>;
|
|
2712
2834
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2713
2835
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2757,9 +2879,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2757
2879
|
subtree: "subtree";
|
|
2758
2880
|
}>>;
|
|
2759
2881
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2760
|
-
sum: "sum";
|
|
2761
|
-
last: "last";
|
|
2762
2882
|
all: "all";
|
|
2883
|
+
last: "last";
|
|
2884
|
+
sum: "sum";
|
|
2763
2885
|
}>>;
|
|
2764
2886
|
}, z$1.core.$strip>>>;
|
|
2765
2887
|
}, z$1.core.$strip>;
|
|
@@ -2769,10 +2891,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2769
2891
|
namespace: z$1.ZodString;
|
|
2770
2892
|
key: z$1.ZodString;
|
|
2771
2893
|
status: z$1.ZodEnum<{
|
|
2894
|
+
bypass: "bypass";
|
|
2895
|
+
refresh: "refresh";
|
|
2772
2896
|
hit: "hit";
|
|
2773
2897
|
miss: "miss";
|
|
2774
|
-
refresh: "refresh";
|
|
2775
|
-
bypass: "bypass";
|
|
2776
2898
|
}>;
|
|
2777
2899
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2778
2900
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2838,10 +2960,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2838
2960
|
namespace: z$1.ZodString;
|
|
2839
2961
|
key: z$1.ZodString;
|
|
2840
2962
|
status: z$1.ZodEnum<{
|
|
2963
|
+
bypass: "bypass";
|
|
2964
|
+
refresh: "refresh";
|
|
2841
2965
|
hit: "hit";
|
|
2842
2966
|
miss: "miss";
|
|
2843
|
-
refresh: "refresh";
|
|
2844
|
-
bypass: "bypass";
|
|
2845
2967
|
}>;
|
|
2846
2968
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2847
2969
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2884,8 +3006,8 @@ type EvalChartType = z$1.infer<typeof evalChartTypeSchema>;
|
|
|
2884
3006
|
* than from a per-case column.
|
|
2885
3007
|
*/
|
|
2886
3008
|
declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
|
|
2887
|
-
passRate: "passRate";
|
|
2888
3009
|
durationMs: "durationMs";
|
|
3010
|
+
passRate: "passRate";
|
|
2889
3011
|
}>;
|
|
2890
3012
|
/**
|
|
2891
3013
|
* Run-level metric sourced from the aggregated `RunSummary` for a run, rather
|
|
@@ -2894,10 +3016,10 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
|
|
|
2894
3016
|
type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
|
|
2895
3017
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
2896
3018
|
declare const evalChartAggregateSchema: z$1.ZodEnum<{
|
|
2897
|
-
avg: "avg";
|
|
2898
3019
|
min: "min";
|
|
2899
3020
|
max: "max";
|
|
2900
3021
|
sum: "sum";
|
|
3022
|
+
avg: "avg";
|
|
2901
3023
|
latest: "latest";
|
|
2902
3024
|
passThresholdRate: "passThresholdRate";
|
|
2903
3025
|
}>;
|
|
@@ -2908,8 +3030,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
2908
3030
|
* not emit raw hex so authored evals stay decoupled from the web theme.
|
|
2909
3031
|
*/
|
|
2910
3032
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
2911
|
-
success: "success";
|
|
2912
3033
|
error: "error";
|
|
3034
|
+
success: "success";
|
|
2913
3035
|
accent: "accent";
|
|
2914
3036
|
accentDim: "accentDim";
|
|
2915
3037
|
warning: "warning";
|
|
@@ -2932,13 +3054,13 @@ type EvalChartAxis = z$1.infer<typeof evalChartAxisSchema>;
|
|
|
2932
3054
|
declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
2933
3055
|
source: z$1.ZodLiteral<"builtin">;
|
|
2934
3056
|
metric: z$1.ZodEnum<{
|
|
2935
|
-
passRate: "passRate";
|
|
2936
3057
|
durationMs: "durationMs";
|
|
3058
|
+
passRate: "passRate";
|
|
2937
3059
|
}>;
|
|
2938
3060
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2939
3061
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2940
|
-
success: "success";
|
|
2941
3062
|
error: "error";
|
|
3063
|
+
success: "success";
|
|
2942
3064
|
accent: "accent";
|
|
2943
3065
|
accentDim: "accentDim";
|
|
2944
3066
|
warning: "warning";
|
|
@@ -2952,17 +3074,17 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2952
3074
|
source: z$1.ZodLiteral<"column">;
|
|
2953
3075
|
key: z$1.ZodString;
|
|
2954
3076
|
aggregate: z$1.ZodEnum<{
|
|
2955
|
-
avg: "avg";
|
|
2956
3077
|
min: "min";
|
|
2957
3078
|
max: "max";
|
|
2958
3079
|
sum: "sum";
|
|
3080
|
+
avg: "avg";
|
|
2959
3081
|
latest: "latest";
|
|
2960
3082
|
passThresholdRate: "passThresholdRate";
|
|
2961
3083
|
}>;
|
|
2962
3084
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2963
3085
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2964
|
-
success: "success";
|
|
2965
3086
|
error: "error";
|
|
3087
|
+
success: "success";
|
|
2966
3088
|
accent: "accent";
|
|
2967
3089
|
accentDim: "accentDim";
|
|
2968
3090
|
warning: "warning";
|
|
@@ -2979,18 +3101,18 @@ type EvalChartMetric = z$1.infer<typeof evalChartMetricSchema>;
|
|
|
2979
3101
|
declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
2980
3102
|
source: z$1.ZodLiteral<"builtin">;
|
|
2981
3103
|
metric: z$1.ZodEnum<{
|
|
2982
|
-
passRate: "passRate";
|
|
2983
3104
|
durationMs: "durationMs";
|
|
3105
|
+
passRate: "passRate";
|
|
2984
3106
|
}>;
|
|
2985
3107
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2986
3108
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2987
3109
|
source: z$1.ZodLiteral<"column">;
|
|
2988
3110
|
key: z$1.ZodString;
|
|
2989
3111
|
aggregate: z$1.ZodEnum<{
|
|
2990
|
-
avg: "avg";
|
|
2991
3112
|
min: "min";
|
|
2992
3113
|
max: "max";
|
|
2993
3114
|
sum: "sum";
|
|
3115
|
+
avg: "avg";
|
|
2994
3116
|
latest: "latest";
|
|
2995
3117
|
passThresholdRate: "passThresholdRate";
|
|
2996
3118
|
}>;
|
|
@@ -3015,13 +3137,13 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3015
3137
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3016
3138
|
source: z$1.ZodLiteral<"builtin">;
|
|
3017
3139
|
metric: z$1.ZodEnum<{
|
|
3018
|
-
passRate: "passRate";
|
|
3019
3140
|
durationMs: "durationMs";
|
|
3141
|
+
passRate: "passRate";
|
|
3020
3142
|
}>;
|
|
3021
3143
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3022
3144
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3023
|
-
success: "success";
|
|
3024
3145
|
error: "error";
|
|
3146
|
+
success: "success";
|
|
3025
3147
|
accent: "accent";
|
|
3026
3148
|
accentDim: "accentDim";
|
|
3027
3149
|
warning: "warning";
|
|
@@ -3035,17 +3157,17 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3035
3157
|
source: z$1.ZodLiteral<"column">;
|
|
3036
3158
|
key: z$1.ZodString;
|
|
3037
3159
|
aggregate: z$1.ZodEnum<{
|
|
3038
|
-
avg: "avg";
|
|
3039
3160
|
min: "min";
|
|
3040
3161
|
max: "max";
|
|
3041
3162
|
sum: "sum";
|
|
3163
|
+
avg: "avg";
|
|
3042
3164
|
latest: "latest";
|
|
3043
3165
|
passThresholdRate: "passThresholdRate";
|
|
3044
3166
|
}>;
|
|
3045
3167
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3046
3168
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3047
|
-
success: "success";
|
|
3048
3169
|
error: "error";
|
|
3170
|
+
success: "success";
|
|
3049
3171
|
accent: "accent";
|
|
3050
3172
|
accentDim: "accentDim";
|
|
3051
3173
|
warning: "warning";
|
|
@@ -3069,18 +3191,18 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3069
3191
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3070
3192
|
source: z$1.ZodLiteral<"builtin">;
|
|
3071
3193
|
metric: z$1.ZodEnum<{
|
|
3072
|
-
passRate: "passRate";
|
|
3073
3194
|
durationMs: "durationMs";
|
|
3195
|
+
passRate: "passRate";
|
|
3074
3196
|
}>;
|
|
3075
3197
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3076
3198
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
3077
3199
|
source: z$1.ZodLiteral<"column">;
|
|
3078
3200
|
key: z$1.ZodString;
|
|
3079
3201
|
aggregate: z$1.ZodEnum<{
|
|
3080
|
-
avg: "avg";
|
|
3081
3202
|
min: "min";
|
|
3082
3203
|
max: "max";
|
|
3083
3204
|
sum: "sum";
|
|
3205
|
+
avg: "avg";
|
|
3084
3206
|
latest: "latest";
|
|
3085
3207
|
passThresholdRate: "passThresholdRate";
|
|
3086
3208
|
}>;
|
|
@@ -3105,13 +3227,13 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3105
3227
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3106
3228
|
source: z$1.ZodLiteral<"builtin">;
|
|
3107
3229
|
metric: z$1.ZodEnum<{
|
|
3108
|
-
passRate: "passRate";
|
|
3109
3230
|
durationMs: "durationMs";
|
|
3231
|
+
passRate: "passRate";
|
|
3110
3232
|
}>;
|
|
3111
3233
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3112
3234
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3113
|
-
success: "success";
|
|
3114
3235
|
error: "error";
|
|
3236
|
+
success: "success";
|
|
3115
3237
|
accent: "accent";
|
|
3116
3238
|
accentDim: "accentDim";
|
|
3117
3239
|
warning: "warning";
|
|
@@ -3125,17 +3247,17 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3125
3247
|
source: z$1.ZodLiteral<"column">;
|
|
3126
3248
|
key: z$1.ZodString;
|
|
3127
3249
|
aggregate: z$1.ZodEnum<{
|
|
3128
|
-
avg: "avg";
|
|
3129
3250
|
min: "min";
|
|
3130
3251
|
max: "max";
|
|
3131
3252
|
sum: "sum";
|
|
3253
|
+
avg: "avg";
|
|
3132
3254
|
latest: "latest";
|
|
3133
3255
|
passThresholdRate: "passThresholdRate";
|
|
3134
3256
|
}>;
|
|
3135
3257
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3136
3258
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3137
|
-
success: "success";
|
|
3138
3259
|
error: "error";
|
|
3260
|
+
success: "success";
|
|
3139
3261
|
accent: "accent";
|
|
3140
3262
|
accentDim: "accentDim";
|
|
3141
3263
|
warning: "warning";
|
|
@@ -3159,18 +3281,18 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3159
3281
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3160
3282
|
source: z$1.ZodLiteral<"builtin">;
|
|
3161
3283
|
metric: z$1.ZodEnum<{
|
|
3162
|
-
passRate: "passRate";
|
|
3163
3284
|
durationMs: "durationMs";
|
|
3285
|
+
passRate: "passRate";
|
|
3164
3286
|
}>;
|
|
3165
3287
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3166
3288
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
3167
3289
|
source: z$1.ZodLiteral<"column">;
|
|
3168
3290
|
key: z$1.ZodString;
|
|
3169
3291
|
aggregate: z$1.ZodEnum<{
|
|
3170
|
-
avg: "avg";
|
|
3171
3292
|
min: "min";
|
|
3172
3293
|
max: "max";
|
|
3173
3294
|
sum: "sum";
|
|
3295
|
+
avg: "avg";
|
|
3174
3296
|
latest: "latest";
|
|
3175
3297
|
passThresholdRate: "passThresholdRate";
|
|
3176
3298
|
}>;
|
|
@@ -3198,9 +3320,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3198
3320
|
evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
|
|
3199
3321
|
target: z$1.ZodObject<{
|
|
3200
3322
|
mode: z$1.ZodEnum<{
|
|
3201
|
-
caseIds: "caseIds";
|
|
3202
3323
|
all: "all";
|
|
3203
3324
|
evalIds: "evalIds";
|
|
3325
|
+
caseIds: "caseIds";
|
|
3204
3326
|
}>;
|
|
3205
3327
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
3206
3328
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -3214,9 +3336,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3214
3336
|
median: "median";
|
|
3215
3337
|
}>>>;
|
|
3216
3338
|
cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3217
|
-
refresh: "refresh";
|
|
3218
|
-
bypass: "bypass";
|
|
3219
3339
|
use: "use";
|
|
3340
|
+
bypass: "bypass";
|
|
3341
|
+
refresh: "refresh";
|
|
3220
3342
|
}>>;
|
|
3221
3343
|
}, z$1.core.$strip>;
|
|
3222
3344
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -3312,8 +3434,8 @@ type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
|
|
|
3312
3434
|
/** Built-in eval-level output/column keys. */
|
|
3313
3435
|
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
3314
3436
|
declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
|
|
3315
|
-
costUsd: "costUsd";
|
|
3316
3437
|
apiCalls: "apiCalls";
|
|
3438
|
+
costUsd: "costUsd";
|
|
3317
3439
|
llmTurns: "llmTurns";
|
|
3318
3440
|
inputTokens: "inputTokens";
|
|
3319
3441
|
outputTokens: "outputTokens";
|
|
@@ -3818,6 +3940,15 @@ type AgentEvalsConfig$1 = {
|
|
|
3818
3940
|
* appended last unless removed with `removeDefaultConfig`.
|
|
3819
3941
|
*/
|
|
3820
3942
|
stats?: EvalStatsConfig;
|
|
3943
|
+
/**
|
|
3944
|
+
* Initial aggregate mode used for duration and column stats on every eval
|
|
3945
|
+
* card.
|
|
3946
|
+
*
|
|
3947
|
+
* Per-eval `defaultStatAggregate` overrides this value. Individual stat
|
|
3948
|
+
* `aggregate` values still define their authored reducer and remain the
|
|
3949
|
+
* fallback when no default aggregate is configured.
|
|
3950
|
+
*/
|
|
3951
|
+
defaultStatAggregate?: EvalStatAggregate;
|
|
3821
3952
|
/**
|
|
3822
3953
|
* Configuration for the "LLM calls" tab in the case-run drawer.
|
|
3823
3954
|
*
|
|
@@ -4118,9 +4249,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
|
|
|
4118
4249
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
4119
4250
|
*/
|
|
4120
4251
|
declare const cacheModeSchema: z$1.ZodEnum<{
|
|
4121
|
-
refresh: "refresh";
|
|
4122
|
-
bypass: "bypass";
|
|
4123
4252
|
use: "use";
|
|
4253
|
+
bypass: "bypass";
|
|
4254
|
+
refresh: "refresh";
|
|
4124
4255
|
}>;
|
|
4125
4256
|
/** Mode controlling how cached spans behave during a run. */
|
|
4126
4257
|
type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
@@ -4134,17 +4265,17 @@ declare const spanCacheOptionsSchema: z$1.ZodObject<{
|
|
|
4134
4265
|
type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
|
|
4135
4266
|
/** Category of operation stored in the eval cache. */
|
|
4136
4267
|
declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
4137
|
-
value: "value";
|
|
4138
4268
|
span: "span";
|
|
4269
|
+
value: "value";
|
|
4139
4270
|
}>;
|
|
4140
4271
|
/** Category of operation stored in the eval cache. */
|
|
4141
4272
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
4142
4273
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4143
4274
|
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
4275
|
+
bypass: "bypass";
|
|
4276
|
+
refresh: "refresh";
|
|
4144
4277
|
hit: "hit";
|
|
4145
4278
|
miss: "miss";
|
|
4146
|
-
refresh: "refresh";
|
|
4147
|
-
bypass: "bypass";
|
|
4148
4279
|
}>;
|
|
4149
4280
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4150
4281
|
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
@@ -4161,10 +4292,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
4161
4292
|
namespace: z$1.ZodString;
|
|
4162
4293
|
key: z$1.ZodString;
|
|
4163
4294
|
status: z$1.ZodEnum<{
|
|
4295
|
+
bypass: "bypass";
|
|
4296
|
+
refresh: "refresh";
|
|
4164
4297
|
hit: "hit";
|
|
4165
4298
|
miss: "miss";
|
|
4166
|
-
refresh: "refresh";
|
|
4167
|
-
bypass: "bypass";
|
|
4168
4299
|
}>;
|
|
4169
4300
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
4170
4301
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -4178,8 +4309,8 @@ declare const cacheListItemSchema$1: z$1.ZodObject<{
|
|
|
4178
4309
|
key: z$1.ZodString;
|
|
4179
4310
|
namespace: z$1.ZodString;
|
|
4180
4311
|
operationType: z$1.ZodEnum<{
|
|
4181
|
-
value: "value";
|
|
4182
4312
|
span: "span";
|
|
4313
|
+
value: "value";
|
|
4183
4314
|
}>;
|
|
4184
4315
|
operationName: z$1.ZodString;
|
|
4185
4316
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4241,8 +4372,8 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
|
|
|
4241
4372
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4242
4373
|
error: "error";
|
|
4243
4374
|
running: "running";
|
|
4244
|
-
cancelled: "cancelled";
|
|
4245
4375
|
ok: "ok";
|
|
4376
|
+
cancelled: "cancelled";
|
|
4246
4377
|
}>>;
|
|
4247
4378
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4248
4379
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4301,8 +4432,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
4301
4432
|
key: z$1.ZodString;
|
|
4302
4433
|
namespace: z$1.ZodString;
|
|
4303
4434
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4304
|
-
value: "value";
|
|
4305
4435
|
span: "span";
|
|
4436
|
+
value: "value";
|
|
4306
4437
|
}>>;
|
|
4307
4438
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4308
4439
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4314,8 +4445,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
4314
4445
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4315
4446
|
error: "error";
|
|
4316
4447
|
running: "running";
|
|
4317
|
-
cancelled: "cancelled";
|
|
4318
4448
|
ok: "ok";
|
|
4449
|
+
cancelled: "cancelled";
|
|
4319
4450
|
}>>;
|
|
4320
4451
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4321
4452
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4380,8 +4511,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4380
4511
|
key: z$1.ZodString;
|
|
4381
4512
|
namespace: z$1.ZodString;
|
|
4382
4513
|
operationType: z$1.ZodEnum<{
|
|
4383
|
-
value: "value";
|
|
4384
4514
|
span: "span";
|
|
4515
|
+
value: "value";
|
|
4385
4516
|
}>;
|
|
4386
4517
|
operationName: z$1.ZodString;
|
|
4387
4518
|
storedAt: z$1.ZodString;
|
|
@@ -4391,8 +4522,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4391
4522
|
key: z$1.ZodString;
|
|
4392
4523
|
namespace: z$1.ZodString;
|
|
4393
4524
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4394
|
-
value: "value";
|
|
4395
4525
|
span: "span";
|
|
4526
|
+
value: "value";
|
|
4396
4527
|
}>>;
|
|
4397
4528
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4398
4529
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4404,8 +4535,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4404
4535
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4405
4536
|
error: "error";
|
|
4406
4537
|
running: "running";
|
|
4407
|
-
cancelled: "cancelled";
|
|
4408
4538
|
ok: "ok";
|
|
4539
|
+
cancelled: "cancelled";
|
|
4409
4540
|
}>>;
|
|
4410
4541
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4411
4542
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4470,8 +4601,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4470
4601
|
key: z$1.ZodString;
|
|
4471
4602
|
namespace: z$1.ZodString;
|
|
4472
4603
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4473
|
-
value: "value";
|
|
4474
4604
|
span: "span";
|
|
4605
|
+
value: "value";
|
|
4475
4606
|
}>>;
|
|
4476
4607
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4477
4608
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4483,8 +4614,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4483
4614
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4484
4615
|
error: "error";
|
|
4485
4616
|
running: "running";
|
|
4486
|
-
cancelled: "cancelled";
|
|
4487
4617
|
ok: "ok";
|
|
4618
|
+
cancelled: "cancelled";
|
|
4488
4619
|
}>>;
|
|
4489
4620
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4490
4621
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4540,8 +4671,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4540
4671
|
key: z$1.ZodString;
|
|
4541
4672
|
namespace: z$1.ZodString;
|
|
4542
4673
|
operationType: z$1.ZodEnum<{
|
|
4543
|
-
value: "value";
|
|
4544
4674
|
span: "span";
|
|
4675
|
+
value: "value";
|
|
4545
4676
|
}>;
|
|
4546
4677
|
operationName: z$1.ZodString;
|
|
4547
4678
|
storedAt: z$1.ZodString;
|
|
@@ -4551,8 +4682,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4551
4682
|
key: z$1.ZodString;
|
|
4552
4683
|
namespace: z$1.ZodString;
|
|
4553
4684
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4554
|
-
value: "value";
|
|
4555
4685
|
span: "span";
|
|
4686
|
+
value: "value";
|
|
4556
4687
|
}>>;
|
|
4557
4688
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4558
4689
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4564,8 +4695,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4564
4695
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4565
4696
|
error: "error";
|
|
4566
4697
|
running: "running";
|
|
4567
|
-
cancelled: "cancelled";
|
|
4568
4698
|
ok: "ok";
|
|
4699
|
+
cancelled: "cancelled";
|
|
4569
4700
|
}>>;
|
|
4570
4701
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4571
4702
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4630,8 +4761,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
4630
4761
|
key: z$1.ZodString;
|
|
4631
4762
|
namespace: z$1.ZodString;
|
|
4632
4763
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4633
|
-
value: "value";
|
|
4634
4764
|
span: "span";
|
|
4765
|
+
value: "value";
|
|
4635
4766
|
}>>;
|
|
4636
4767
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4637
4768
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4643,8 +4774,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
4643
4774
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4644
4775
|
error: "error";
|
|
4645
4776
|
running: "running";
|
|
4646
|
-
cancelled: "cancelled";
|
|
4647
4777
|
ok: "ok";
|
|
4778
|
+
cancelled: "cancelled";
|
|
4648
4779
|
}>>;
|
|
4649
4780
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4650
4781
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4708,8 +4839,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
4708
4839
|
key: z$1.ZodString;
|
|
4709
4840
|
namespace: z$1.ZodString;
|
|
4710
4841
|
operationType: z$1.ZodEnum<{
|
|
4711
|
-
value: "value";
|
|
4712
4842
|
span: "span";
|
|
4843
|
+
value: "value";
|
|
4713
4844
|
}>;
|
|
4714
4845
|
operationName: z$1.ZodString;
|
|
4715
4846
|
storedAt: z$1.ZodString;
|
|
@@ -4719,8 +4850,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
4719
4850
|
key: z$1.ZodString;
|
|
4720
4851
|
namespace: z$1.ZodString;
|
|
4721
4852
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4722
|
-
value: "value";
|
|
4723
4853
|
span: "span";
|
|
4854
|
+
value: "value";
|
|
4724
4855
|
}>>;
|
|
4725
4856
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4726
4857
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4732,8 +4863,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
4732
4863
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4733
4864
|
error: "error";
|
|
4734
4865
|
running: "running";
|
|
4735
|
-
cancelled: "cancelled";
|
|
4736
4866
|
ok: "ok";
|
|
4867
|
+
cancelled: "cancelled";
|
|
4737
4868
|
}>>;
|
|
4738
4869
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4739
4870
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4866,8 +4997,8 @@ type SseEnvelope = z$1.infer<typeof sseEnvelopeSchema$1>; //#endregion
|
|
|
4866
4997
|
//#region src/schemas/api.d.ts
|
|
4867
4998
|
/** Lifecycle state for an app config reload triggered by `agent-evals.config.ts`. */
|
|
4868
4999
|
declare const configReloadStatusSchema: z$1.ZodEnum<{
|
|
4869
|
-
pending: "pending";
|
|
4870
5000
|
idle: "idle";
|
|
5001
|
+
pending: "pending";
|
|
4871
5002
|
reloading: "reloading";
|
|
4872
5003
|
}>;
|
|
4873
5004
|
/** Status for config reloads in the long-running app server. */
|
|
@@ -4875,8 +5006,8 @@ type ConfigReloadStatus = z$1.infer<typeof configReloadStatusSchema>;
|
|
|
4875
5006
|
/** UI/API-visible state for config reloads in `agent-evals app`. */
|
|
4876
5007
|
declare const configReloadStateSchema$1: z$1.ZodObject<{
|
|
4877
5008
|
status: z$1.ZodEnum<{
|
|
4878
|
-
pending: "pending";
|
|
4879
5009
|
idle: "idle";
|
|
5010
|
+
pending: "pending";
|
|
4880
5011
|
reloading: "reloading";
|
|
4881
5012
|
}>;
|
|
4882
5013
|
activeRunCount: z$1.ZodNumber;
|
|
@@ -4889,9 +5020,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
|
|
|
4889
5020
|
declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
4890
5021
|
target: z$1.ZodObject<{
|
|
4891
5022
|
mode: z$1.ZodEnum<{
|
|
4892
|
-
caseIds: "caseIds";
|
|
4893
5023
|
all: "all";
|
|
4894
5024
|
evalIds: "evalIds";
|
|
5025
|
+
caseIds: "caseIds";
|
|
4895
5026
|
}>;
|
|
4896
5027
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
4897
5028
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -4903,9 +5034,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
|
4903
5034
|
temporary: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
4904
5035
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
4905
5036
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
4906
|
-
refresh: "refresh";
|
|
4907
|
-
bypass: "bypass";
|
|
4908
5037
|
use: "use";
|
|
5038
|
+
bypass: "bypass";
|
|
5039
|
+
refresh: "refresh";
|
|
4909
5040
|
}>>;
|
|
4910
5041
|
}, z$1.core.$strip>>;
|
|
4911
5042
|
manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -5242,9 +5373,25 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
5242
5373
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
5243
5374
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5244
5375
|
kind: z$1.ZodLiteral<"duration">;
|
|
5376
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5377
|
+
avg: "avg";
|
|
5378
|
+
min: "min";
|
|
5379
|
+
max: "max";
|
|
5380
|
+
sum: "sum";
|
|
5381
|
+
best: "best";
|
|
5382
|
+
worst: "worst";
|
|
5383
|
+
}>>;
|
|
5245
5384
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
5246
5385
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5247
5386
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
5387
|
+
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5388
|
+
avg: "avg";
|
|
5389
|
+
min: "min";
|
|
5390
|
+
max: "max";
|
|
5391
|
+
sum: "sum";
|
|
5392
|
+
best: "best";
|
|
5393
|
+
worst: "worst";
|
|
5394
|
+
}>>;
|
|
5248
5395
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
5249
5396
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5250
5397
|
kind: z$1.ZodLiteral<"column">;
|
|
@@ -5255,7 +5402,8 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
5255
5402
|
min: "min";
|
|
5256
5403
|
max: "max";
|
|
5257
5404
|
sum: "sum";
|
|
5258
|
-
|
|
5405
|
+
best: "best";
|
|
5406
|
+
worst: "worst";
|
|
5259
5407
|
}>;
|
|
5260
5408
|
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5261
5409
|
number: "number";
|
|
@@ -5276,6 +5424,14 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
5276
5424
|
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions$1, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions$1, unknown>>>;
|
|
5277
5425
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5278
5426
|
}, z$1.core.$strip>], "kind">>>;
|
|
5427
|
+
defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5428
|
+
avg: "avg";
|
|
5429
|
+
min: "min";
|
|
5430
|
+
max: "max";
|
|
5431
|
+
sum: "sum";
|
|
5432
|
+
best: "best";
|
|
5433
|
+
worst: "worst";
|
|
5434
|
+
}>>;
|
|
5279
5435
|
charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
5280
5436
|
heading: z$1.ZodOptional<z$1.ZodString>;
|
|
5281
5437
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -5555,8 +5711,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
5555
5711
|
}>>;
|
|
5556
5712
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5557
5713
|
sum: "sum";
|
|
5558
|
-
last: "last";
|
|
5559
5714
|
all: "all";
|
|
5715
|
+
last: "last";
|
|
5560
5716
|
}>>;
|
|
5561
5717
|
}, z$1.core.$strip>>>;
|
|
5562
5718
|
}, z$1.core.$strip>;
|
|
@@ -5624,8 +5780,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
5624
5780
|
}>>;
|
|
5625
5781
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5626
5782
|
sum: "sum";
|
|
5627
|
-
last: "last";
|
|
5628
5783
|
all: "all";
|
|
5784
|
+
last: "last";
|
|
5629
5785
|
}>>;
|
|
5630
5786
|
}, z$1.core.$strip>>>;
|
|
5631
5787
|
}, z$1.core.$strip>;
|
|
@@ -6343,6 +6499,22 @@ type EvalRunner = {
|
|
|
6343
6499
|
deleteRun(runId: string): Promise<{
|
|
6344
6500
|
deleted: boolean;
|
|
6345
6501
|
}>;
|
|
6502
|
+
/**
|
|
6503
|
+
* Convert a temporary persisted run into durable run history.
|
|
6504
|
+
*
|
|
6505
|
+
* Returns the updated run when found. Already-durable runs are treated as a
|
|
6506
|
+
* no-op success so UI callers can refresh their cached copy idempotently.
|
|
6507
|
+
*/
|
|
6508
|
+
promoteRun(runId: string): Promise<{
|
|
6509
|
+
promoted: boolean;
|
|
6510
|
+
run: {
|
|
6511
|
+
manifest: RunManifest$1;
|
|
6512
|
+
summary: RunSummary$1;
|
|
6513
|
+
cases: CaseRow$1[];
|
|
6514
|
+
};
|
|
6515
|
+
} | {
|
|
6516
|
+
promoted: false;
|
|
6517
|
+
}>;
|
|
6346
6518
|
/**
|
|
6347
6519
|
* Validate a `CreateRunRequest`'s `manualInputs` map against each targeted
|
|
6348
6520
|
* eval's authored `manualInput.schema`. Returns `ok: true` with the parsed
|
|
@@ -6463,4 +6635,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
|
|
|
6463
6635
|
/** Return whether the active eval case has tags matching the typed input. */
|
|
6464
6636
|
declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
|
|
6465
6637
|
//#endregion
|
|
6466
|
-
export {
|
|
6638
|
+
export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|