@ls-stack/agent-eval 0.58.1 → 0.58.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-DhMIbjlE.mjs → app-ROCEce9X.mjs} +52 -7
- package/dist/apps/web/dist/assets/index-CHH7m5Cv.css +1 -0
- package/dist/apps/web/dist/assets/index-PTikBbhf.js +377 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +2 -1
- package/dist/{cli-_g2qOMK6.mjs → cli-SP4kEtYL.mjs} +31 -5
- package/dist/index.d.mts +184 -129
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-d42Lm0i5.mjs → runExecution-CFw0MQFs.mjs} +114 -21
- package/dist/{runOrchestration-CvmFeOmT.mjs → runOrchestration-CxjiQmof.mjs} +73 -6
- package/dist/{runner-BKogjiYd.mjs → runner-BlFQyvN2.mjs} +1 -1
- package/dist/{runner-MSr8sAWm.mjs → runner-CY3bgsjU.mjs} +2 -2
- package/dist/{src-CdZsOn6y.mjs → src-7GbQj1sb.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +19 -3
- package/dist/apps/web/dist/assets/index-Cz9p4l-t.js +0 -377
- package/dist/apps/web/dist/assets/index-DtARRwsS.css +0 -1
package/dist/index.d.mts
CHANGED
|
@@ -268,6 +268,7 @@ declare const runLogEntrySchema$1: z$1.ZodObject<{
|
|
|
268
268
|
phase: z$1.ZodEnum<{
|
|
269
269
|
eval: "eval";
|
|
270
270
|
derive: "derive";
|
|
271
|
+
tracingAssertions: "tracingAssertions";
|
|
271
272
|
outputsSchema: "outputsSchema";
|
|
272
273
|
scorer: "scorer";
|
|
273
274
|
}>;
|
|
@@ -399,10 +400,17 @@ type EvalCase$1$1<TInput = unknown> = {
|
|
|
399
400
|
};
|
|
400
401
|
/** Query helpers built from the flattened trace recorded for one eval case. */
|
|
401
402
|
type EvalTraceTree = {
|
|
402
|
-
spans: EvalTraceSpan$2[];
|
|
403
|
-
rootSpans: EvalTraceSpan$2[];
|
|
404
|
-
findSpan: (name: string) => EvalTraceSpan$2 | undefined;
|
|
405
|
-
|
|
403
|
+
/** Flat span list in creation order. */spans: EvalTraceSpan$2[]; /** Top-level spans whose `parentId` is `null`. */
|
|
404
|
+
rootSpans: EvalTraceSpan$2[]; /** Return the first span whose name exactly matches `name`. */
|
|
405
|
+
findSpan: (name: string) => EvalTraceSpan$2 | undefined; /** Return every span whose name exactly matches `name`. */
|
|
406
|
+
findSpans: (name: string) => EvalTraceSpan$2[]; /** Return whether any span name exactly matches `name`. */
|
|
407
|
+
hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
|
|
408
|
+
findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'`. */
|
|
409
|
+
findToolCallSpans: () => EvalTraceSpan$2[]; /** Return the names of every span with `kind: 'tool'`. */
|
|
410
|
+
listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
|
|
411
|
+
hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
|
|
412
|
+
listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
|
|
413
|
+
listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
|
|
406
414
|
flattenDfs: () => EvalTraceSpan$2[];
|
|
407
415
|
checkpoints: Map<string, unknown>;
|
|
408
416
|
};
|
|
@@ -421,6 +429,10 @@ type EvalDeriveMap<TInput = unknown> = Record<string, EvalDeriveValueFn<TInput>>
|
|
|
421
429
|
type EvalDeriveFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => Record<string, unknown> | Promise<Record<string, unknown>>;
|
|
422
430
|
/** Trace-derived output config accepted globally and on eval definitions. */
|
|
423
431
|
type EvalDeriveConfig<TInput = unknown> = EvalDeriveMap<TInput> | EvalDeriveFn<TInput>;
|
|
432
|
+
/** Function that records trace-derived assertions for one case. */
|
|
433
|
+
type EvalTracingAssertionsFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => MaybePromise$1<void>;
|
|
434
|
+
/** Trace-derived assertion config accepted globally and on eval definitions. */
|
|
435
|
+
type EvalTracingAssertionsConfig<TInput = unknown> = EvalTracingAssertionsFn<TInput>;
|
|
424
436
|
/** UI overrides for a derived or scored column emitted by an eval. */
|
|
425
437
|
type EvalColumnOverride = {
|
|
426
438
|
/** Display label shown for the column in tables and detail views. */label?: string;
|
|
@@ -1142,9 +1154,18 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
|
|
|
1142
1154
|
*
|
|
1143
1155
|
* Prefer the keyed map form when each key has one derivation. The
|
|
1144
1156
|
* object-returning callback form is also supported. Derived values only fill
|
|
1145
|
-
* keys not already recorded during execution.
|
|
1157
|
+
* keys not already recorded during execution. Assertion helpers are not
|
|
1158
|
+
* allowed here; use `tracingAssertions` for trace-derived pass/fail checks.
|
|
1146
1159
|
*/
|
|
1147
1160
|
deriveFromTracing?: EvalDeriveConfig<TInput>;
|
|
1161
|
+
/**
|
|
1162
|
+
* Record assertions from the finished execution trace.
|
|
1163
|
+
*
|
|
1164
|
+
* Runs after `deriveFromTracing` and before output schema validation and
|
|
1165
|
+
* scores. Use `evalAssert(...)` or `evalExpect(...)` inside the callback to
|
|
1166
|
+
* write normal assertion results without creating score columns.
|
|
1167
|
+
*/
|
|
1168
|
+
tracingAssertions?: EvalTracingAssertionsConfig<TInput>;
|
|
1148
1169
|
/**
|
|
1149
1170
|
* Computed score columns for each case.
|
|
1150
1171
|
*
|
|
@@ -1503,12 +1524,16 @@ type EvalCaseScope = {
|
|
|
1503
1524
|
* covers run-time module/environment loading, including top-level code in
|
|
1504
1525
|
* modules imported while a run is being prepared.
|
|
1505
1526
|
*/
|
|
1506
|
-
type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'outputsSchema' | 'scorer';
|
|
1527
|
+
type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'tracingAssertions' | 'outputsSchema' | 'scorer';
|
|
1507
1528
|
type EvalLogLevelInput = RunLogLevel$1 | 'warning';
|
|
1508
1529
|
/** Error thrown when an eval assertion fails during case execution. */
|
|
1509
1530
|
declare class EvalAssertionError extends Error {
|
|
1510
1531
|
constructor(message: string);
|
|
1511
1532
|
}
|
|
1533
|
+
/** Error thrown when an SDK helper is used in an unsupported runner phase. */
|
|
1534
|
+
declare class EvalRuntimeUsageError extends Error {
|
|
1535
|
+
constructor(message: string);
|
|
1536
|
+
}
|
|
1512
1537
|
/** Return the host process clock, bypassing the eval Date shim. */
|
|
1513
1538
|
/**
|
|
1514
1539
|
* Eval time helpers for reading and moving the active eval clock.
|
|
@@ -1539,8 +1564,10 @@ declare function getCurrentScope(): EvalCaseScope | undefined;
|
|
|
1539
1564
|
*
|
|
1540
1565
|
* Returns `null` outside eval-owned work, `env` while the runner is loading
|
|
1541
1566
|
* eval modules for a run, `cases` while generating cases, `eval` while running
|
|
1542
|
-
* case `execute`, `derive` while deriving outputs from traces,
|
|
1543
|
-
*
|
|
1567
|
+
* case `execute`, `derive` while deriving outputs from traces,
|
|
1568
|
+
* `tracingAssertions` while checking trace-derived assertions,
|
|
1569
|
+
* `outputsSchema` while validating outputs, and `scorer` while computing
|
|
1570
|
+
* scores.
|
|
1544
1571
|
*/
|
|
1545
1572
|
declare function isInEvalScope(): EvalRuntimeScope | null;
|
|
1546
1573
|
/**
|
|
@@ -1656,7 +1683,8 @@ declare function incrementEvalOutput(key: string, delta: number): void;
|
|
|
1656
1683
|
* Calls made outside `runInEvalScope(...)` are ignored so shared workflow code
|
|
1657
1684
|
* can safely reuse `evalAssert(...)` when it also runs outside an eval. The
|
|
1658
1685
|
* TypeScript assertion signature still narrows the checked value after the
|
|
1659
|
-
* call.
|
|
1686
|
+
* call. Calls inside `deriveFromTracing` throw because derivations must only
|
|
1687
|
+
* write outputs; use `tracingAssertions` for trace-derived pass/fail checks.
|
|
1660
1688
|
*/
|
|
1661
1689
|
declare function evalAssert(condition: unknown, message: string): asserts condition; //#endregion
|
|
1662
1690
|
//#region src/valueCache.d.ts
|
|
@@ -2014,9 +2042,9 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
2014
2042
|
subtree: "subtree";
|
|
2015
2043
|
}>>;
|
|
2016
2044
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2017
|
-
sum: "sum";
|
|
2018
2045
|
all: "all";
|
|
2019
2046
|
last: "last";
|
|
2047
|
+
sum: "sum";
|
|
2020
2048
|
}>>;
|
|
2021
2049
|
}, z$1.core.$strip>;
|
|
2022
2050
|
/**
|
|
@@ -2050,9 +2078,9 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
2050
2078
|
subtree: "subtree";
|
|
2051
2079
|
}>>;
|
|
2052
2080
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2053
|
-
sum: "sum";
|
|
2054
2081
|
all: "all";
|
|
2055
2082
|
last: "last";
|
|
2083
|
+
sum: "sum";
|
|
2056
2084
|
}>>;
|
|
2057
2085
|
}, z$1.core.$strip>>>;
|
|
2058
2086
|
}, z$1.core.$strip>;
|
|
@@ -2090,9 +2118,9 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
2090
2118
|
subtree: "subtree";
|
|
2091
2119
|
}>>;
|
|
2092
2120
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2093
|
-
sum: "sum";
|
|
2094
2121
|
all: "all";
|
|
2095
2122
|
last: "last";
|
|
2123
|
+
sum: "sum";
|
|
2096
2124
|
}>>;
|
|
2097
2125
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
2098
2126
|
}, z$1.core.$strip>;
|
|
@@ -2128,9 +2156,9 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
2128
2156
|
subtree: "subtree";
|
|
2129
2157
|
}>>;
|
|
2130
2158
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2131
|
-
sum: "sum";
|
|
2132
2159
|
all: "all";
|
|
2133
2160
|
last: "last";
|
|
2161
|
+
sum: "sum";
|
|
2134
2162
|
}>>;
|
|
2135
2163
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
2136
2164
|
}, z$1.core.$strip>>>;
|
|
@@ -2167,8 +2195,8 @@ declare const traceSpanSchema$1: z$1.ZodObject<{
|
|
|
2167
2195
|
status: z$1.ZodEnum<{
|
|
2168
2196
|
error: "error";
|
|
2169
2197
|
running: "running";
|
|
2170
|
-
cancelled: "cancelled";
|
|
2171
2198
|
ok: "ok";
|
|
2199
|
+
cancelled: "cancelled";
|
|
2172
2200
|
}>;
|
|
2173
2201
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2174
2202
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2213,10 +2241,10 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
|
|
|
2213
2241
|
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
2214
2242
|
*/
|
|
2215
2243
|
declare const evalStatAggregateSchema: z$1.ZodEnum<{
|
|
2216
|
-
|
|
2244
|
+
sum: "sum";
|
|
2217
2245
|
min: "min";
|
|
2218
2246
|
max: "max";
|
|
2219
|
-
|
|
2247
|
+
avg: "avg";
|
|
2220
2248
|
best: "best";
|
|
2221
2249
|
worst: "worst";
|
|
2222
2250
|
}>;
|
|
@@ -2245,10 +2273,10 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2245
2273
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2246
2274
|
kind: z$1.ZodLiteral<"duration">;
|
|
2247
2275
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2248
|
-
|
|
2276
|
+
sum: "sum";
|
|
2249
2277
|
min: "min";
|
|
2250
2278
|
max: "max";
|
|
2251
|
-
|
|
2279
|
+
avg: "avg";
|
|
2252
2280
|
best: "best";
|
|
2253
2281
|
worst: "worst";
|
|
2254
2282
|
}>>;
|
|
@@ -2256,10 +2284,10 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2256
2284
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2257
2285
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2258
2286
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2259
|
-
|
|
2287
|
+
sum: "sum";
|
|
2260
2288
|
min: "min";
|
|
2261
2289
|
max: "max";
|
|
2262
|
-
|
|
2290
|
+
avg: "avg";
|
|
2263
2291
|
best: "best";
|
|
2264
2292
|
worst: "worst";
|
|
2265
2293
|
}>>;
|
|
@@ -2269,10 +2297,10 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2269
2297
|
key: z$1.ZodString;
|
|
2270
2298
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2271
2299
|
aggregate: z$1.ZodEnum<{
|
|
2272
|
-
|
|
2300
|
+
sum: "sum";
|
|
2273
2301
|
min: "min";
|
|
2274
2302
|
max: "max";
|
|
2275
|
-
|
|
2303
|
+
avg: "avg";
|
|
2276
2304
|
best: "best";
|
|
2277
2305
|
worst: "worst";
|
|
2278
2306
|
}>;
|
|
@@ -2309,10 +2337,10 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2309
2337
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2310
2338
|
kind: z$1.ZodLiteral<"duration">;
|
|
2311
2339
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2312
|
-
|
|
2340
|
+
sum: "sum";
|
|
2313
2341
|
min: "min";
|
|
2314
2342
|
max: "max";
|
|
2315
|
-
|
|
2343
|
+
avg: "avg";
|
|
2316
2344
|
best: "best";
|
|
2317
2345
|
worst: "worst";
|
|
2318
2346
|
}>>;
|
|
@@ -2320,10 +2348,10 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2320
2348
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2321
2349
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2322
2350
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2323
|
-
|
|
2351
|
+
sum: "sum";
|
|
2324
2352
|
min: "min";
|
|
2325
2353
|
max: "max";
|
|
2326
|
-
|
|
2354
|
+
avg: "avg";
|
|
2327
2355
|
best: "best";
|
|
2328
2356
|
worst: "worst";
|
|
2329
2357
|
}>>;
|
|
@@ -2333,10 +2361,10 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2333
2361
|
key: z$1.ZodString;
|
|
2334
2362
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2335
2363
|
aggregate: z$1.ZodEnum<{
|
|
2336
|
-
|
|
2364
|
+
sum: "sum";
|
|
2337
2365
|
min: "min";
|
|
2338
2366
|
max: "max";
|
|
2339
|
-
|
|
2367
|
+
avg: "avg";
|
|
2340
2368
|
best: "best";
|
|
2341
2369
|
worst: "worst";
|
|
2342
2370
|
}>;
|
|
@@ -2419,10 +2447,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2419
2447
|
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2420
2448
|
lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
|
|
2421
2449
|
error: "error";
|
|
2422
|
-
pass: "pass";
|
|
2423
|
-
fail: "fail";
|
|
2424
2450
|
running: "running";
|
|
2425
2451
|
cancelled: "cancelled";
|
|
2452
|
+
pass: "pass";
|
|
2453
|
+
fail: "fail";
|
|
2426
2454
|
unscored: "unscored";
|
|
2427
2455
|
}>>;
|
|
2428
2456
|
stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
@@ -2436,10 +2464,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2436
2464
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2437
2465
|
kind: z$1.ZodLiteral<"duration">;
|
|
2438
2466
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2439
|
-
|
|
2467
|
+
sum: "sum";
|
|
2440
2468
|
min: "min";
|
|
2441
2469
|
max: "max";
|
|
2442
|
-
|
|
2470
|
+
avg: "avg";
|
|
2443
2471
|
best: "best";
|
|
2444
2472
|
worst: "worst";
|
|
2445
2473
|
}>>;
|
|
@@ -2447,10 +2475,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2447
2475
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2448
2476
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2449
2477
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2450
|
-
|
|
2478
|
+
sum: "sum";
|
|
2451
2479
|
min: "min";
|
|
2452
2480
|
max: "max";
|
|
2453
|
-
|
|
2481
|
+
avg: "avg";
|
|
2454
2482
|
best: "best";
|
|
2455
2483
|
worst: "worst";
|
|
2456
2484
|
}>>;
|
|
@@ -2460,10 +2488,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2460
2488
|
key: z$1.ZodString;
|
|
2461
2489
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2462
2490
|
aggregate: z$1.ZodEnum<{
|
|
2463
|
-
|
|
2491
|
+
sum: "sum";
|
|
2464
2492
|
min: "min";
|
|
2465
2493
|
max: "max";
|
|
2466
|
-
|
|
2494
|
+
avg: "avg";
|
|
2467
2495
|
best: "best";
|
|
2468
2496
|
worst: "worst";
|
|
2469
2497
|
}>;
|
|
@@ -2487,10 +2515,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2487
2515
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2488
2516
|
}, z$1.core.$strip>], "kind">>>;
|
|
2489
2517
|
defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2490
|
-
|
|
2518
|
+
sum: "sum";
|
|
2491
2519
|
min: "min";
|
|
2492
2520
|
max: "max";
|
|
2493
|
-
|
|
2521
|
+
avg: "avg";
|
|
2494
2522
|
best: "best";
|
|
2495
2523
|
worst: "worst";
|
|
2496
2524
|
}>>;
|
|
@@ -2511,11 +2539,11 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2511
2539
|
}>;
|
|
2512
2540
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2513
2541
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2514
|
-
success: "success";
|
|
2515
2542
|
error: "error";
|
|
2543
|
+
success: "success";
|
|
2544
|
+
warning: "warning";
|
|
2516
2545
|
accent: "accent";
|
|
2517
2546
|
accentDim: "accentDim";
|
|
2518
|
-
warning: "warning";
|
|
2519
2547
|
textMuted: "textMuted";
|
|
2520
2548
|
}>>;
|
|
2521
2549
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -2526,20 +2554,20 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2526
2554
|
source: z$1.ZodLiteral<"column">;
|
|
2527
2555
|
key: z$1.ZodString;
|
|
2528
2556
|
aggregate: z$1.ZodEnum<{
|
|
2529
|
-
|
|
2557
|
+
sum: "sum";
|
|
2530
2558
|
min: "min";
|
|
2531
2559
|
max: "max";
|
|
2532
|
-
|
|
2560
|
+
avg: "avg";
|
|
2533
2561
|
latest: "latest";
|
|
2534
2562
|
passThresholdRate: "passThresholdRate";
|
|
2535
2563
|
}>;
|
|
2536
2564
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2537
2565
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2538
|
-
success: "success";
|
|
2539
2566
|
error: "error";
|
|
2567
|
+
success: "success";
|
|
2568
|
+
warning: "warning";
|
|
2540
2569
|
accent: "accent";
|
|
2541
2570
|
accentDim: "accentDim";
|
|
2542
|
-
warning: "warning";
|
|
2543
2571
|
textMuted: "textMuted";
|
|
2544
2572
|
}>>;
|
|
2545
2573
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -2568,10 +2596,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2568
2596
|
source: z$1.ZodLiteral<"column">;
|
|
2569
2597
|
key: z$1.ZodString;
|
|
2570
2598
|
aggregate: z$1.ZodEnum<{
|
|
2571
|
-
|
|
2599
|
+
sum: "sum";
|
|
2572
2600
|
min: "min";
|
|
2573
2601
|
max: "max";
|
|
2574
|
-
|
|
2602
|
+
avg: "avg";
|
|
2575
2603
|
latest: "latest";
|
|
2576
2604
|
passThresholdRate: "passThresholdRate";
|
|
2577
2605
|
}>;
|
|
@@ -2668,11 +2696,11 @@ declare const caseRowSchema$1: z$1.ZodObject<{
|
|
|
2668
2696
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2669
2697
|
status: z$1.ZodEnum<{
|
|
2670
2698
|
error: "error";
|
|
2671
|
-
pass: "pass";
|
|
2672
|
-
fail: "fail";
|
|
2673
2699
|
running: "running";
|
|
2674
2700
|
cancelled: "cancelled";
|
|
2675
2701
|
pending: "pending";
|
|
2702
|
+
pass: "pass";
|
|
2703
|
+
fail: "fail";
|
|
2676
2704
|
}>;
|
|
2677
2705
|
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
2678
2706
|
cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
|
|
@@ -2753,6 +2781,7 @@ type RunLogLevel = z$1.infer<typeof runLogLevelSchema>;
|
|
|
2753
2781
|
declare const runLogPhaseSchema: z$1.ZodEnum<{
|
|
2754
2782
|
eval: "eval";
|
|
2755
2783
|
derive: "derive";
|
|
2784
|
+
tracingAssertions: "tracingAssertions";
|
|
2756
2785
|
outputsSchema: "outputsSchema";
|
|
2757
2786
|
scorer: "scorer";
|
|
2758
2787
|
}>;
|
|
@@ -2779,6 +2808,7 @@ declare const runLogEntrySchema: z$1.ZodObject<{
|
|
|
2779
2808
|
phase: z$1.ZodEnum<{
|
|
2780
2809
|
eval: "eval";
|
|
2781
2810
|
derive: "derive";
|
|
2811
|
+
tracingAssertions: "tracingAssertions";
|
|
2782
2812
|
outputsSchema: "outputsSchema";
|
|
2783
2813
|
scorer: "scorer";
|
|
2784
2814
|
}>;
|
|
@@ -2808,8 +2838,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2808
2838
|
status: z$1.ZodEnum<{
|
|
2809
2839
|
error: "error";
|
|
2810
2840
|
running: "running";
|
|
2811
|
-
cancelled: "cancelled";
|
|
2812
2841
|
ok: "ok";
|
|
2842
|
+
cancelled: "cancelled";
|
|
2813
2843
|
}>;
|
|
2814
2844
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2815
2845
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2859,9 +2889,9 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2859
2889
|
subtree: "subtree";
|
|
2860
2890
|
}>>;
|
|
2861
2891
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2862
|
-
sum: "sum";
|
|
2863
2892
|
all: "all";
|
|
2864
2893
|
last: "last";
|
|
2894
|
+
sum: "sum";
|
|
2865
2895
|
}>>;
|
|
2866
2896
|
}, z$1.core.$strip>>>;
|
|
2867
2897
|
}, z$1.core.$strip>;
|
|
@@ -2871,10 +2901,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2871
2901
|
namespace: z$1.ZodString;
|
|
2872
2902
|
key: z$1.ZodString;
|
|
2873
2903
|
status: z$1.ZodEnum<{
|
|
2904
|
+
bypass: "bypass";
|
|
2905
|
+
refresh: "refresh";
|
|
2874
2906
|
hit: "hit";
|
|
2875
2907
|
miss: "miss";
|
|
2876
|
-
refresh: "refresh";
|
|
2877
|
-
bypass: "bypass";
|
|
2878
2908
|
}>;
|
|
2879
2909
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2880
2910
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2893,11 +2923,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2893
2923
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2894
2924
|
status: z$1.ZodEnum<{
|
|
2895
2925
|
error: "error";
|
|
2896
|
-
pass: "pass";
|
|
2897
|
-
fail: "fail";
|
|
2898
2926
|
running: "running";
|
|
2899
2927
|
cancelled: "cancelled";
|
|
2900
2928
|
pending: "pending";
|
|
2929
|
+
pass: "pass";
|
|
2930
|
+
fail: "fail";
|
|
2901
2931
|
}>;
|
|
2902
2932
|
input: z$1.ZodUnknown;
|
|
2903
2933
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -2911,8 +2941,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2911
2941
|
status: z$1.ZodEnum<{
|
|
2912
2942
|
error: "error";
|
|
2913
2943
|
running: "running";
|
|
2914
|
-
cancelled: "cancelled";
|
|
2915
2944
|
ok: "ok";
|
|
2945
|
+
cancelled: "cancelled";
|
|
2916
2946
|
}>;
|
|
2917
2947
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2918
2948
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2962,9 +2992,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2962
2992
|
subtree: "subtree";
|
|
2963
2993
|
}>>;
|
|
2964
2994
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2965
|
-
sum: "sum";
|
|
2966
2995
|
all: "all";
|
|
2967
2996
|
last: "last";
|
|
2997
|
+
sum: "sum";
|
|
2968
2998
|
}>>;
|
|
2969
2999
|
}, z$1.core.$strip>>>;
|
|
2970
3000
|
}, z$1.core.$strip>;
|
|
@@ -2980,8 +3010,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2980
3010
|
status: z$1.ZodEnum<{
|
|
2981
3011
|
error: "error";
|
|
2982
3012
|
running: "running";
|
|
2983
|
-
cancelled: "cancelled";
|
|
2984
3013
|
ok: "ok";
|
|
3014
|
+
cancelled: "cancelled";
|
|
2985
3015
|
}>;
|
|
2986
3016
|
attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
2987
3017
|
error: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -3031,9 +3061,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3031
3061
|
subtree: "subtree";
|
|
3032
3062
|
}>>;
|
|
3033
3063
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3034
|
-
sum: "sum";
|
|
3035
3064
|
all: "all";
|
|
3036
3065
|
last: "last";
|
|
3066
|
+
sum: "sum";
|
|
3037
3067
|
}>>;
|
|
3038
3068
|
}, z$1.core.$strip>>>;
|
|
3039
3069
|
}, z$1.core.$strip>;
|
|
@@ -3043,10 +3073,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3043
3073
|
namespace: z$1.ZodString;
|
|
3044
3074
|
key: z$1.ZodString;
|
|
3045
3075
|
status: z$1.ZodEnum<{
|
|
3076
|
+
bypass: "bypass";
|
|
3077
|
+
refresh: "refresh";
|
|
3046
3078
|
hit: "hit";
|
|
3047
3079
|
miss: "miss";
|
|
3048
|
-
refresh: "refresh";
|
|
3049
|
-
bypass: "bypass";
|
|
3050
3080
|
}>;
|
|
3051
3081
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3052
3082
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -3137,6 +3167,7 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3137
3167
|
phase: z$1.ZodEnum<{
|
|
3138
3168
|
eval: "eval";
|
|
3139
3169
|
derive: "derive";
|
|
3170
|
+
tracingAssertions: "tracingAssertions";
|
|
3140
3171
|
outputsSchema: "outputsSchema";
|
|
3141
3172
|
scorer: "scorer";
|
|
3142
3173
|
}>;
|
|
@@ -3163,10 +3194,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3163
3194
|
namespace: z$1.ZodString;
|
|
3164
3195
|
key: z$1.ZodString;
|
|
3165
3196
|
status: z$1.ZodEnum<{
|
|
3197
|
+
bypass: "bypass";
|
|
3198
|
+
refresh: "refresh";
|
|
3166
3199
|
hit: "hit";
|
|
3167
3200
|
miss: "miss";
|
|
3168
|
-
refresh: "refresh";
|
|
3169
|
-
bypass: "bypass";
|
|
3170
3201
|
}>;
|
|
3171
3202
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3172
3203
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -3219,10 +3250,10 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
|
|
|
3219
3250
|
type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
|
|
3220
3251
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
3221
3252
|
declare const evalChartAggregateSchema: z$1.ZodEnum<{
|
|
3222
|
-
|
|
3253
|
+
sum: "sum";
|
|
3223
3254
|
min: "min";
|
|
3224
3255
|
max: "max";
|
|
3225
|
-
|
|
3256
|
+
avg: "avg";
|
|
3226
3257
|
latest: "latest";
|
|
3227
3258
|
passThresholdRate: "passThresholdRate";
|
|
3228
3259
|
}>;
|
|
@@ -3233,11 +3264,11 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
3233
3264
|
* not emit raw hex so authored evals stay decoupled from the web theme.
|
|
3234
3265
|
*/
|
|
3235
3266
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
3236
|
-
success: "success";
|
|
3237
3267
|
error: "error";
|
|
3268
|
+
success: "success";
|
|
3269
|
+
warning: "warning";
|
|
3238
3270
|
accent: "accent";
|
|
3239
3271
|
accentDim: "accentDim";
|
|
3240
|
-
warning: "warning";
|
|
3241
3272
|
textMuted: "textMuted";
|
|
3242
3273
|
}>;
|
|
3243
3274
|
/** Semantic color token resolved to a theme color by the web UI. */
|
|
@@ -3262,11 +3293,11 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3262
3293
|
}>;
|
|
3263
3294
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3264
3295
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3265
|
-
success: "success";
|
|
3266
3296
|
error: "error";
|
|
3297
|
+
success: "success";
|
|
3298
|
+
warning: "warning";
|
|
3267
3299
|
accent: "accent";
|
|
3268
3300
|
accentDim: "accentDim";
|
|
3269
|
-
warning: "warning";
|
|
3270
3301
|
textMuted: "textMuted";
|
|
3271
3302
|
}>>;
|
|
3272
3303
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3277,20 +3308,20 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3277
3308
|
source: z$1.ZodLiteral<"column">;
|
|
3278
3309
|
key: z$1.ZodString;
|
|
3279
3310
|
aggregate: z$1.ZodEnum<{
|
|
3280
|
-
|
|
3311
|
+
sum: "sum";
|
|
3281
3312
|
min: "min";
|
|
3282
3313
|
max: "max";
|
|
3283
|
-
|
|
3314
|
+
avg: "avg";
|
|
3284
3315
|
latest: "latest";
|
|
3285
3316
|
passThresholdRate: "passThresholdRate";
|
|
3286
3317
|
}>;
|
|
3287
3318
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3288
3319
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3289
|
-
success: "success";
|
|
3290
3320
|
error: "error";
|
|
3321
|
+
success: "success";
|
|
3322
|
+
warning: "warning";
|
|
3291
3323
|
accent: "accent";
|
|
3292
3324
|
accentDim: "accentDim";
|
|
3293
|
-
warning: "warning";
|
|
3294
3325
|
textMuted: "textMuted";
|
|
3295
3326
|
}>>;
|
|
3296
3327
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3312,10 +3343,10 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
|
|
|
3312
3343
|
source: z$1.ZodLiteral<"column">;
|
|
3313
3344
|
key: z$1.ZodString;
|
|
3314
3345
|
aggregate: z$1.ZodEnum<{
|
|
3315
|
-
|
|
3346
|
+
sum: "sum";
|
|
3316
3347
|
min: "min";
|
|
3317
3348
|
max: "max";
|
|
3318
|
-
|
|
3349
|
+
avg: "avg";
|
|
3319
3350
|
latest: "latest";
|
|
3320
3351
|
passThresholdRate: "passThresholdRate";
|
|
3321
3352
|
}>;
|
|
@@ -3345,11 +3376,11 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3345
3376
|
}>;
|
|
3346
3377
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3347
3378
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3348
|
-
success: "success";
|
|
3349
3379
|
error: "error";
|
|
3380
|
+
success: "success";
|
|
3381
|
+
warning: "warning";
|
|
3350
3382
|
accent: "accent";
|
|
3351
3383
|
accentDim: "accentDim";
|
|
3352
|
-
warning: "warning";
|
|
3353
3384
|
textMuted: "textMuted";
|
|
3354
3385
|
}>>;
|
|
3355
3386
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3360,20 +3391,20 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3360
3391
|
source: z$1.ZodLiteral<"column">;
|
|
3361
3392
|
key: z$1.ZodString;
|
|
3362
3393
|
aggregate: z$1.ZodEnum<{
|
|
3363
|
-
|
|
3394
|
+
sum: "sum";
|
|
3364
3395
|
min: "min";
|
|
3365
3396
|
max: "max";
|
|
3366
|
-
|
|
3397
|
+
avg: "avg";
|
|
3367
3398
|
latest: "latest";
|
|
3368
3399
|
passThresholdRate: "passThresholdRate";
|
|
3369
3400
|
}>;
|
|
3370
3401
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3371
3402
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3372
|
-
success: "success";
|
|
3373
3403
|
error: "error";
|
|
3404
|
+
success: "success";
|
|
3405
|
+
warning: "warning";
|
|
3374
3406
|
accent: "accent";
|
|
3375
3407
|
accentDim: "accentDim";
|
|
3376
|
-
warning: "warning";
|
|
3377
3408
|
textMuted: "textMuted";
|
|
3378
3409
|
}>>;
|
|
3379
3410
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3402,10 +3433,10 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3402
3433
|
source: z$1.ZodLiteral<"column">;
|
|
3403
3434
|
key: z$1.ZodString;
|
|
3404
3435
|
aggregate: z$1.ZodEnum<{
|
|
3405
|
-
|
|
3436
|
+
sum: "sum";
|
|
3406
3437
|
min: "min";
|
|
3407
3438
|
max: "max";
|
|
3408
|
-
|
|
3439
|
+
avg: "avg";
|
|
3409
3440
|
latest: "latest";
|
|
3410
3441
|
passThresholdRate: "passThresholdRate";
|
|
3411
3442
|
}>;
|
|
@@ -3435,11 +3466,11 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3435
3466
|
}>;
|
|
3436
3467
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3437
3468
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3438
|
-
success: "success";
|
|
3439
3469
|
error: "error";
|
|
3470
|
+
success: "success";
|
|
3471
|
+
warning: "warning";
|
|
3440
3472
|
accent: "accent";
|
|
3441
3473
|
accentDim: "accentDim";
|
|
3442
|
-
warning: "warning";
|
|
3443
3474
|
textMuted: "textMuted";
|
|
3444
3475
|
}>>;
|
|
3445
3476
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3450,20 +3481,20 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3450
3481
|
source: z$1.ZodLiteral<"column">;
|
|
3451
3482
|
key: z$1.ZodString;
|
|
3452
3483
|
aggregate: z$1.ZodEnum<{
|
|
3453
|
-
|
|
3484
|
+
sum: "sum";
|
|
3454
3485
|
min: "min";
|
|
3455
3486
|
max: "max";
|
|
3456
|
-
|
|
3487
|
+
avg: "avg";
|
|
3457
3488
|
latest: "latest";
|
|
3458
3489
|
passThresholdRate: "passThresholdRate";
|
|
3459
3490
|
}>;
|
|
3460
3491
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3461
3492
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3462
|
-
success: "success";
|
|
3463
3493
|
error: "error";
|
|
3494
|
+
success: "success";
|
|
3495
|
+
warning: "warning";
|
|
3464
3496
|
accent: "accent";
|
|
3465
3497
|
accentDim: "accentDim";
|
|
3466
|
-
warning: "warning";
|
|
3467
3498
|
textMuted: "textMuted";
|
|
3468
3499
|
}>>;
|
|
3469
3500
|
axis: z$1.ZodOptional<z$1.ZodEnum<{
|
|
@@ -3492,10 +3523,10 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3492
3523
|
source: z$1.ZodLiteral<"column">;
|
|
3493
3524
|
key: z$1.ZodString;
|
|
3494
3525
|
aggregate: z$1.ZodEnum<{
|
|
3495
|
-
|
|
3526
|
+
sum: "sum";
|
|
3496
3527
|
min: "min";
|
|
3497
3528
|
max: "max";
|
|
3498
|
-
|
|
3529
|
+
avg: "avg";
|
|
3499
3530
|
latest: "latest";
|
|
3500
3531
|
passThresholdRate: "passThresholdRate";
|
|
3501
3532
|
}>;
|
|
@@ -3523,9 +3554,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3523
3554
|
evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
|
|
3524
3555
|
target: z$1.ZodObject<{
|
|
3525
3556
|
mode: z$1.ZodEnum<{
|
|
3526
|
-
caseIds: "caseIds";
|
|
3527
3557
|
all: "all";
|
|
3528
3558
|
evalIds: "evalIds";
|
|
3559
|
+
caseIds: "caseIds";
|
|
3529
3560
|
}>;
|
|
3530
3561
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
3531
3562
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -3539,9 +3570,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3539
3570
|
median: "median";
|
|
3540
3571
|
}>>>;
|
|
3541
3572
|
cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3542
|
-
refresh: "refresh";
|
|
3543
|
-
bypass: "bypass";
|
|
3544
3573
|
use: "use";
|
|
3574
|
+
bypass: "bypass";
|
|
3575
|
+
refresh: "refresh";
|
|
3545
3576
|
}>>;
|
|
3546
3577
|
}, z$1.core.$strip>;
|
|
3547
3578
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -3610,7 +3641,7 @@ type ScopedCaseSummary = {
|
|
|
3610
3641
|
//#endregion
|
|
3611
3642
|
//#region src/evalStatus.d.ts
|
|
3612
3643
|
/** Display status used for eval, file, and folder UI surfaces. */
|
|
3613
|
-
type EvalDisplayStatus = DerivedStatus | 'stale' | 'outdated' | 'unscored';
|
|
3644
|
+
type EvalDisplayStatus = DerivedStatus | 'enqueued' | 'stale' | 'outdated' | 'unscored';
|
|
3614
3645
|
/**
|
|
3615
3646
|
* Derive the user-facing eval status from the raw latest run result plus
|
|
3616
3647
|
* freshness state.
|
|
@@ -3658,10 +3689,17 @@ type EvalCase$1<TInput = unknown> = {
|
|
|
3658
3689
|
};
|
|
3659
3690
|
/** Query helpers built from the flattened trace recorded for one eval case. */
|
|
3660
3691
|
type EvalTraceTree$1 = {
|
|
3661
|
-
spans: EvalTraceSpan$1[];
|
|
3662
|
-
rootSpans: EvalTraceSpan$1[];
|
|
3663
|
-
findSpan: (name: string) => EvalTraceSpan$1 | undefined;
|
|
3664
|
-
|
|
3692
|
+
/** Flat span list in creation order. */spans: EvalTraceSpan$1[]; /** Top-level spans whose `parentId` is `null`. */
|
|
3693
|
+
rootSpans: EvalTraceSpan$1[]; /** Return the first span whose name exactly matches `name`. */
|
|
3694
|
+
findSpan: (name: string) => EvalTraceSpan$1 | undefined; /** Return every span whose name exactly matches `name`. */
|
|
3695
|
+
findSpans: (name: string) => EvalTraceSpan$1[]; /** Return whether any span name exactly matches `name`. */
|
|
3696
|
+
hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
|
|
3697
|
+
findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'`. */
|
|
3698
|
+
findToolCallSpans: () => EvalTraceSpan$1[]; /** Return the names of every span with `kind: 'tool'`. */
|
|
3699
|
+
listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
|
|
3700
|
+
hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
|
|
3701
|
+
listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
|
|
3702
|
+
listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
|
|
3665
3703
|
flattenDfs: () => EvalTraceSpan$1[];
|
|
3666
3704
|
checkpoints: Map<string, unknown>;
|
|
3667
3705
|
};
|
|
@@ -3681,6 +3719,11 @@ type EvalDeriveFn$1<TInput = unknown> = (ctx: EvalDeriveContext$1<TInput>) => Re
|
|
|
3681
3719
|
/** Trace-derived output config accepted globally and on eval definitions. */
|
|
3682
3720
|
type EvalDeriveConfig$1<TInput = unknown> = EvalDeriveMap$1<TInput> | EvalDeriveFn$1<TInput>;
|
|
3683
3721
|
/** Schema for keyed or object-returning trace-derived output config. */
|
|
3722
|
+
/** Function that records trace-derived assertions for one case. */
|
|
3723
|
+
type EvalTracingAssertionsFn$1<TInput = unknown> = (ctx: EvalDeriveContext$1<TInput>) => MaybePromise<void>;
|
|
3724
|
+
/** Trace-derived assertion config accepted globally and on eval definitions. */
|
|
3725
|
+
type EvalTracingAssertionsConfig$1<TInput = unknown> = EvalTracingAssertionsFn$1<TInput>;
|
|
3726
|
+
/** Schema for trace-derived assertion config. */
|
|
3684
3727
|
/** UI overrides for a derived or scored column emitted by an eval. */
|
|
3685
3728
|
type EvalColumnOverride$1 = {
|
|
3686
3729
|
/** Display label shown for the column in tables and detail views. */label?: string;
|
|
@@ -4133,9 +4176,19 @@ type AgentEvalsConfig$1 = {
|
|
|
4133
4176
|
* Prefer the keyed map form for shared metrics:
|
|
4134
4177
|
* `{ toolCalls: ({ trace }) => trace.findSpansByKind('tool').length }`.
|
|
4135
4178
|
* The object-returning function form is also supported. Derived outputs
|
|
4136
|
-
* only fill keys that were not already recorded by eval execution.
|
|
4179
|
+
* only fill keys that were not already recorded by eval execution. Do not
|
|
4180
|
+
* call assertion helpers here; use `tracingAssertions` for trace-derived
|
|
4181
|
+
* pass/fail checks.
|
|
4137
4182
|
*/
|
|
4138
4183
|
deriveFromTracing?: EvalDeriveConfig$1;
|
|
4184
|
+
/**
|
|
4185
|
+
* Workspace-wide assertions derived from the finished execution trace.
|
|
4186
|
+
*
|
|
4187
|
+
* These run after `deriveFromTracing` and before output schema validation and
|
|
4188
|
+
* scores. Use `evalAssert(...)` or `evalExpect(...)` inside the callback to
|
|
4189
|
+
* record normal assertion results without creating fake score columns.
|
|
4190
|
+
*/
|
|
4191
|
+
tracingAssertions?: EvalTracingAssertionsConfig$1;
|
|
4139
4192
|
/**
|
|
4140
4193
|
* Workspace-wide stats prepended to every eval's stats row.
|
|
4141
4194
|
*
|
|
@@ -4466,9 +4519,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
|
|
|
4466
4519
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
4467
4520
|
*/
|
|
4468
4521
|
declare const cacheModeSchema: z$1.ZodEnum<{
|
|
4469
|
-
refresh: "refresh";
|
|
4470
|
-
bypass: "bypass";
|
|
4471
4522
|
use: "use";
|
|
4523
|
+
bypass: "bypass";
|
|
4524
|
+
refresh: "refresh";
|
|
4472
4525
|
}>;
|
|
4473
4526
|
/** Mode controlling how cached spans behave during a run. */
|
|
4474
4527
|
type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
@@ -4482,17 +4535,17 @@ declare const spanCacheOptionsSchema: z$1.ZodObject<{
|
|
|
4482
4535
|
type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
|
|
4483
4536
|
/** Category of operation stored in the eval cache. */
|
|
4484
4537
|
declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
4485
|
-
value: "value";
|
|
4486
4538
|
span: "span";
|
|
4539
|
+
value: "value";
|
|
4487
4540
|
}>;
|
|
4488
4541
|
/** Category of operation stored in the eval cache. */
|
|
4489
4542
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
4490
4543
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4491
4544
|
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
4545
|
+
bypass: "bypass";
|
|
4546
|
+
refresh: "refresh";
|
|
4492
4547
|
hit: "hit";
|
|
4493
4548
|
miss: "miss";
|
|
4494
|
-
refresh: "refresh";
|
|
4495
|
-
bypass: "bypass";
|
|
4496
4549
|
}>;
|
|
4497
4550
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4498
4551
|
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
@@ -4509,10 +4562,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
4509
4562
|
namespace: z$1.ZodString;
|
|
4510
4563
|
key: z$1.ZodString;
|
|
4511
4564
|
status: z$1.ZodEnum<{
|
|
4565
|
+
bypass: "bypass";
|
|
4566
|
+
refresh: "refresh";
|
|
4512
4567
|
hit: "hit";
|
|
4513
4568
|
miss: "miss";
|
|
4514
|
-
refresh: "refresh";
|
|
4515
|
-
bypass: "bypass";
|
|
4516
4569
|
}>;
|
|
4517
4570
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
4518
4571
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -4620,8 +4673,8 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
|
|
|
4620
4673
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4621
4674
|
error: "error";
|
|
4622
4675
|
running: "running";
|
|
4623
|
-
cancelled: "cancelled";
|
|
4624
4676
|
ok: "ok";
|
|
4677
|
+
cancelled: "cancelled";
|
|
4625
4678
|
}>>;
|
|
4626
4679
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4627
4680
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4708,8 +4761,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
4708
4761
|
key: z$1.ZodString;
|
|
4709
4762
|
namespace: z$1.ZodString;
|
|
4710
4763
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4711
|
-
value: "value";
|
|
4712
4764
|
span: "span";
|
|
4765
|
+
value: "value";
|
|
4713
4766
|
}>>;
|
|
4714
4767
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4715
4768
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4721,8 +4774,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
4721
4774
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4722
4775
|
error: "error";
|
|
4723
4776
|
running: "running";
|
|
4724
|
-
cancelled: "cancelled";
|
|
4725
4777
|
ok: "ok";
|
|
4778
|
+
cancelled: "cancelled";
|
|
4726
4779
|
}>>;
|
|
4727
4780
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4728
4781
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4815,8 +4868,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4815
4868
|
key: z$1.ZodString;
|
|
4816
4869
|
namespace: z$1.ZodString;
|
|
4817
4870
|
operationType: z$1.ZodEnum<{
|
|
4818
|
-
value: "value";
|
|
4819
4871
|
span: "span";
|
|
4872
|
+
value: "value";
|
|
4820
4873
|
}>;
|
|
4821
4874
|
operationName: z$1.ZodString;
|
|
4822
4875
|
storedAt: z$1.ZodString;
|
|
@@ -4826,8 +4879,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4826
4879
|
key: z$1.ZodString;
|
|
4827
4880
|
namespace: z$1.ZodString;
|
|
4828
4881
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4829
|
-
value: "value";
|
|
4830
4882
|
span: "span";
|
|
4883
|
+
value: "value";
|
|
4831
4884
|
}>>;
|
|
4832
4885
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4833
4886
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4839,8 +4892,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4839
4892
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4840
4893
|
error: "error";
|
|
4841
4894
|
running: "running";
|
|
4842
|
-
cancelled: "cancelled";
|
|
4843
4895
|
ok: "ok";
|
|
4896
|
+
cancelled: "cancelled";
|
|
4844
4897
|
}>>;
|
|
4845
4898
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4846
4899
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4933,8 +4986,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4933
4986
|
key: z$1.ZodString;
|
|
4934
4987
|
namespace: z$1.ZodString;
|
|
4935
4988
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4936
|
-
value: "value";
|
|
4937
4989
|
span: "span";
|
|
4990
|
+
value: "value";
|
|
4938
4991
|
}>>;
|
|
4939
4992
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4940
4993
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4946,8 +4999,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4946
4999
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4947
5000
|
error: "error";
|
|
4948
5001
|
running: "running";
|
|
4949
|
-
cancelled: "cancelled";
|
|
4950
5002
|
ok: "ok";
|
|
5003
|
+
cancelled: "cancelled";
|
|
4951
5004
|
}>>;
|
|
4952
5005
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
4953
5006
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5031,8 +5084,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
5031
5084
|
key: z$1.ZodString;
|
|
5032
5085
|
namespace: z$1.ZodString;
|
|
5033
5086
|
operationType: z$1.ZodEnum<{
|
|
5034
|
-
value: "value";
|
|
5035
5087
|
span: "span";
|
|
5088
|
+
value: "value";
|
|
5036
5089
|
}>;
|
|
5037
5090
|
operationName: z$1.ZodString;
|
|
5038
5091
|
storedAt: z$1.ZodString;
|
|
@@ -5042,8 +5095,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
5042
5095
|
key: z$1.ZodString;
|
|
5043
5096
|
namespace: z$1.ZodString;
|
|
5044
5097
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5045
|
-
value: "value";
|
|
5046
5098
|
span: "span";
|
|
5099
|
+
value: "value";
|
|
5047
5100
|
}>>;
|
|
5048
5101
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
5049
5102
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5055,8 +5108,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
5055
5108
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5056
5109
|
error: "error";
|
|
5057
5110
|
running: "running";
|
|
5058
|
-
cancelled: "cancelled";
|
|
5059
5111
|
ok: "ok";
|
|
5112
|
+
cancelled: "cancelled";
|
|
5060
5113
|
}>>;
|
|
5061
5114
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5062
5115
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5149,8 +5202,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
5149
5202
|
key: z$1.ZodString;
|
|
5150
5203
|
namespace: z$1.ZodString;
|
|
5151
5204
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5152
|
-
value: "value";
|
|
5153
5205
|
span: "span";
|
|
5206
|
+
value: "value";
|
|
5154
5207
|
}>>;
|
|
5155
5208
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
5156
5209
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5162,8 +5215,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
5162
5215
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5163
5216
|
error: "error";
|
|
5164
5217
|
running: "running";
|
|
5165
|
-
cancelled: "cancelled";
|
|
5166
5218
|
ok: "ok";
|
|
5219
|
+
cancelled: "cancelled";
|
|
5167
5220
|
}>>;
|
|
5168
5221
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5169
5222
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5255,8 +5308,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
5255
5308
|
key: z$1.ZodString;
|
|
5256
5309
|
namespace: z$1.ZodString;
|
|
5257
5310
|
operationType: z$1.ZodEnum<{
|
|
5258
|
-
value: "value";
|
|
5259
5311
|
span: "span";
|
|
5312
|
+
value: "value";
|
|
5260
5313
|
}>;
|
|
5261
5314
|
operationName: z$1.ZodString;
|
|
5262
5315
|
storedAt: z$1.ZodString;
|
|
@@ -5266,8 +5319,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
5266
5319
|
key: z$1.ZodString;
|
|
5267
5320
|
namespace: z$1.ZodString;
|
|
5268
5321
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5269
|
-
value: "value";
|
|
5270
5322
|
span: "span";
|
|
5323
|
+
value: "value";
|
|
5271
5324
|
}>>;
|
|
5272
5325
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
5273
5326
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5279,8 +5332,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
5279
5332
|
finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5280
5333
|
error: "error";
|
|
5281
5334
|
running: "running";
|
|
5282
|
-
cancelled: "cancelled";
|
|
5283
5335
|
ok: "ok";
|
|
5336
|
+
cancelled: "cancelled";
|
|
5284
5337
|
}>>;
|
|
5285
5338
|
finalError: z$1.ZodOptional<z$1.ZodObject<{
|
|
5286
5339
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5441,8 +5494,8 @@ type SseEnvelope = z$1.infer<typeof sseEnvelopeSchema$1>; //#endregion
|
|
|
5441
5494
|
//#region src/schemas/api.d.ts
|
|
5442
5495
|
/** Lifecycle state for an app config reload triggered by `agent-evals.config.ts`. */
|
|
5443
5496
|
declare const configReloadStatusSchema: z$1.ZodEnum<{
|
|
5444
|
-
pending: "pending";
|
|
5445
5497
|
idle: "idle";
|
|
5498
|
+
pending: "pending";
|
|
5446
5499
|
reloading: "reloading";
|
|
5447
5500
|
}>;
|
|
5448
5501
|
/** Status for config reloads in the long-running app server. */
|
|
@@ -5450,8 +5503,8 @@ type ConfigReloadStatus = z$1.infer<typeof configReloadStatusSchema>;
|
|
|
5450
5503
|
/** UI/API-visible state for config reloads in `agent-evals app`. */
|
|
5451
5504
|
declare const configReloadStateSchema$1: z$1.ZodObject<{
|
|
5452
5505
|
status: z$1.ZodEnum<{
|
|
5453
|
-
pending: "pending";
|
|
5454
5506
|
idle: "idle";
|
|
5507
|
+
pending: "pending";
|
|
5455
5508
|
reloading: "reloading";
|
|
5456
5509
|
}>;
|
|
5457
5510
|
activeRunCount: z$1.ZodNumber;
|
|
@@ -5464,9 +5517,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
|
|
|
5464
5517
|
declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
5465
5518
|
target: z$1.ZodObject<{
|
|
5466
5519
|
mode: z$1.ZodEnum<{
|
|
5467
|
-
caseIds: "caseIds";
|
|
5468
5520
|
all: "all";
|
|
5469
5521
|
evalIds: "evalIds";
|
|
5522
|
+
caseIds: "caseIds";
|
|
5470
5523
|
}>;
|
|
5471
5524
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
5472
5525
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -5478,9 +5531,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
|
5478
5531
|
temporary: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5479
5532
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
5480
5533
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
5481
|
-
refresh: "refresh";
|
|
5482
|
-
bypass: "bypass";
|
|
5483
5534
|
use: "use";
|
|
5535
|
+
bypass: "bypass";
|
|
5536
|
+
refresh: "refresh";
|
|
5484
5537
|
}>>;
|
|
5485
5538
|
}, z$1.core.$strip>>;
|
|
5486
5539
|
manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -6366,6 +6419,7 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
6366
6419
|
phase: z$1.ZodEnum<{
|
|
6367
6420
|
eval: "eval";
|
|
6368
6421
|
derive: "derive";
|
|
6422
|
+
tracingAssertions: "tracingAssertions";
|
|
6369
6423
|
outputsSchema: "outputsSchema";
|
|
6370
6424
|
scorer: "scorer";
|
|
6371
6425
|
}>;
|
|
@@ -6992,7 +7046,8 @@ type EvalRunner = {
|
|
|
6992
7046
|
getEvals(): EvalSummary$1[]; /** Look up one discovered eval by id. */
|
|
6993
7047
|
getEval(id: string): EvalSummary$1 | undefined; /** Return discovery errors that should be shown before running evals. */
|
|
6994
7048
|
getDiscoveryIssues(): DiscoveryIssue$1[]; /** Return current config-reload state for the long-running app server. */
|
|
6995
|
-
getConfigReloadState(): ConfigReloadState$1; /**
|
|
7049
|
+
getConfigReloadState(): ConfigReloadState$1; /** Return the effective per-run case concurrency after applying defaults. */
|
|
7050
|
+
getConfiguredConcurrency(): number; /** Re-scan configured eval files and emit a discovery update to listeners. */
|
|
6996
7051
|
refreshDiscovery(): Promise<void>;
|
|
6997
7052
|
startRun(request: CreateRunRequest$1): Promise<{
|
|
6998
7053
|
manifest: RunManifest$1;
|
|
@@ -7227,4 +7282,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
|
|
|
7227
7282
|
/** Return whether the active eval case has tags matching the typed input. */
|
|
7228
7283
|
declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
|
|
7229
7284
|
//#endregion
|
|
7230
|
-
export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
|
7285
|
+
export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|