@ls-stack/agent-eval 0.58.4 → 0.59.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-sGeXC4AT.mjs → app-B3PEtWqH.mjs} +5 -5
- package/dist/apps/web/dist/assets/{index-BXFsxHVc.js → index-BD6FXk5p.js} +79 -79
- package/dist/apps/web/dist/assets/index-C2fbGEsB.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-Bf5RzM8O.mjs → cli-Dkp2-rBm.mjs} +5 -5
- package/dist/index.d.mts +150 -112
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-CLkC-4Z1.mjs → runExecution-C3XVZHRC.mjs} +192 -11
- package/dist/{runOrchestration-BS-WxTee.mjs → runOrchestration-B5An-AEi.mjs} +2 -2
- package/dist/{runner-Bz5ZPqmm.mjs → runner-BJXz_V_V.mjs} +1 -1
- package/dist/{runner-DW-11txl.mjs → runner-C9J-1fkp.mjs} +2 -2
- package/dist/{src-BjMMDm_O.mjs → src-8dGXUULC.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +21 -5
- package/dist/apps/web/dist/assets/index-CHH7m5Cv.css +0 -1
package/dist/index.d.mts
CHANGED
|
@@ -398,6 +398,18 @@ type EvalCase$1$1<TInput = unknown> = {
|
|
|
398
398
|
input: TInput;
|
|
399
399
|
tags?: string[];
|
|
400
400
|
};
|
|
401
|
+
/** Normalized view of one tool-call span and its common tool metadata. */
|
|
402
|
+
type EvalToolCallSpan = {
|
|
403
|
+
/** Preferred tool name, using GenAI/Mastra identity metadata when present. */name: string; /** Original trace span display name. */
|
|
404
|
+
spanName: string; /** Original trace span kind. */
|
|
405
|
+
kind: string; /** Parsed tool-call arguments, or the raw value when parsing is not possible. */
|
|
406
|
+
arguments: unknown; /** Parsed tool-call result, or the raw value when parsing is not possible. */
|
|
407
|
+
result: unknown; /** Tool description from GenAI/Mastra metadata when present. */
|
|
408
|
+
description: string | undefined; /** Tool type from GenAI/Mastra metadata when present. */
|
|
409
|
+
toolType: string | undefined; /** Original span attributes. */
|
|
410
|
+
attributes: Record<string, unknown> | undefined; /** Original trace span for fields not normalized above. */
|
|
411
|
+
span: EvalTraceSpan$2;
|
|
412
|
+
};
|
|
401
413
|
/** Query helpers built from the flattened trace recorded for one eval case. */
|
|
402
414
|
type EvalTraceTree = {
|
|
403
415
|
/** Flat span list in creation order. */spans: EvalTraceSpan$2[]; /** Top-level spans whose `parentId` is `null`. */
|
|
@@ -405,10 +417,17 @@ type EvalTraceTree = {
|
|
|
405
417
|
findSpan: (name: string) => EvalTraceSpan$2 | undefined; /** Return every span whose name exactly matches `name`. */
|
|
406
418
|
findSpans: (name: string) => EvalTraceSpan$2[]; /** Return whether any span name exactly matches `name`. */
|
|
407
419
|
hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
|
|
408
|
-
findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'`. */
|
|
409
|
-
findToolCallSpans: () => EvalTraceSpan$2[];
|
|
410
|
-
|
|
411
|
-
|
|
420
|
+
findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'` or `kind: 'tool_call'`. */
|
|
421
|
+
findToolCallSpans: () => EvalTraceSpan$2[];
|
|
422
|
+
/**
|
|
423
|
+
* Return tool-call names, preferring GenAI/Mastra tool identity attributes
|
|
424
|
+
* when available.
|
|
425
|
+
*/
|
|
426
|
+
listToolCallSpanNames: () => string[]; /** Return whether a tool-call span name or tool identity matches `name`. */
|
|
427
|
+
hasToolCallSpan: (name: string) => boolean; /** Return normalized tool-call spans whose name or tool identity matches `name`. */
|
|
428
|
+
getToolCallSpans: (name: string) => EvalToolCallSpan[]; /** Return how many tool-call spans have a name or tool identity matching `toolName`. */
|
|
429
|
+
getToolCallSpanCount: (toolName: string) => number; /** Return whether a tool-call span name or tool identity appears exactly `expectedCalls` times. */
|
|
430
|
+
hasToolCallSpanCount: (toolName: string, expectedCalls: number) => boolean; /** Return span names in creation order, optionally filtered by kind. */
|
|
412
431
|
listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
|
|
413
432
|
listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
|
|
414
433
|
flattenDfs: () => EvalTraceSpan$2[];
|
|
@@ -2241,10 +2260,10 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
|
|
|
2241
2260
|
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
2242
2261
|
*/
|
|
2243
2262
|
declare const evalStatAggregateSchema: z$1.ZodEnum<{
|
|
2244
|
-
min: "min";
|
|
2245
|
-
max: "max";
|
|
2246
2263
|
sum: "sum";
|
|
2247
2264
|
avg: "avg";
|
|
2265
|
+
min: "min";
|
|
2266
|
+
max: "max";
|
|
2248
2267
|
best: "best";
|
|
2249
2268
|
worst: "worst";
|
|
2250
2269
|
}>;
|
|
@@ -2273,10 +2292,10 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2273
2292
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2274
2293
|
kind: z$1.ZodLiteral<"duration">;
|
|
2275
2294
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2276
|
-
min: "min";
|
|
2277
|
-
max: "max";
|
|
2278
2295
|
sum: "sum";
|
|
2279
2296
|
avg: "avg";
|
|
2297
|
+
min: "min";
|
|
2298
|
+
max: "max";
|
|
2280
2299
|
best: "best";
|
|
2281
2300
|
worst: "worst";
|
|
2282
2301
|
}>>;
|
|
@@ -2284,10 +2303,10 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2284
2303
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2285
2304
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2286
2305
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2287
|
-
min: "min";
|
|
2288
|
-
max: "max";
|
|
2289
2306
|
sum: "sum";
|
|
2290
2307
|
avg: "avg";
|
|
2308
|
+
min: "min";
|
|
2309
|
+
max: "max";
|
|
2291
2310
|
best: "best";
|
|
2292
2311
|
worst: "worst";
|
|
2293
2312
|
}>>;
|
|
@@ -2297,10 +2316,10 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2297
2316
|
key: z$1.ZodString;
|
|
2298
2317
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2299
2318
|
aggregate: z$1.ZodEnum<{
|
|
2300
|
-
min: "min";
|
|
2301
|
-
max: "max";
|
|
2302
2319
|
sum: "sum";
|
|
2303
2320
|
avg: "avg";
|
|
2321
|
+
min: "min";
|
|
2322
|
+
max: "max";
|
|
2304
2323
|
best: "best";
|
|
2305
2324
|
worst: "worst";
|
|
2306
2325
|
}>;
|
|
@@ -2337,10 +2356,10 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2337
2356
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2338
2357
|
kind: z$1.ZodLiteral<"duration">;
|
|
2339
2358
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2340
|
-
min: "min";
|
|
2341
|
-
max: "max";
|
|
2342
2359
|
sum: "sum";
|
|
2343
2360
|
avg: "avg";
|
|
2361
|
+
min: "min";
|
|
2362
|
+
max: "max";
|
|
2344
2363
|
best: "best";
|
|
2345
2364
|
worst: "worst";
|
|
2346
2365
|
}>>;
|
|
@@ -2348,10 +2367,10 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2348
2367
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2349
2368
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2350
2369
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2351
|
-
min: "min";
|
|
2352
|
-
max: "max";
|
|
2353
2370
|
sum: "sum";
|
|
2354
2371
|
avg: "avg";
|
|
2372
|
+
min: "min";
|
|
2373
|
+
max: "max";
|
|
2355
2374
|
best: "best";
|
|
2356
2375
|
worst: "worst";
|
|
2357
2376
|
}>>;
|
|
@@ -2361,10 +2380,10 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2361
2380
|
key: z$1.ZodString;
|
|
2362
2381
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2363
2382
|
aggregate: z$1.ZodEnum<{
|
|
2364
|
-
min: "min";
|
|
2365
|
-
max: "max";
|
|
2366
2383
|
sum: "sum";
|
|
2367
2384
|
avg: "avg";
|
|
2385
|
+
min: "min";
|
|
2386
|
+
max: "max";
|
|
2368
2387
|
best: "best";
|
|
2369
2388
|
worst: "worst";
|
|
2370
2389
|
}>;
|
|
@@ -2464,10 +2483,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2464
2483
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2465
2484
|
kind: z$1.ZodLiteral<"duration">;
|
|
2466
2485
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2467
|
-
min: "min";
|
|
2468
|
-
max: "max";
|
|
2469
2486
|
sum: "sum";
|
|
2470
2487
|
avg: "avg";
|
|
2488
|
+
min: "min";
|
|
2489
|
+
max: "max";
|
|
2471
2490
|
best: "best";
|
|
2472
2491
|
worst: "worst";
|
|
2473
2492
|
}>>;
|
|
@@ -2475,10 +2494,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2475
2494
|
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2476
2495
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2477
2496
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2478
|
-
min: "min";
|
|
2479
|
-
max: "max";
|
|
2480
2497
|
sum: "sum";
|
|
2481
2498
|
avg: "avg";
|
|
2499
|
+
min: "min";
|
|
2500
|
+
max: "max";
|
|
2482
2501
|
best: "best";
|
|
2483
2502
|
worst: "worst";
|
|
2484
2503
|
}>>;
|
|
@@ -2488,10 +2507,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2488
2507
|
key: z$1.ZodString;
|
|
2489
2508
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2490
2509
|
aggregate: z$1.ZodEnum<{
|
|
2491
|
-
min: "min";
|
|
2492
|
-
max: "max";
|
|
2493
2510
|
sum: "sum";
|
|
2494
2511
|
avg: "avg";
|
|
2512
|
+
min: "min";
|
|
2513
|
+
max: "max";
|
|
2495
2514
|
best: "best";
|
|
2496
2515
|
worst: "worst";
|
|
2497
2516
|
}>;
|
|
@@ -2515,10 +2534,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2515
2534
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2516
2535
|
}, z$1.core.$strip>], "kind">>>;
|
|
2517
2536
|
defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2518
|
-
min: "min";
|
|
2519
|
-
max: "max";
|
|
2520
2537
|
sum: "sum";
|
|
2521
2538
|
avg: "avg";
|
|
2539
|
+
min: "min";
|
|
2540
|
+
max: "max";
|
|
2522
2541
|
best: "best";
|
|
2523
2542
|
worst: "worst";
|
|
2524
2543
|
}>>;
|
|
@@ -2534,15 +2553,15 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2534
2553
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
2535
2554
|
source: z$1.ZodLiteral<"builtin">;
|
|
2536
2555
|
metric: z$1.ZodEnum<{
|
|
2537
|
-
durationMs: "durationMs";
|
|
2538
2556
|
passRate: "passRate";
|
|
2557
|
+
durationMs: "durationMs";
|
|
2539
2558
|
}>;
|
|
2540
2559
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2541
2560
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2542
|
-
error: "error";
|
|
2543
2561
|
success: "success";
|
|
2544
|
-
|
|
2562
|
+
error: "error";
|
|
2545
2563
|
warning: "warning";
|
|
2564
|
+
accent: "accent";
|
|
2546
2565
|
accentDim: "accentDim";
|
|
2547
2566
|
textMuted: "textMuted";
|
|
2548
2567
|
}>>;
|
|
@@ -2554,19 +2573,19 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2554
2573
|
source: z$1.ZodLiteral<"column">;
|
|
2555
2574
|
key: z$1.ZodString;
|
|
2556
2575
|
aggregate: z$1.ZodEnum<{
|
|
2557
|
-
min: "min";
|
|
2558
|
-
max: "max";
|
|
2559
2576
|
sum: "sum";
|
|
2560
2577
|
avg: "avg";
|
|
2578
|
+
min: "min";
|
|
2579
|
+
max: "max";
|
|
2561
2580
|
latest: "latest";
|
|
2562
2581
|
passThresholdRate: "passThresholdRate";
|
|
2563
2582
|
}>;
|
|
2564
2583
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2565
2584
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2566
|
-
error: "error";
|
|
2567
2585
|
success: "success";
|
|
2568
|
-
|
|
2586
|
+
error: "error";
|
|
2569
2587
|
warning: "warning";
|
|
2588
|
+
accent: "accent";
|
|
2570
2589
|
accentDim: "accentDim";
|
|
2571
2590
|
textMuted: "textMuted";
|
|
2572
2591
|
}>>;
|
|
@@ -2588,18 +2607,18 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2588
2607
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
2589
2608
|
source: z$1.ZodLiteral<"builtin">;
|
|
2590
2609
|
metric: z$1.ZodEnum<{
|
|
2591
|
-
durationMs: "durationMs";
|
|
2592
2610
|
passRate: "passRate";
|
|
2611
|
+
durationMs: "durationMs";
|
|
2593
2612
|
}>;
|
|
2594
2613
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2595
2614
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2596
2615
|
source: z$1.ZodLiteral<"column">;
|
|
2597
2616
|
key: z$1.ZodString;
|
|
2598
2617
|
aggregate: z$1.ZodEnum<{
|
|
2599
|
-
min: "min";
|
|
2600
|
-
max: "max";
|
|
2601
2618
|
sum: "sum";
|
|
2602
2619
|
avg: "avg";
|
|
2620
|
+
min: "min";
|
|
2621
|
+
max: "max";
|
|
2603
2622
|
latest: "latest";
|
|
2604
2623
|
passThresholdRate: "passThresholdRate";
|
|
2605
2624
|
}>;
|
|
@@ -2698,9 +2717,9 @@ declare const caseRowSchema$1: z$1.ZodObject<{
|
|
|
2698
2717
|
error: "error";
|
|
2699
2718
|
running: "running";
|
|
2700
2719
|
cancelled: "cancelled";
|
|
2701
|
-
pending: "pending";
|
|
2702
2720
|
pass: "pass";
|
|
2703
2721
|
fail: "fail";
|
|
2722
|
+
pending: "pending";
|
|
2704
2723
|
}>;
|
|
2705
2724
|
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
2706
2725
|
cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
|
|
@@ -2779,9 +2798,9 @@ declare const runLogLevelSchema: z$1.ZodEnum<{
|
|
|
2779
2798
|
type RunLogLevel = z$1.infer<typeof runLogLevelSchema>;
|
|
2780
2799
|
/** Eval runner phase that emitted a captured case log. */
|
|
2781
2800
|
declare const runLogPhaseSchema: z$1.ZodEnum<{
|
|
2782
|
-
tracingAssertions: "tracingAssertions";
|
|
2783
2801
|
eval: "eval";
|
|
2784
2802
|
derive: "derive";
|
|
2803
|
+
tracingAssertions: "tracingAssertions";
|
|
2785
2804
|
outputsSchema: "outputsSchema";
|
|
2786
2805
|
scorer: "scorer";
|
|
2787
2806
|
}>;
|
|
@@ -2806,9 +2825,9 @@ declare const runLogEntrySchema: z$1.ZodObject<{
|
|
|
2806
2825
|
warn: "warn";
|
|
2807
2826
|
}>;
|
|
2808
2827
|
phase: z$1.ZodEnum<{
|
|
2809
|
-
tracingAssertions: "tracingAssertions";
|
|
2810
2828
|
eval: "eval";
|
|
2811
2829
|
derive: "derive";
|
|
2830
|
+
tracingAssertions: "tracingAssertions";
|
|
2812
2831
|
outputsSchema: "outputsSchema";
|
|
2813
2832
|
scorer: "scorer";
|
|
2814
2833
|
}>;
|
|
@@ -2901,10 +2920,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2901
2920
|
namespace: z$1.ZodString;
|
|
2902
2921
|
key: z$1.ZodString;
|
|
2903
2922
|
status: z$1.ZodEnum<{
|
|
2904
|
-
bypass: "bypass";
|
|
2905
|
-
refresh: "refresh";
|
|
2906
2923
|
hit: "hit";
|
|
2907
2924
|
miss: "miss";
|
|
2925
|
+
refresh: "refresh";
|
|
2926
|
+
bypass: "bypass";
|
|
2908
2927
|
}>;
|
|
2909
2928
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2910
2929
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2925,9 +2944,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2925
2944
|
error: "error";
|
|
2926
2945
|
running: "running";
|
|
2927
2946
|
cancelled: "cancelled";
|
|
2928
|
-
pending: "pending";
|
|
2929
2947
|
pass: "pass";
|
|
2930
2948
|
fail: "fail";
|
|
2949
|
+
pending: "pending";
|
|
2931
2950
|
}>;
|
|
2932
2951
|
input: z$1.ZodUnknown;
|
|
2933
2952
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -3073,10 +3092,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3073
3092
|
namespace: z$1.ZodString;
|
|
3074
3093
|
key: z$1.ZodString;
|
|
3075
3094
|
status: z$1.ZodEnum<{
|
|
3076
|
-
bypass: "bypass";
|
|
3077
|
-
refresh: "refresh";
|
|
3078
3095
|
hit: "hit";
|
|
3079
3096
|
miss: "miss";
|
|
3097
|
+
refresh: "refresh";
|
|
3098
|
+
bypass: "bypass";
|
|
3080
3099
|
}>;
|
|
3081
3100
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3082
3101
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -3165,9 +3184,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3165
3184
|
warn: "warn";
|
|
3166
3185
|
}>;
|
|
3167
3186
|
phase: z$1.ZodEnum<{
|
|
3168
|
-
tracingAssertions: "tracingAssertions";
|
|
3169
3187
|
eval: "eval";
|
|
3170
3188
|
derive: "derive";
|
|
3189
|
+
tracingAssertions: "tracingAssertions";
|
|
3171
3190
|
outputsSchema: "outputsSchema";
|
|
3172
3191
|
scorer: "scorer";
|
|
3173
3192
|
}>;
|
|
@@ -3194,10 +3213,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3194
3213
|
namespace: z$1.ZodString;
|
|
3195
3214
|
key: z$1.ZodString;
|
|
3196
3215
|
status: z$1.ZodEnum<{
|
|
3197
|
-
bypass: "bypass";
|
|
3198
|
-
refresh: "refresh";
|
|
3199
3216
|
hit: "hit";
|
|
3200
3217
|
miss: "miss";
|
|
3218
|
+
refresh: "refresh";
|
|
3219
|
+
bypass: "bypass";
|
|
3201
3220
|
}>;
|
|
3202
3221
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3203
3222
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -3240,8 +3259,8 @@ type EvalChartType = z$1.infer<typeof evalChartTypeSchema>;
|
|
|
3240
3259
|
* than from a per-case column.
|
|
3241
3260
|
*/
|
|
3242
3261
|
declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
|
|
3243
|
-
durationMs: "durationMs";
|
|
3244
3262
|
passRate: "passRate";
|
|
3263
|
+
durationMs: "durationMs";
|
|
3245
3264
|
}>;
|
|
3246
3265
|
/**
|
|
3247
3266
|
* Run-level metric sourced from the aggregated `RunSummary` for a run, rather
|
|
@@ -3250,10 +3269,10 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
|
|
|
3250
3269
|
type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
|
|
3251
3270
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
3252
3271
|
declare const evalChartAggregateSchema: z$1.ZodEnum<{
|
|
3253
|
-
min: "min";
|
|
3254
|
-
max: "max";
|
|
3255
3272
|
sum: "sum";
|
|
3256
3273
|
avg: "avg";
|
|
3274
|
+
min: "min";
|
|
3275
|
+
max: "max";
|
|
3257
3276
|
latest: "latest";
|
|
3258
3277
|
passThresholdRate: "passThresholdRate";
|
|
3259
3278
|
}>;
|
|
@@ -3264,10 +3283,10 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
|
|
|
3264
3283
|
* not emit raw hex so authored evals stay decoupled from the web theme.
|
|
3265
3284
|
*/
|
|
3266
3285
|
declare const evalChartColorSchema: z$1.ZodEnum<{
|
|
3267
|
-
error: "error";
|
|
3268
3286
|
success: "success";
|
|
3269
|
-
|
|
3287
|
+
error: "error";
|
|
3270
3288
|
warning: "warning";
|
|
3289
|
+
accent: "accent";
|
|
3271
3290
|
accentDim: "accentDim";
|
|
3272
3291
|
textMuted: "textMuted";
|
|
3273
3292
|
}>;
|
|
@@ -3288,15 +3307,15 @@ type EvalChartAxis = z$1.infer<typeof evalChartAxisSchema>;
|
|
|
3288
3307
|
declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3289
3308
|
source: z$1.ZodLiteral<"builtin">;
|
|
3290
3309
|
metric: z$1.ZodEnum<{
|
|
3291
|
-
durationMs: "durationMs";
|
|
3292
3310
|
passRate: "passRate";
|
|
3311
|
+
durationMs: "durationMs";
|
|
3293
3312
|
}>;
|
|
3294
3313
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3295
3314
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3296
|
-
error: "error";
|
|
3297
3315
|
success: "success";
|
|
3298
|
-
|
|
3316
|
+
error: "error";
|
|
3299
3317
|
warning: "warning";
|
|
3318
|
+
accent: "accent";
|
|
3300
3319
|
accentDim: "accentDim";
|
|
3301
3320
|
textMuted: "textMuted";
|
|
3302
3321
|
}>>;
|
|
@@ -3308,19 +3327,19 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3308
3327
|
source: z$1.ZodLiteral<"column">;
|
|
3309
3328
|
key: z$1.ZodString;
|
|
3310
3329
|
aggregate: z$1.ZodEnum<{
|
|
3311
|
-
min: "min";
|
|
3312
|
-
max: "max";
|
|
3313
3330
|
sum: "sum";
|
|
3314
3331
|
avg: "avg";
|
|
3332
|
+
min: "min";
|
|
3333
|
+
max: "max";
|
|
3315
3334
|
latest: "latest";
|
|
3316
3335
|
passThresholdRate: "passThresholdRate";
|
|
3317
3336
|
}>;
|
|
3318
3337
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3319
3338
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3320
|
-
error: "error";
|
|
3321
3339
|
success: "success";
|
|
3322
|
-
|
|
3340
|
+
error: "error";
|
|
3323
3341
|
warning: "warning";
|
|
3342
|
+
accent: "accent";
|
|
3324
3343
|
accentDim: "accentDim";
|
|
3325
3344
|
textMuted: "textMuted";
|
|
3326
3345
|
}>>;
|
|
@@ -3335,18 +3354,18 @@ type EvalChartMetric = z$1.infer<typeof evalChartMetricSchema>;
|
|
|
3335
3354
|
declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3336
3355
|
source: z$1.ZodLiteral<"builtin">;
|
|
3337
3356
|
metric: z$1.ZodEnum<{
|
|
3338
|
-
durationMs: "durationMs";
|
|
3339
3357
|
passRate: "passRate";
|
|
3358
|
+
durationMs: "durationMs";
|
|
3340
3359
|
}>;
|
|
3341
3360
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3342
3361
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
3343
3362
|
source: z$1.ZodLiteral<"column">;
|
|
3344
3363
|
key: z$1.ZodString;
|
|
3345
3364
|
aggregate: z$1.ZodEnum<{
|
|
3346
|
-
min: "min";
|
|
3347
|
-
max: "max";
|
|
3348
3365
|
sum: "sum";
|
|
3349
3366
|
avg: "avg";
|
|
3367
|
+
min: "min";
|
|
3368
|
+
max: "max";
|
|
3350
3369
|
latest: "latest";
|
|
3351
3370
|
passThresholdRate: "passThresholdRate";
|
|
3352
3371
|
}>;
|
|
@@ -3371,15 +3390,15 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3371
3390
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3372
3391
|
source: z$1.ZodLiteral<"builtin">;
|
|
3373
3392
|
metric: z$1.ZodEnum<{
|
|
3374
|
-
durationMs: "durationMs";
|
|
3375
3393
|
passRate: "passRate";
|
|
3394
|
+
durationMs: "durationMs";
|
|
3376
3395
|
}>;
|
|
3377
3396
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3378
3397
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3379
|
-
error: "error";
|
|
3380
3398
|
success: "success";
|
|
3381
|
-
|
|
3399
|
+
error: "error";
|
|
3382
3400
|
warning: "warning";
|
|
3401
|
+
accent: "accent";
|
|
3383
3402
|
accentDim: "accentDim";
|
|
3384
3403
|
textMuted: "textMuted";
|
|
3385
3404
|
}>>;
|
|
@@ -3391,19 +3410,19 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3391
3410
|
source: z$1.ZodLiteral<"column">;
|
|
3392
3411
|
key: z$1.ZodString;
|
|
3393
3412
|
aggregate: z$1.ZodEnum<{
|
|
3394
|
-
min: "min";
|
|
3395
|
-
max: "max";
|
|
3396
3413
|
sum: "sum";
|
|
3397
3414
|
avg: "avg";
|
|
3415
|
+
min: "min";
|
|
3416
|
+
max: "max";
|
|
3398
3417
|
latest: "latest";
|
|
3399
3418
|
passThresholdRate: "passThresholdRate";
|
|
3400
3419
|
}>;
|
|
3401
3420
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3402
3421
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3403
|
-
error: "error";
|
|
3404
3422
|
success: "success";
|
|
3405
|
-
|
|
3423
|
+
error: "error";
|
|
3406
3424
|
warning: "warning";
|
|
3425
|
+
accent: "accent";
|
|
3407
3426
|
accentDim: "accentDim";
|
|
3408
3427
|
textMuted: "textMuted";
|
|
3409
3428
|
}>>;
|
|
@@ -3425,18 +3444,18 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3425
3444
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3426
3445
|
source: z$1.ZodLiteral<"builtin">;
|
|
3427
3446
|
metric: z$1.ZodEnum<{
|
|
3428
|
-
durationMs: "durationMs";
|
|
3429
3447
|
passRate: "passRate";
|
|
3448
|
+
durationMs: "durationMs";
|
|
3430
3449
|
}>;
|
|
3431
3450
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3432
3451
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
3433
3452
|
source: z$1.ZodLiteral<"column">;
|
|
3434
3453
|
key: z$1.ZodString;
|
|
3435
3454
|
aggregate: z$1.ZodEnum<{
|
|
3436
|
-
min: "min";
|
|
3437
|
-
max: "max";
|
|
3438
3455
|
sum: "sum";
|
|
3439
3456
|
avg: "avg";
|
|
3457
|
+
min: "min";
|
|
3458
|
+
max: "max";
|
|
3440
3459
|
latest: "latest";
|
|
3441
3460
|
passThresholdRate: "passThresholdRate";
|
|
3442
3461
|
}>;
|
|
@@ -3461,15 +3480,15 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3461
3480
|
metrics: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3462
3481
|
source: z$1.ZodLiteral<"builtin">;
|
|
3463
3482
|
metric: z$1.ZodEnum<{
|
|
3464
|
-
durationMs: "durationMs";
|
|
3465
3483
|
passRate: "passRate";
|
|
3484
|
+
durationMs: "durationMs";
|
|
3466
3485
|
}>;
|
|
3467
3486
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3468
3487
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3469
|
-
error: "error";
|
|
3470
3488
|
success: "success";
|
|
3471
|
-
|
|
3489
|
+
error: "error";
|
|
3472
3490
|
warning: "warning";
|
|
3491
|
+
accent: "accent";
|
|
3473
3492
|
accentDim: "accentDim";
|
|
3474
3493
|
textMuted: "textMuted";
|
|
3475
3494
|
}>>;
|
|
@@ -3481,19 +3500,19 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3481
3500
|
source: z$1.ZodLiteral<"column">;
|
|
3482
3501
|
key: z$1.ZodString;
|
|
3483
3502
|
aggregate: z$1.ZodEnum<{
|
|
3484
|
-
min: "min";
|
|
3485
|
-
max: "max";
|
|
3486
3503
|
sum: "sum";
|
|
3487
3504
|
avg: "avg";
|
|
3505
|
+
min: "min";
|
|
3506
|
+
max: "max";
|
|
3488
3507
|
latest: "latest";
|
|
3489
3508
|
passThresholdRate: "passThresholdRate";
|
|
3490
3509
|
}>;
|
|
3491
3510
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3492
3511
|
color: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3493
|
-
error: "error";
|
|
3494
3512
|
success: "success";
|
|
3495
|
-
|
|
3513
|
+
error: "error";
|
|
3496
3514
|
warning: "warning";
|
|
3515
|
+
accent: "accent";
|
|
3497
3516
|
accentDim: "accentDim";
|
|
3498
3517
|
textMuted: "textMuted";
|
|
3499
3518
|
}>>;
|
|
@@ -3515,18 +3534,18 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3515
3534
|
tooltipExtras: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
3516
3535
|
source: z$1.ZodLiteral<"builtin">;
|
|
3517
3536
|
metric: z$1.ZodEnum<{
|
|
3518
|
-
durationMs: "durationMs";
|
|
3519
3537
|
passRate: "passRate";
|
|
3538
|
+
durationMs: "durationMs";
|
|
3520
3539
|
}>;
|
|
3521
3540
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
3522
3541
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
3523
3542
|
source: z$1.ZodLiteral<"column">;
|
|
3524
3543
|
key: z$1.ZodString;
|
|
3525
3544
|
aggregate: z$1.ZodEnum<{
|
|
3526
|
-
min: "min";
|
|
3527
|
-
max: "max";
|
|
3528
3545
|
sum: "sum";
|
|
3529
3546
|
avg: "avg";
|
|
3547
|
+
min: "min";
|
|
3548
|
+
max: "max";
|
|
3530
3549
|
latest: "latest";
|
|
3531
3550
|
passThresholdRate: "passThresholdRate";
|
|
3532
3551
|
}>;
|
|
@@ -3555,8 +3574,8 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3555
3574
|
target: z$1.ZodObject<{
|
|
3556
3575
|
mode: z$1.ZodEnum<{
|
|
3557
3576
|
all: "all";
|
|
3558
|
-
evalIds: "evalIds";
|
|
3559
3577
|
caseIds: "caseIds";
|
|
3578
|
+
evalIds: "evalIds";
|
|
3560
3579
|
}>;
|
|
3561
3580
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
3562
3581
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -3570,9 +3589,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3570
3589
|
median: "median";
|
|
3571
3590
|
}>>>;
|
|
3572
3591
|
cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3573
|
-
use: "use";
|
|
3574
|
-
bypass: "bypass";
|
|
3575
3592
|
refresh: "refresh";
|
|
3593
|
+
bypass: "bypass";
|
|
3594
|
+
use: "use";
|
|
3576
3595
|
}>>;
|
|
3577
3596
|
}, z$1.core.$strip>;
|
|
3578
3597
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -3668,8 +3687,8 @@ type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
|
|
|
3668
3687
|
/** Built-in eval-level output/column keys. */
|
|
3669
3688
|
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
3670
3689
|
declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
|
|
3671
|
-
apiCalls: "apiCalls";
|
|
3672
3690
|
costUsd: "costUsd";
|
|
3691
|
+
apiCalls: "apiCalls";
|
|
3673
3692
|
llmTurns: "llmTurns";
|
|
3674
3693
|
inputTokens: "inputTokens";
|
|
3675
3694
|
outputTokens: "outputTokens";
|
|
@@ -3687,6 +3706,18 @@ type EvalCase$1<TInput = unknown> = {
|
|
|
3687
3706
|
input: TInput;
|
|
3688
3707
|
tags?: string[];
|
|
3689
3708
|
};
|
|
3709
|
+
/** Normalized view of one tool-call span and its common tool metadata. */
|
|
3710
|
+
type EvalToolCallSpan$1 = {
|
|
3711
|
+
/** Preferred tool name, using GenAI/Mastra identity metadata when present. */name: string; /** Original trace span display name. */
|
|
3712
|
+
spanName: string; /** Original trace span kind. */
|
|
3713
|
+
kind: string; /** Parsed tool-call arguments, or the raw value when parsing is not possible. */
|
|
3714
|
+
arguments: unknown; /** Parsed tool-call result, or the raw value when parsing is not possible. */
|
|
3715
|
+
result: unknown; /** Tool description from GenAI/Mastra metadata when present. */
|
|
3716
|
+
description: string | undefined; /** Tool type from GenAI/Mastra metadata when present. */
|
|
3717
|
+
toolType: string | undefined; /** Original span attributes. */
|
|
3718
|
+
attributes: Record<string, unknown> | undefined; /** Original trace span for fields not normalized above. */
|
|
3719
|
+
span: EvalTraceSpan$1;
|
|
3720
|
+
};
|
|
3690
3721
|
/** Query helpers built from the flattened trace recorded for one eval case. */
|
|
3691
3722
|
type EvalTraceTree$1 = {
|
|
3692
3723
|
/** Flat span list in creation order. */spans: EvalTraceSpan$1[]; /** Top-level spans whose `parentId` is `null`. */
|
|
@@ -3694,10 +3725,17 @@ type EvalTraceTree$1 = {
|
|
|
3694
3725
|
findSpan: (name: string) => EvalTraceSpan$1 | undefined; /** Return every span whose name exactly matches `name`. */
|
|
3695
3726
|
findSpans: (name: string) => EvalTraceSpan$1[]; /** Return whether any span name exactly matches `name`. */
|
|
3696
3727
|
hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
|
|
3697
|
-
findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'`. */
|
|
3698
|
-
findToolCallSpans: () => EvalTraceSpan$1[];
|
|
3699
|
-
|
|
3700
|
-
|
|
3728
|
+
findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'` or `kind: 'tool_call'`. */
|
|
3729
|
+
findToolCallSpans: () => EvalTraceSpan$1[];
|
|
3730
|
+
/**
|
|
3731
|
+
* Return tool-call names, preferring GenAI/Mastra tool identity attributes
|
|
3732
|
+
* when available.
|
|
3733
|
+
*/
|
|
3734
|
+
listToolCallSpanNames: () => string[]; /** Return whether a tool-call span name or tool identity matches `name`. */
|
|
3735
|
+
hasToolCallSpan: (name: string) => boolean; /** Return normalized tool-call spans whose name or tool identity matches `name`. */
|
|
3736
|
+
getToolCallSpans: (name: string) => EvalToolCallSpan$1[]; /** Return how many tool-call spans have a name or tool identity matching `toolName`. */
|
|
3737
|
+
getToolCallSpanCount: (toolName: string) => number; /** Return whether a tool-call span name or tool identity appears exactly `expectedCalls` times. */
|
|
3738
|
+
hasToolCallSpanCount: (toolName: string, expectedCalls: number) => boolean; /** Return span names in creation order, optionally filtered by kind. */
|
|
3701
3739
|
listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
|
|
3702
3740
|
listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
|
|
3703
3741
|
flattenDfs: () => EvalTraceSpan$1[];
|
|
@@ -4529,9 +4567,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
|
|
|
4529
4567
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
4530
4568
|
*/
|
|
4531
4569
|
declare const cacheModeSchema: z$1.ZodEnum<{
|
|
4532
|
-
use: "use";
|
|
4533
|
-
bypass: "bypass";
|
|
4534
4570
|
refresh: "refresh";
|
|
4571
|
+
bypass: "bypass";
|
|
4572
|
+
use: "use";
|
|
4535
4573
|
}>;
|
|
4536
4574
|
/** Mode controlling how cached spans behave during a run. */
|
|
4537
4575
|
type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
@@ -4545,17 +4583,17 @@ declare const spanCacheOptionsSchema: z$1.ZodObject<{
|
|
|
4545
4583
|
type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
|
|
4546
4584
|
/** Category of operation stored in the eval cache. */
|
|
4547
4585
|
declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
4548
|
-
span: "span";
|
|
4549
4586
|
value: "value";
|
|
4587
|
+
span: "span";
|
|
4550
4588
|
}>;
|
|
4551
4589
|
/** Category of operation stored in the eval cache. */
|
|
4552
4590
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
4553
4591
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4554
4592
|
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
4555
|
-
bypass: "bypass";
|
|
4556
|
-
refresh: "refresh";
|
|
4557
4593
|
hit: "hit";
|
|
4558
4594
|
miss: "miss";
|
|
4595
|
+
refresh: "refresh";
|
|
4596
|
+
bypass: "bypass";
|
|
4559
4597
|
}>;
|
|
4560
4598
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4561
4599
|
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
@@ -4572,10 +4610,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
4572
4610
|
namespace: z$1.ZodString;
|
|
4573
4611
|
key: z$1.ZodString;
|
|
4574
4612
|
status: z$1.ZodEnum<{
|
|
4575
|
-
bypass: "bypass";
|
|
4576
|
-
refresh: "refresh";
|
|
4577
4613
|
hit: "hit";
|
|
4578
4614
|
miss: "miss";
|
|
4615
|
+
refresh: "refresh";
|
|
4616
|
+
bypass: "bypass";
|
|
4579
4617
|
}>;
|
|
4580
4618
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
4581
4619
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -4771,8 +4809,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
4771
4809
|
key: z$1.ZodString;
|
|
4772
4810
|
namespace: z$1.ZodString;
|
|
4773
4811
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4774
|
-
span: "span";
|
|
4775
4812
|
value: "value";
|
|
4813
|
+
span: "span";
|
|
4776
4814
|
}>>;
|
|
4777
4815
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4778
4816
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4878,8 +4916,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4878
4916
|
key: z$1.ZodString;
|
|
4879
4917
|
namespace: z$1.ZodString;
|
|
4880
4918
|
operationType: z$1.ZodEnum<{
|
|
4881
|
-
span: "span";
|
|
4882
4919
|
value: "value";
|
|
4920
|
+
span: "span";
|
|
4883
4921
|
}>;
|
|
4884
4922
|
operationName: z$1.ZodString;
|
|
4885
4923
|
storedAt: z$1.ZodString;
|
|
@@ -4889,8 +4927,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
4889
4927
|
key: z$1.ZodString;
|
|
4890
4928
|
namespace: z$1.ZodString;
|
|
4891
4929
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4892
|
-
span: "span";
|
|
4893
4930
|
value: "value";
|
|
4931
|
+
span: "span";
|
|
4894
4932
|
}>>;
|
|
4895
4933
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
4896
4934
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -4996,8 +5034,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
4996
5034
|
key: z$1.ZodString;
|
|
4997
5035
|
namespace: z$1.ZodString;
|
|
4998
5036
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
4999
|
-
span: "span";
|
|
5000
5037
|
value: "value";
|
|
5038
|
+
span: "span";
|
|
5001
5039
|
}>>;
|
|
5002
5040
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
5003
5041
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5094,8 +5132,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
5094
5132
|
key: z$1.ZodString;
|
|
5095
5133
|
namespace: z$1.ZodString;
|
|
5096
5134
|
operationType: z$1.ZodEnum<{
|
|
5097
|
-
span: "span";
|
|
5098
5135
|
value: "value";
|
|
5136
|
+
span: "span";
|
|
5099
5137
|
}>;
|
|
5100
5138
|
operationName: z$1.ZodString;
|
|
5101
5139
|
storedAt: z$1.ZodString;
|
|
@@ -5105,8 +5143,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
|
|
|
5105
5143
|
key: z$1.ZodString;
|
|
5106
5144
|
namespace: z$1.ZodString;
|
|
5107
5145
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5108
|
-
span: "span";
|
|
5109
5146
|
value: "value";
|
|
5147
|
+
span: "span";
|
|
5110
5148
|
}>>;
|
|
5111
5149
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
5112
5150
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5212,8 +5250,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
5212
5250
|
key: z$1.ZodString;
|
|
5213
5251
|
namespace: z$1.ZodString;
|
|
5214
5252
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5215
|
-
span: "span";
|
|
5216
5253
|
value: "value";
|
|
5254
|
+
span: "span";
|
|
5217
5255
|
}>>;
|
|
5218
5256
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
5219
5257
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5318,8 +5356,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
5318
5356
|
key: z$1.ZodString;
|
|
5319
5357
|
namespace: z$1.ZodString;
|
|
5320
5358
|
operationType: z$1.ZodEnum<{
|
|
5321
|
-
span: "span";
|
|
5322
5359
|
value: "value";
|
|
5360
|
+
span: "span";
|
|
5323
5361
|
}>;
|
|
5324
5362
|
operationName: z$1.ZodString;
|
|
5325
5363
|
storedAt: z$1.ZodString;
|
|
@@ -5329,8 +5367,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
5329
5367
|
key: z$1.ZodString;
|
|
5330
5368
|
namespace: z$1.ZodString;
|
|
5331
5369
|
operationType: z$1.ZodOptional<z$1.ZodEnum<{
|
|
5332
|
-
span: "span";
|
|
5333
5370
|
value: "value";
|
|
5371
|
+
span: "span";
|
|
5334
5372
|
}>>;
|
|
5335
5373
|
operationName: z$1.ZodOptional<z$1.ZodString>;
|
|
5336
5374
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -5504,8 +5542,8 @@ type SseEnvelope = z$1.infer<typeof sseEnvelopeSchema$1>; //#endregion
|
|
|
5504
5542
|
//#region src/schemas/api.d.ts
|
|
5505
5543
|
/** Lifecycle state for an app config reload triggered by `agent-evals.config.ts`. */
|
|
5506
5544
|
declare const configReloadStatusSchema: z$1.ZodEnum<{
|
|
5507
|
-
idle: "idle";
|
|
5508
5545
|
pending: "pending";
|
|
5546
|
+
idle: "idle";
|
|
5509
5547
|
reloading: "reloading";
|
|
5510
5548
|
}>;
|
|
5511
5549
|
/** Status for config reloads in the long-running app server. */
|
|
@@ -5513,8 +5551,8 @@ type ConfigReloadStatus = z$1.infer<typeof configReloadStatusSchema>;
|
|
|
5513
5551
|
/** UI/API-visible state for config reloads in `agent-evals app`. */
|
|
5514
5552
|
declare const configReloadStateSchema$1: z$1.ZodObject<{
|
|
5515
5553
|
status: z$1.ZodEnum<{
|
|
5516
|
-
idle: "idle";
|
|
5517
5554
|
pending: "pending";
|
|
5555
|
+
idle: "idle";
|
|
5518
5556
|
reloading: "reloading";
|
|
5519
5557
|
}>;
|
|
5520
5558
|
activeRunCount: z$1.ZodNumber;
|
|
@@ -5528,8 +5566,8 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
|
5528
5566
|
target: z$1.ZodObject<{
|
|
5529
5567
|
mode: z$1.ZodEnum<{
|
|
5530
5568
|
all: "all";
|
|
5531
|
-
evalIds: "evalIds";
|
|
5532
5569
|
caseIds: "caseIds";
|
|
5570
|
+
evalIds: "evalIds";
|
|
5533
5571
|
}>;
|
|
5534
5572
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
5535
5573
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -5541,9 +5579,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
|
5541
5579
|
temporary: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5542
5580
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
5543
5581
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
5544
|
-
use: "use";
|
|
5545
|
-
bypass: "bypass";
|
|
5546
5582
|
refresh: "refresh";
|
|
5583
|
+
bypass: "bypass";
|
|
5584
|
+
use: "use";
|
|
5547
5585
|
}>>;
|
|
5548
5586
|
}, z$1.core.$strip>>;
|
|
5549
5587
|
manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -7292,4 +7330,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
|
|
|
7292
7330
|
/** Return whether the active eval case has tags matching the typed input. */
|
|
7293
7331
|
declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
|
|
7294
7332
|
//#endregion
|
|
7295
|
-
export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
|
7333
|
+
export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalToolCallSpan, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|