@ls-stack/agent-eval 0.61.1 → 0.62.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-Dm_9ZTVa.mjs → app-CByWi7LX.mjs} +4 -4
- package/dist/apps/web/dist/assets/{index-CwSehYad.js → index-CE1teCsp.js} +70 -70
- package/dist/apps/web/dist/assets/{index-CM_zUhl_.css → index-zWPuRQmP.css} +1 -1
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +1 -1
- package/dist/{cli-CPBIcMP-.mjs → cli-CCHcjbC1.mjs} +4 -4
- package/dist/index.d.mts +70 -58
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +2 -2
- package/dist/{runExecution-D-CnSRYy.mjs → runExecution-C24aYsk3.mjs} +97 -15
- package/dist/{runOrchestration-Basvyp4u.mjs → runOrchestration-9XKoYcP9.mjs} +1 -1
- package/dist/{runner-B6UT1K7L.mjs → runner-5eU-FLHV.mjs} +1 -1
- package/dist/{runner-DwNb5TCb.mjs → runner-BEQGkHF0.mjs} +2 -2
- package/dist/{src-SixIk0b7.mjs → src-C8n7QANC.mjs} +2 -2
- package/package.json +3 -3
- package/skills/agent-eval/SKILL.md +3 -3
package/dist/index.d.mts
CHANGED
|
@@ -377,7 +377,12 @@ declare const evalChartsConfigSchema$1: z.ZodArray<z.ZodObject<{
|
|
|
377
377
|
/** Ordered list of history charts rendered for an eval. */
|
|
378
378
|
type EvalChartsConfig$1 = z.infer<typeof evalChartsConfigSchema$1>; //#endregion
|
|
379
379
|
//#region ../shared/src/schemas/config.d.ts
|
|
380
|
-
/**
|
|
380
|
+
/**
|
|
381
|
+
* Built-in eval-level output/column keys.
|
|
382
|
+
*
|
|
383
|
+
* `costUsd` controls the default LLM cost family: actual billed cost plus the
|
|
384
|
+
* normalized `costUsdWithoutCache` and `costUsdWarmedCache` chart outputs.
|
|
385
|
+
*/
|
|
381
386
|
declare const defaultConfigKeySchema: z.ZodEnum<{
|
|
382
387
|
apiCalls: "apiCalls";
|
|
383
388
|
costUsd: "costUsd";
|
|
@@ -2061,9 +2066,9 @@ declare const traceAttributeDisplaySchema: z.ZodObject<{
|
|
|
2061
2066
|
subtree: "subtree";
|
|
2062
2067
|
}>>;
|
|
2063
2068
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
2064
|
-
sum: "sum";
|
|
2065
2069
|
all: "all";
|
|
2066
2070
|
last: "last";
|
|
2071
|
+
sum: "sum";
|
|
2067
2072
|
}>>;
|
|
2068
2073
|
}, z.core.$strip>;
|
|
2069
2074
|
/**
|
|
@@ -2097,9 +2102,9 @@ declare const traceDisplayConfigSchema: z.ZodObject<{
|
|
|
2097
2102
|
subtree: "subtree";
|
|
2098
2103
|
}>>;
|
|
2099
2104
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
2100
|
-
sum: "sum";
|
|
2101
2105
|
all: "all";
|
|
2102
2106
|
last: "last";
|
|
2107
|
+
sum: "sum";
|
|
2103
2108
|
}>>;
|
|
2104
2109
|
}, z.core.$strip>>>;
|
|
2105
2110
|
}, z.core.$strip>;
|
|
@@ -2137,9 +2142,9 @@ declare const traceAttributeDisplayInputSchema: z.ZodObject<{
|
|
|
2137
2142
|
subtree: "subtree";
|
|
2138
2143
|
}>>;
|
|
2139
2144
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
2140
|
-
sum: "sum";
|
|
2141
2145
|
all: "all";
|
|
2142
2146
|
last: "last";
|
|
2147
|
+
sum: "sum";
|
|
2143
2148
|
}>>;
|
|
2144
2149
|
transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
2145
2150
|
}, z.core.$strip>;
|
|
@@ -2175,9 +2180,9 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
|
|
|
2175
2180
|
subtree: "subtree";
|
|
2176
2181
|
}>>;
|
|
2177
2182
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
2178
|
-
sum: "sum";
|
|
2179
2183
|
all: "all";
|
|
2180
2184
|
last: "last";
|
|
2185
|
+
sum: "sum";
|
|
2181
2186
|
}>>;
|
|
2182
2187
|
transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
2183
2188
|
}, z.core.$strip>>>;
|
|
@@ -2214,8 +2219,8 @@ declare const traceSpanSchema$1: z.ZodObject<{
|
|
|
2214
2219
|
status: z.ZodEnum<{
|
|
2215
2220
|
error: "error";
|
|
2216
2221
|
running: "running";
|
|
2217
|
-
cancelled: "cancelled";
|
|
2218
2222
|
ok: "ok";
|
|
2223
|
+
cancelled: "cancelled";
|
|
2219
2224
|
}>;
|
|
2220
2225
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
2221
2226
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -2260,10 +2265,10 @@ type EvalFreshnessStatus = z.infer<typeof evalFreshnessStatusSchema>;
|
|
|
2260
2265
|
* `best` selects the highest finite value and `worst` selects the lowest.
|
|
2261
2266
|
*/
|
|
2262
2267
|
declare const evalStatAggregateSchema: z.ZodEnum<{
|
|
2268
|
+
sum: "sum";
|
|
2263
2269
|
avg: "avg";
|
|
2264
2270
|
min: "min";
|
|
2265
2271
|
max: "max";
|
|
2266
|
-
sum: "sum";
|
|
2267
2272
|
best: "best";
|
|
2268
2273
|
worst: "worst";
|
|
2269
2274
|
}>;
|
|
@@ -2292,10 +2297,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2292
2297
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2293
2298
|
kind: z.ZodLiteral<"duration">;
|
|
2294
2299
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2300
|
+
sum: "sum";
|
|
2295
2301
|
avg: "avg";
|
|
2296
2302
|
min: "min";
|
|
2297
2303
|
max: "max";
|
|
2298
|
-
sum: "sum";
|
|
2299
2304
|
best: "best";
|
|
2300
2305
|
worst: "worst";
|
|
2301
2306
|
}>>;
|
|
@@ -2303,10 +2308,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2303
2308
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2304
2309
|
kind: z.ZodLiteral<"cacheHits">;
|
|
2305
2310
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2311
|
+
sum: "sum";
|
|
2306
2312
|
avg: "avg";
|
|
2307
2313
|
min: "min";
|
|
2308
2314
|
max: "max";
|
|
2309
|
-
sum: "sum";
|
|
2310
2315
|
best: "best";
|
|
2311
2316
|
worst: "worst";
|
|
2312
2317
|
}>>;
|
|
@@ -2316,10 +2321,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
2316
2321
|
key: z.ZodString;
|
|
2317
2322
|
label: z.ZodOptional<z.ZodString>;
|
|
2318
2323
|
aggregate: z.ZodEnum<{
|
|
2324
|
+
sum: "sum";
|
|
2319
2325
|
avg: "avg";
|
|
2320
2326
|
min: "min";
|
|
2321
2327
|
max: "max";
|
|
2322
|
-
sum: "sum";
|
|
2323
2328
|
best: "best";
|
|
2324
2329
|
worst: "worst";
|
|
2325
2330
|
}>;
|
|
@@ -2356,10 +2361,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2356
2361
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2357
2362
|
kind: z.ZodLiteral<"duration">;
|
|
2358
2363
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2364
|
+
sum: "sum";
|
|
2359
2365
|
avg: "avg";
|
|
2360
2366
|
min: "min";
|
|
2361
2367
|
max: "max";
|
|
2362
|
-
sum: "sum";
|
|
2363
2368
|
best: "best";
|
|
2364
2369
|
worst: "worst";
|
|
2365
2370
|
}>>;
|
|
@@ -2367,10 +2372,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2367
2372
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2368
2373
|
kind: z.ZodLiteral<"cacheHits">;
|
|
2369
2374
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2375
|
+
sum: "sum";
|
|
2370
2376
|
avg: "avg";
|
|
2371
2377
|
min: "min";
|
|
2372
2378
|
max: "max";
|
|
2373
|
-
sum: "sum";
|
|
2374
2379
|
best: "best";
|
|
2375
2380
|
worst: "worst";
|
|
2376
2381
|
}>>;
|
|
@@ -2380,10 +2385,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
|
|
|
2380
2385
|
key: z.ZodString;
|
|
2381
2386
|
label: z.ZodOptional<z.ZodString>;
|
|
2382
2387
|
aggregate: z.ZodEnum<{
|
|
2388
|
+
sum: "sum";
|
|
2383
2389
|
avg: "avg";
|
|
2384
2390
|
min: "min";
|
|
2385
2391
|
max: "max";
|
|
2386
|
-
sum: "sum";
|
|
2387
2392
|
best: "best";
|
|
2388
2393
|
worst: "worst";
|
|
2389
2394
|
}>;
|
|
@@ -2466,10 +2471,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2466
2471
|
caseIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2467
2472
|
lastRunStatus: z.ZodNullable<z.ZodEnum<{
|
|
2468
2473
|
error: "error";
|
|
2469
|
-
pass: "pass";
|
|
2470
|
-
fail: "fail";
|
|
2471
2474
|
running: "running";
|
|
2472
2475
|
cancelled: "cancelled";
|
|
2476
|
+
pass: "pass";
|
|
2477
|
+
fail: "fail";
|
|
2473
2478
|
unscored: "unscored";
|
|
2474
2479
|
}>>;
|
|
2475
2480
|
stats: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
@@ -2483,10 +2488,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2483
2488
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2484
2489
|
kind: z.ZodLiteral<"duration">;
|
|
2485
2490
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2491
|
+
sum: "sum";
|
|
2486
2492
|
avg: "avg";
|
|
2487
2493
|
min: "min";
|
|
2488
2494
|
max: "max";
|
|
2489
|
-
sum: "sum";
|
|
2490
2495
|
best: "best";
|
|
2491
2496
|
worst: "worst";
|
|
2492
2497
|
}>>;
|
|
@@ -2494,10 +2499,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2494
2499
|
hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
|
|
2495
2500
|
kind: z.ZodLiteral<"cacheHits">;
|
|
2496
2501
|
aggregate: z.ZodOptional<z.ZodEnum<{
|
|
2502
|
+
sum: "sum";
|
|
2497
2503
|
avg: "avg";
|
|
2498
2504
|
min: "min";
|
|
2499
2505
|
max: "max";
|
|
2500
|
-
sum: "sum";
|
|
2501
2506
|
best: "best";
|
|
2502
2507
|
worst: "worst";
|
|
2503
2508
|
}>>;
|
|
@@ -2507,10 +2512,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2507
2512
|
key: z.ZodString;
|
|
2508
2513
|
label: z.ZodOptional<z.ZodString>;
|
|
2509
2514
|
aggregate: z.ZodEnum<{
|
|
2515
|
+
sum: "sum";
|
|
2510
2516
|
avg: "avg";
|
|
2511
2517
|
min: "min";
|
|
2512
2518
|
max: "max";
|
|
2513
|
-
sum: "sum";
|
|
2514
2519
|
best: "best";
|
|
2515
2520
|
worst: "worst";
|
|
2516
2521
|
}>;
|
|
@@ -2534,10 +2539,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2534
2539
|
accent: z.ZodOptional<z.ZodBoolean>;
|
|
2535
2540
|
}, z.core.$strip>], "kind">>>;
|
|
2536
2541
|
defaultStatAggregate: z.ZodOptional<z.ZodEnum<{
|
|
2542
|
+
sum: "sum";
|
|
2537
2543
|
avg: "avg";
|
|
2538
2544
|
min: "min";
|
|
2539
2545
|
max: "max";
|
|
2540
|
-
sum: "sum";
|
|
2541
2546
|
best: "best";
|
|
2542
2547
|
worst: "worst";
|
|
2543
2548
|
}>>;
|
|
@@ -2560,9 +2565,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2560
2565
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2561
2566
|
success: "success";
|
|
2562
2567
|
error: "error";
|
|
2568
|
+
warning: "warning";
|
|
2563
2569
|
accent: "accent";
|
|
2564
2570
|
accentDim: "accentDim";
|
|
2565
|
-
warning: "warning";
|
|
2566
2571
|
textMuted: "textMuted";
|
|
2567
2572
|
}>>;
|
|
2568
2573
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -2573,10 +2578,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2573
2578
|
source: z.ZodLiteral<"column">;
|
|
2574
2579
|
key: z.ZodString;
|
|
2575
2580
|
aggregate: z.ZodEnum<{
|
|
2581
|
+
sum: "sum";
|
|
2576
2582
|
avg: "avg";
|
|
2577
2583
|
min: "min";
|
|
2578
2584
|
max: "max";
|
|
2579
|
-
sum: "sum";
|
|
2580
2585
|
latest: "latest";
|
|
2581
2586
|
passThresholdRate: "passThresholdRate";
|
|
2582
2587
|
}>;
|
|
@@ -2584,9 +2589,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2584
2589
|
color: z.ZodOptional<z.ZodEnum<{
|
|
2585
2590
|
success: "success";
|
|
2586
2591
|
error: "error";
|
|
2592
|
+
warning: "warning";
|
|
2587
2593
|
accent: "accent";
|
|
2588
2594
|
accentDim: "accentDim";
|
|
2589
|
-
warning: "warning";
|
|
2590
2595
|
textMuted: "textMuted";
|
|
2591
2596
|
}>>;
|
|
2592
2597
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -2615,10 +2620,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
|
|
|
2615
2620
|
source: z.ZodLiteral<"column">;
|
|
2616
2621
|
key: z.ZodString;
|
|
2617
2622
|
aggregate: z.ZodEnum<{
|
|
2623
|
+
sum: "sum";
|
|
2618
2624
|
avg: "avg";
|
|
2619
2625
|
min: "min";
|
|
2620
2626
|
max: "max";
|
|
2621
|
-
sum: "sum";
|
|
2622
2627
|
latest: "latest";
|
|
2623
2628
|
passThresholdRate: "passThresholdRate";
|
|
2624
2629
|
}>;
|
|
@@ -2715,10 +2720,10 @@ declare const caseRowSchema$1: z.ZodObject<{
|
|
|
2715
2720
|
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2716
2721
|
status: z.ZodEnum<{
|
|
2717
2722
|
error: "error";
|
|
2718
|
-
pass: "pass";
|
|
2719
|
-
fail: "fail";
|
|
2720
2723
|
running: "running";
|
|
2721
2724
|
cancelled: "cancelled";
|
|
2725
|
+
pass: "pass";
|
|
2726
|
+
fail: "fail";
|
|
2722
2727
|
pending: "pending";
|
|
2723
2728
|
}>;
|
|
2724
2729
|
durationMs: z.ZodNullable<z.ZodNumber>;
|
|
@@ -2857,8 +2862,8 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
2857
2862
|
status: z.ZodEnum<{
|
|
2858
2863
|
error: "error";
|
|
2859
2864
|
running: "running";
|
|
2860
|
-
cancelled: "cancelled";
|
|
2861
2865
|
ok: "ok";
|
|
2866
|
+
cancelled: "cancelled";
|
|
2862
2867
|
}>;
|
|
2863
2868
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
2864
2869
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -2908,9 +2913,9 @@ declare const scoreTraceSchema: z.ZodObject<{
|
|
|
2908
2913
|
subtree: "subtree";
|
|
2909
2914
|
}>>;
|
|
2910
2915
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
2911
|
-
sum: "sum";
|
|
2912
2916
|
all: "all";
|
|
2913
2917
|
last: "last";
|
|
2918
|
+
sum: "sum";
|
|
2914
2919
|
}>>;
|
|
2915
2920
|
}, z.core.$strip>>>;
|
|
2916
2921
|
}, z.core.$strip>;
|
|
@@ -2942,10 +2947,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
2942
2947
|
tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
2943
2948
|
status: z.ZodEnum<{
|
|
2944
2949
|
error: "error";
|
|
2945
|
-
pass: "pass";
|
|
2946
|
-
fail: "fail";
|
|
2947
2950
|
running: "running";
|
|
2948
2951
|
cancelled: "cancelled";
|
|
2952
|
+
pass: "pass";
|
|
2953
|
+
fail: "fail";
|
|
2949
2954
|
pending: "pending";
|
|
2950
2955
|
}>;
|
|
2951
2956
|
input: z.ZodUnknown;
|
|
@@ -2960,8 +2965,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
2960
2965
|
status: z.ZodEnum<{
|
|
2961
2966
|
error: "error";
|
|
2962
2967
|
running: "running";
|
|
2963
|
-
cancelled: "cancelled";
|
|
2964
2968
|
ok: "ok";
|
|
2969
|
+
cancelled: "cancelled";
|
|
2965
2970
|
}>;
|
|
2966
2971
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
2967
2972
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -3011,9 +3016,9 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3011
3016
|
subtree: "subtree";
|
|
3012
3017
|
}>>;
|
|
3013
3018
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
3014
|
-
sum: "sum";
|
|
3015
3019
|
all: "all";
|
|
3016
3020
|
last: "last";
|
|
3021
|
+
sum: "sum";
|
|
3017
3022
|
}>>;
|
|
3018
3023
|
}, z.core.$strip>>>;
|
|
3019
3024
|
}, z.core.$strip>;
|
|
@@ -3029,8 +3034,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3029
3034
|
status: z.ZodEnum<{
|
|
3030
3035
|
error: "error";
|
|
3031
3036
|
running: "running";
|
|
3032
|
-
cancelled: "cancelled";
|
|
3033
3037
|
ok: "ok";
|
|
3038
|
+
cancelled: "cancelled";
|
|
3034
3039
|
}>;
|
|
3035
3040
|
attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
|
|
3036
3041
|
error: z.ZodOptional<z.ZodObject<{
|
|
@@ -3080,9 +3085,9 @@ declare const caseDetailSchema$1: z.ZodObject<{
|
|
|
3080
3085
|
subtree: "subtree";
|
|
3081
3086
|
}>>;
|
|
3082
3087
|
mode: z.ZodOptional<z.ZodEnum<{
|
|
3083
|
-
sum: "sum";
|
|
3084
3088
|
all: "all";
|
|
3085
3089
|
last: "last";
|
|
3090
|
+
sum: "sum";
|
|
3086
3091
|
}>>;
|
|
3087
3092
|
}, z.core.$strip>>>;
|
|
3088
3093
|
}, z.core.$strip>;
|
|
@@ -3269,10 +3274,10 @@ declare const evalChartBuiltinMetricSchema: z.ZodEnum<{
|
|
|
3269
3274
|
type EvalChartBuiltinMetric = z.infer<typeof evalChartBuiltinMetricSchema>;
|
|
3270
3275
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
3271
3276
|
declare const evalChartAggregateSchema: z.ZodEnum<{
|
|
3277
|
+
sum: "sum";
|
|
3272
3278
|
avg: "avg";
|
|
3273
3279
|
min: "min";
|
|
3274
3280
|
max: "max";
|
|
3275
|
-
sum: "sum";
|
|
3276
3281
|
latest: "latest";
|
|
3277
3282
|
passThresholdRate: "passThresholdRate";
|
|
3278
3283
|
}>;
|
|
@@ -3285,9 +3290,9 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
|
|
|
3285
3290
|
declare const evalChartColorSchema: z.ZodEnum<{
|
|
3286
3291
|
success: "success";
|
|
3287
3292
|
error: "error";
|
|
3293
|
+
warning: "warning";
|
|
3288
3294
|
accent: "accent";
|
|
3289
3295
|
accentDim: "accentDim";
|
|
3290
|
-
warning: "warning";
|
|
3291
3296
|
textMuted: "textMuted";
|
|
3292
3297
|
}>;
|
|
3293
3298
|
/** Semantic color token resolved to a theme color by the web UI. */
|
|
@@ -3314,9 +3319,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3314
3319
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3315
3320
|
success: "success";
|
|
3316
3321
|
error: "error";
|
|
3322
|
+
warning: "warning";
|
|
3317
3323
|
accent: "accent";
|
|
3318
3324
|
accentDim: "accentDim";
|
|
3319
|
-
warning: "warning";
|
|
3320
3325
|
textMuted: "textMuted";
|
|
3321
3326
|
}>>;
|
|
3322
3327
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3327,10 +3332,10 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3327
3332
|
source: z.ZodLiteral<"column">;
|
|
3328
3333
|
key: z.ZodString;
|
|
3329
3334
|
aggregate: z.ZodEnum<{
|
|
3335
|
+
sum: "sum";
|
|
3330
3336
|
avg: "avg";
|
|
3331
3337
|
min: "min";
|
|
3332
3338
|
max: "max";
|
|
3333
|
-
sum: "sum";
|
|
3334
3339
|
latest: "latest";
|
|
3335
3340
|
passThresholdRate: "passThresholdRate";
|
|
3336
3341
|
}>;
|
|
@@ -3338,9 +3343,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
|
|
|
3338
3343
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3339
3344
|
success: "success";
|
|
3340
3345
|
error: "error";
|
|
3346
|
+
warning: "warning";
|
|
3341
3347
|
accent: "accent";
|
|
3342
3348
|
accentDim: "accentDim";
|
|
3343
|
-
warning: "warning";
|
|
3344
3349
|
textMuted: "textMuted";
|
|
3345
3350
|
}>>;
|
|
3346
3351
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3362,10 +3367,10 @@ declare const evalChartTooltipExtraSchema: z.ZodDiscriminatedUnion<[z.ZodObject<
|
|
|
3362
3367
|
source: z.ZodLiteral<"column">;
|
|
3363
3368
|
key: z.ZodString;
|
|
3364
3369
|
aggregate: z.ZodEnum<{
|
|
3370
|
+
sum: "sum";
|
|
3365
3371
|
avg: "avg";
|
|
3366
3372
|
min: "min";
|
|
3367
3373
|
max: "max";
|
|
3368
|
-
sum: "sum";
|
|
3369
3374
|
latest: "latest";
|
|
3370
3375
|
passThresholdRate: "passThresholdRate";
|
|
3371
3376
|
}>;
|
|
@@ -3397,9 +3402,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3397
3402
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3398
3403
|
success: "success";
|
|
3399
3404
|
error: "error";
|
|
3405
|
+
warning: "warning";
|
|
3400
3406
|
accent: "accent";
|
|
3401
3407
|
accentDim: "accentDim";
|
|
3402
|
-
warning: "warning";
|
|
3403
3408
|
textMuted: "textMuted";
|
|
3404
3409
|
}>>;
|
|
3405
3410
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3410,10 +3415,10 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3410
3415
|
source: z.ZodLiteral<"column">;
|
|
3411
3416
|
key: z.ZodString;
|
|
3412
3417
|
aggregate: z.ZodEnum<{
|
|
3418
|
+
sum: "sum";
|
|
3413
3419
|
avg: "avg";
|
|
3414
3420
|
min: "min";
|
|
3415
3421
|
max: "max";
|
|
3416
|
-
sum: "sum";
|
|
3417
3422
|
latest: "latest";
|
|
3418
3423
|
passThresholdRate: "passThresholdRate";
|
|
3419
3424
|
}>;
|
|
@@ -3421,9 +3426,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3421
3426
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3422
3427
|
success: "success";
|
|
3423
3428
|
error: "error";
|
|
3429
|
+
warning: "warning";
|
|
3424
3430
|
accent: "accent";
|
|
3425
3431
|
accentDim: "accentDim";
|
|
3426
|
-
warning: "warning";
|
|
3427
3432
|
textMuted: "textMuted";
|
|
3428
3433
|
}>>;
|
|
3429
3434
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3452,10 +3457,10 @@ declare const evalChartConfigSchema: z.ZodObject<{
|
|
|
3452
3457
|
source: z.ZodLiteral<"column">;
|
|
3453
3458
|
key: z.ZodString;
|
|
3454
3459
|
aggregate: z.ZodEnum<{
|
|
3460
|
+
sum: "sum";
|
|
3455
3461
|
avg: "avg";
|
|
3456
3462
|
min: "min";
|
|
3457
3463
|
max: "max";
|
|
3458
|
-
sum: "sum";
|
|
3459
3464
|
latest: "latest";
|
|
3460
3465
|
passThresholdRate: "passThresholdRate";
|
|
3461
3466
|
}>;
|
|
@@ -3487,9 +3492,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3487
3492
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3488
3493
|
success: "success";
|
|
3489
3494
|
error: "error";
|
|
3495
|
+
warning: "warning";
|
|
3490
3496
|
accent: "accent";
|
|
3491
3497
|
accentDim: "accentDim";
|
|
3492
|
-
warning: "warning";
|
|
3493
3498
|
textMuted: "textMuted";
|
|
3494
3499
|
}>>;
|
|
3495
3500
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3500,10 +3505,10 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3500
3505
|
source: z.ZodLiteral<"column">;
|
|
3501
3506
|
key: z.ZodString;
|
|
3502
3507
|
aggregate: z.ZodEnum<{
|
|
3508
|
+
sum: "sum";
|
|
3503
3509
|
avg: "avg";
|
|
3504
3510
|
min: "min";
|
|
3505
3511
|
max: "max";
|
|
3506
|
-
sum: "sum";
|
|
3507
3512
|
latest: "latest";
|
|
3508
3513
|
passThresholdRate: "passThresholdRate";
|
|
3509
3514
|
}>;
|
|
@@ -3511,9 +3516,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3511
3516
|
color: z.ZodOptional<z.ZodEnum<{
|
|
3512
3517
|
success: "success";
|
|
3513
3518
|
error: "error";
|
|
3519
|
+
warning: "warning";
|
|
3514
3520
|
accent: "accent";
|
|
3515
3521
|
accentDim: "accentDim";
|
|
3516
|
-
warning: "warning";
|
|
3517
3522
|
textMuted: "textMuted";
|
|
3518
3523
|
}>>;
|
|
3519
3524
|
axis: z.ZodOptional<z.ZodEnum<{
|
|
@@ -3542,10 +3547,10 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
|
|
|
3542
3547
|
source: z.ZodLiteral<"column">;
|
|
3543
3548
|
key: z.ZodString;
|
|
3544
3549
|
aggregate: z.ZodEnum<{
|
|
3550
|
+
sum: "sum";
|
|
3545
3551
|
avg: "avg";
|
|
3546
3552
|
min: "min";
|
|
3547
3553
|
max: "max";
|
|
3548
|
-
sum: "sum";
|
|
3549
3554
|
latest: "latest";
|
|
3550
3555
|
passThresholdRate: "passThresholdRate";
|
|
3551
3556
|
}>;
|
|
@@ -3573,8 +3578,8 @@ declare const runManifestSchema$1: z.ZodObject<{
|
|
|
3573
3578
|
evalSourceFingerprints: z.ZodDefault<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>>;
|
|
3574
3579
|
target: z.ZodObject<{
|
|
3575
3580
|
mode: z.ZodEnum<{
|
|
3576
|
-
caseIds: "caseIds";
|
|
3577
3581
|
all: "all";
|
|
3582
|
+
caseIds: "caseIds";
|
|
3578
3583
|
evalIds: "evalIds";
|
|
3579
3584
|
}>;
|
|
3580
3585
|
evalKeys: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
@@ -3684,7 +3689,12 @@ declare const trialSelectionModeSchema: z.ZodEnum<{
|
|
|
3684
3689
|
}>;
|
|
3685
3690
|
/** Strategy used to collapse repeated trials into one stored case result. */
|
|
3686
3691
|
type TrialSelectionMode = z.infer<typeof trialSelectionModeSchema>;
|
|
3687
|
-
/**
|
|
3692
|
+
/**
|
|
3693
|
+
* Built-in eval-level output/column keys.
|
|
3694
|
+
*
|
|
3695
|
+
* `costUsd` controls the default LLM cost family: actual billed cost plus the
|
|
3696
|
+
* normalized `costUsdWithoutCache` and `costUsdWarmedCache` chart outputs.
|
|
3697
|
+
*/
|
|
3688
3698
|
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
3689
3699
|
declare const removeDefaultConfigSchema: z.ZodUnion<readonly [z.ZodLiteral<true>, z.ZodArray<z.ZodEnum<{
|
|
3690
3700
|
costUsd: "costUsd";
|
|
@@ -4285,7 +4295,9 @@ type AgentEvalsConfig$1 = {
|
|
|
4285
4295
|
* Defaults are derived from trace spans using the resolved `llmCalls` and
|
|
4286
4296
|
* `apiCalls` extraction configs. Set to `true` to remove all defaults, or
|
|
4287
4297
|
* pass specific keys such as `['costUsd', 'apiCalls']` to remove only those
|
|
4288
|
-
* defaults globally.
|
|
4298
|
+
* defaults globally. Removing `costUsd` removes the whole default cost
|
|
4299
|
+
* family, including normalized no-cache and warmed-cache outputs. Per-eval
|
|
4300
|
+
* removal is additive.
|
|
4289
4301
|
*/
|
|
4290
4302
|
removeDefaultConfig?: RemoveDefaultConfig;
|
|
4291
4303
|
/**
|
|
@@ -4729,8 +4741,8 @@ declare const cacheRecordingSchema: z.ZodObject<{
|
|
|
4729
4741
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
4730
4742
|
error: "error";
|
|
4731
4743
|
running: "running";
|
|
4732
|
-
cancelled: "cancelled";
|
|
4733
4744
|
ok: "ok";
|
|
4745
|
+
cancelled: "cancelled";
|
|
4734
4746
|
}>>;
|
|
4735
4747
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
4736
4748
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -4830,8 +4842,8 @@ declare const cacheEntrySchema: z.ZodObject<{
|
|
|
4830
4842
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
4831
4843
|
error: "error";
|
|
4832
4844
|
running: "running";
|
|
4833
|
-
cancelled: "cancelled";
|
|
4834
4845
|
ok: "ok";
|
|
4846
|
+
cancelled: "cancelled";
|
|
4835
4847
|
}>>;
|
|
4836
4848
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
4837
4849
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -4948,8 +4960,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
|
|
|
4948
4960
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
4949
4961
|
error: "error";
|
|
4950
4962
|
running: "running";
|
|
4951
|
-
cancelled: "cancelled";
|
|
4952
4963
|
ok: "ok";
|
|
4964
|
+
cancelled: "cancelled";
|
|
4953
4965
|
}>>;
|
|
4954
4966
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
4955
4967
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5055,8 +5067,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5055
5067
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5056
5068
|
error: "error";
|
|
5057
5069
|
running: "running";
|
|
5058
|
-
cancelled: "cancelled";
|
|
5059
5070
|
ok: "ok";
|
|
5071
|
+
cancelled: "cancelled";
|
|
5060
5072
|
}>>;
|
|
5061
5073
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5062
5074
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5164,8 +5176,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
|
|
|
5164
5176
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5165
5177
|
error: "error";
|
|
5166
5178
|
running: "running";
|
|
5167
|
-
cancelled: "cancelled";
|
|
5168
5179
|
ok: "ok";
|
|
5180
|
+
cancelled: "cancelled";
|
|
5169
5181
|
}>>;
|
|
5170
5182
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5171
5183
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5271,8 +5283,8 @@ declare const cacheFileSchema: z.ZodObject<{
|
|
|
5271
5283
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5272
5284
|
error: "error";
|
|
5273
5285
|
running: "running";
|
|
5274
|
-
cancelled: "cancelled";
|
|
5275
5286
|
ok: "ok";
|
|
5287
|
+
cancelled: "cancelled";
|
|
5276
5288
|
}>>;
|
|
5277
5289
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5278
5290
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5388,8 +5400,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
|
|
|
5388
5400
|
finalStatus: z.ZodOptional<z.ZodEnum<{
|
|
5389
5401
|
error: "error";
|
|
5390
5402
|
running: "running";
|
|
5391
|
-
cancelled: "cancelled";
|
|
5392
5403
|
ok: "ok";
|
|
5404
|
+
cancelled: "cancelled";
|
|
5393
5405
|
}>>;
|
|
5394
5406
|
finalError: z.ZodOptional<z.ZodObject<{
|
|
5395
5407
|
name: z.ZodOptional<z.ZodString>;
|
|
@@ -5573,8 +5585,8 @@ type ConfigReloadState = z.infer<typeof configReloadStateSchema$1>;
|
|
|
5573
5585
|
declare const createRunRequestSchema$1: z.ZodObject<{
|
|
5574
5586
|
target: z.ZodObject<{
|
|
5575
5587
|
mode: z.ZodEnum<{
|
|
5576
|
-
caseIds: "caseIds";
|
|
5577
5588
|
all: "all";
|
|
5589
|
+
caseIds: "caseIds";
|
|
5578
5590
|
evalIds: "evalIds";
|
|
5579
5591
|
}>;
|
|
5580
5592
|
evalKeys: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CPBIcMP-.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-SixIk0b7.mjs";
|
|
1
|
+
import { $ as startEvalBackgroundJob, A as manualInputFileValueSchema, B as getCurrentScope, C as hashCacheKey, D as serializeCacheRecording, E as deserializeCacheValue, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as evalExpect, N as EvalAssertionError, O as serializeCacheValue, P as EvalRuntimeUsageError, Pt as getEvalRegistry, Q as setScopeCacheContext, R as evalLog, S as evalTracer, T as deserializeCacheRecording, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as captureEvalSpanError, it as extractApiCalls, j as readManualInputFile, k as repoFile, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKeySync, x as evalSpan, y as buildTraceTree, z as evalTime } from "./runExecution-C24aYsk3.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CCHcjbC1.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-C8n7QANC.mjs";
|
|
4
4
|
export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-D-CnSRYy.mjs";
|
|
2
|
-
import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-Basvyp4u.mjs";
|
|
1
|
+
import { At as manualInputDescriptorSchema, I as configureEvalRunLogs, Mt as columnDefSchema, Ot as evalStatAggregateSchema, _ as createFsCacheStore, bt as runSummarySchema, et as createRunRequestSchema, jt as evalChartsConfigSchema, kt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as getCacheRetentionOptions, wt as buildEvalKey, yt as runManifestSchema } from "./runExecution-C24aYsk3.mjs";
|
|
2
|
+
import { C as parseEvalDiscovery, h as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-9XKoYcP9.mjs";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|