@ls-stack/agent-eval 0.57.0 → 0.58.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-Db_x-Rit.mjs → app-L9GdY28I.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-Cz9p4l-t.js +377 -0
- package/dist/apps/web/dist/assets/index-DtARRwsS.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/caseChild.mjs +3 -2
- package/dist/{cli-Ck0mqxd-.mjs → cli-Cf37PZKi.mjs} +7 -6
- package/dist/index.d.mts +108 -61
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +4 -3
- package/dist/{runExecution-BH7DlMXl.mjs → runExecution-C4kAOhC1.mjs} +115 -30
- package/dist/{runOrchestration-C1Ex9QI-.mjs → runOrchestration-5xEiQxiS.mjs} +1 -1
- package/dist/{runner-DbVYcapC.mjs → runner-JIykMlve.mjs} +1 -1
- package/dist/{runner-B3hEOT_I.mjs → runner-bjd_UB9i.mjs} +2 -2
- package/dist/{src-B3iq-tuv.mjs → src-303BocMW.mjs} +2 -2
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +5 -3
- package/dist/apps/web/dist/assets/index-Xa_7PteQ.css +0 -1
- package/dist/apps/web/dist/assets/index-o4o2EktS.js +0 -377
package/dist/index.d.mts
CHANGED
|
@@ -235,6 +235,18 @@ declare const assertionFailureSchema$1: z$1.ZodObject<{
|
|
|
235
235
|
}, z$1.core.$strip>;
|
|
236
236
|
/** Assertion failure metadata captured for one case run. */
|
|
237
237
|
type AssertionFailure$1 = z$1.infer<typeof assertionFailureSchema$1>;
|
|
238
|
+
/** Structured assertion result metadata captured for one case run. */
|
|
239
|
+
declare const assertionResultSchema: z$1.ZodObject<{
|
|
240
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
241
|
+
message: z$1.ZodString;
|
|
242
|
+
stack: z$1.ZodOptional<z$1.ZodString>;
|
|
243
|
+
status: z$1.ZodEnum<{
|
|
244
|
+
pass: "pass";
|
|
245
|
+
fail: "fail";
|
|
246
|
+
}>;
|
|
247
|
+
}, z$1.core.$strip>;
|
|
248
|
+
/** Assertion result metadata captured for one case run. */
|
|
249
|
+
type AssertionResult = z$1.infer<typeof assertionResultSchema>;
|
|
238
250
|
/** Severity level for one log captured during a case run. */
|
|
239
251
|
declare const runLogLevelSchema$1: z$1.ZodEnum<{
|
|
240
252
|
error: "error";
|
|
@@ -1462,7 +1474,8 @@ type EvalCaseScope = {
|
|
|
1462
1474
|
input?: unknown; /** Effective tags for the current case. */
|
|
1463
1475
|
tags: string[];
|
|
1464
1476
|
outputs: Record<string, unknown>; /** Runtime display overrides recorded by output helpers for this case. */
|
|
1465
|
-
outputColumnOverrides: Record<string, EvalColumnOverride>; /** Structured assertion failures recorded for the current case. */
|
|
1477
|
+
outputColumnOverrides: Record<string, EvalColumnOverride>; /** Structured assertion results recorded for the current case. */
|
|
1478
|
+
assertions: AssertionResult[]; /** Structured assertion failures recorded for the current case. */
|
|
1466
1479
|
assertionFailures: AssertionFailure$1[]; /** Logs captured from manual `evalLog(...)` calls and enabled console calls. */
|
|
1467
1480
|
logs: RunLogEntry$1[];
|
|
1468
1481
|
spans: EvalTraceSpan$2[];
|
|
@@ -2004,8 +2017,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
|
|
|
2004
2017
|
subtree: "subtree";
|
|
2005
2018
|
}>>;
|
|
2006
2019
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2007
|
-
sum: "sum";
|
|
2008
2020
|
all: "all";
|
|
2021
|
+
sum: "sum";
|
|
2009
2022
|
last: "last";
|
|
2010
2023
|
}>>;
|
|
2011
2024
|
}, z$1.core.$strip>;
|
|
@@ -2040,8 +2053,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
|
|
|
2040
2053
|
subtree: "subtree";
|
|
2041
2054
|
}>>;
|
|
2042
2055
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2043
|
-
sum: "sum";
|
|
2044
2056
|
all: "all";
|
|
2057
|
+
sum: "sum";
|
|
2045
2058
|
last: "last";
|
|
2046
2059
|
}>>;
|
|
2047
2060
|
}, z$1.core.$strip>>>;
|
|
@@ -2080,8 +2093,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
|
|
|
2080
2093
|
subtree: "subtree";
|
|
2081
2094
|
}>>;
|
|
2082
2095
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2083
|
-
sum: "sum";
|
|
2084
2096
|
all: "all";
|
|
2097
|
+
sum: "sum";
|
|
2085
2098
|
last: "last";
|
|
2086
2099
|
}>>;
|
|
2087
2100
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
@@ -2118,8 +2131,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
|
|
|
2118
2131
|
subtree: "subtree";
|
|
2119
2132
|
}>>;
|
|
2120
2133
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2121
|
-
sum: "sum";
|
|
2122
2134
|
all: "all";
|
|
2135
|
+
sum: "sum";
|
|
2123
2136
|
last: "last";
|
|
2124
2137
|
}>>;
|
|
2125
2138
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
@@ -2204,9 +2217,9 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
|
|
|
2204
2217
|
*/
|
|
2205
2218
|
declare const evalStatAggregateSchema: z$1.ZodEnum<{
|
|
2206
2219
|
avg: "avg";
|
|
2220
|
+
sum: "sum";
|
|
2207
2221
|
min: "min";
|
|
2208
2222
|
max: "max";
|
|
2209
|
-
sum: "sum";
|
|
2210
2223
|
best: "best";
|
|
2211
2224
|
worst: "worst";
|
|
2212
2225
|
}>;
|
|
@@ -2236,9 +2249,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2236
2249
|
kind: z$1.ZodLiteral<"duration">;
|
|
2237
2250
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2238
2251
|
avg: "avg";
|
|
2252
|
+
sum: "sum";
|
|
2239
2253
|
min: "min";
|
|
2240
2254
|
max: "max";
|
|
2241
|
-
sum: "sum";
|
|
2242
2255
|
best: "best";
|
|
2243
2256
|
worst: "worst";
|
|
2244
2257
|
}>>;
|
|
@@ -2247,9 +2260,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2247
2260
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2248
2261
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2249
2262
|
avg: "avg";
|
|
2263
|
+
sum: "sum";
|
|
2250
2264
|
min: "min";
|
|
2251
2265
|
max: "max";
|
|
2252
|
-
sum: "sum";
|
|
2253
2266
|
best: "best";
|
|
2254
2267
|
worst: "worst";
|
|
2255
2268
|
}>>;
|
|
@@ -2260,9 +2273,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
2260
2273
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2261
2274
|
aggregate: z$1.ZodEnum<{
|
|
2262
2275
|
avg: "avg";
|
|
2276
|
+
sum: "sum";
|
|
2263
2277
|
min: "min";
|
|
2264
2278
|
max: "max";
|
|
2265
|
-
sum: "sum";
|
|
2266
2279
|
best: "best";
|
|
2267
2280
|
worst: "worst";
|
|
2268
2281
|
}>;
|
|
@@ -2300,9 +2313,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2300
2313
|
kind: z$1.ZodLiteral<"duration">;
|
|
2301
2314
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2302
2315
|
avg: "avg";
|
|
2316
|
+
sum: "sum";
|
|
2303
2317
|
min: "min";
|
|
2304
2318
|
max: "max";
|
|
2305
|
-
sum: "sum";
|
|
2306
2319
|
best: "best";
|
|
2307
2320
|
worst: "worst";
|
|
2308
2321
|
}>>;
|
|
@@ -2311,9 +2324,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2311
2324
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2312
2325
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2313
2326
|
avg: "avg";
|
|
2327
|
+
sum: "sum";
|
|
2314
2328
|
min: "min";
|
|
2315
2329
|
max: "max";
|
|
2316
|
-
sum: "sum";
|
|
2317
2330
|
best: "best";
|
|
2318
2331
|
worst: "worst";
|
|
2319
2332
|
}>>;
|
|
@@ -2324,9 +2337,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
2324
2337
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2325
2338
|
aggregate: z$1.ZodEnum<{
|
|
2326
2339
|
avg: "avg";
|
|
2340
|
+
sum: "sum";
|
|
2327
2341
|
min: "min";
|
|
2328
2342
|
max: "max";
|
|
2329
|
-
sum: "sum";
|
|
2330
2343
|
best: "best";
|
|
2331
2344
|
worst: "worst";
|
|
2332
2345
|
}>;
|
|
@@ -2409,10 +2422,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2409
2422
|
caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2410
2423
|
lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
|
|
2411
2424
|
error: "error";
|
|
2412
|
-
pass: "pass";
|
|
2413
|
-
fail: "fail";
|
|
2414
2425
|
running: "running";
|
|
2415
2426
|
cancelled: "cancelled";
|
|
2427
|
+
pass: "pass";
|
|
2428
|
+
fail: "fail";
|
|
2416
2429
|
unscored: "unscored";
|
|
2417
2430
|
}>>;
|
|
2418
2431
|
stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
@@ -2427,9 +2440,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2427
2440
|
kind: z$1.ZodLiteral<"duration">;
|
|
2428
2441
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2429
2442
|
avg: "avg";
|
|
2443
|
+
sum: "sum";
|
|
2430
2444
|
min: "min";
|
|
2431
2445
|
max: "max";
|
|
2432
|
-
sum: "sum";
|
|
2433
2446
|
best: "best";
|
|
2434
2447
|
worst: "worst";
|
|
2435
2448
|
}>>;
|
|
@@ -2438,9 +2451,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2438
2451
|
kind: z$1.ZodLiteral<"cacheHits">;
|
|
2439
2452
|
aggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2440
2453
|
avg: "avg";
|
|
2454
|
+
sum: "sum";
|
|
2441
2455
|
min: "min";
|
|
2442
2456
|
max: "max";
|
|
2443
|
-
sum: "sum";
|
|
2444
2457
|
best: "best";
|
|
2445
2458
|
worst: "worst";
|
|
2446
2459
|
}>>;
|
|
@@ -2451,9 +2464,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2451
2464
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2452
2465
|
aggregate: z$1.ZodEnum<{
|
|
2453
2466
|
avg: "avg";
|
|
2467
|
+
sum: "sum";
|
|
2454
2468
|
min: "min";
|
|
2455
2469
|
max: "max";
|
|
2456
|
-
sum: "sum";
|
|
2457
2470
|
best: "best";
|
|
2458
2471
|
worst: "worst";
|
|
2459
2472
|
}>;
|
|
@@ -2478,9 +2491,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2478
2491
|
}, z$1.core.$strip>], "kind">>>;
|
|
2479
2492
|
defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2480
2493
|
avg: "avg";
|
|
2494
|
+
sum: "sum";
|
|
2481
2495
|
min: "min";
|
|
2482
2496
|
max: "max";
|
|
2483
|
-
sum: "sum";
|
|
2484
2497
|
best: "best";
|
|
2485
2498
|
worst: "worst";
|
|
2486
2499
|
}>>;
|
|
@@ -2517,9 +2530,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2517
2530
|
key: z$1.ZodString;
|
|
2518
2531
|
aggregate: z$1.ZodEnum<{
|
|
2519
2532
|
avg: "avg";
|
|
2533
|
+
sum: "sum";
|
|
2520
2534
|
min: "min";
|
|
2521
2535
|
max: "max";
|
|
2522
|
-
sum: "sum";
|
|
2523
2536
|
latest: "latest";
|
|
2524
2537
|
passThresholdRate: "passThresholdRate";
|
|
2525
2538
|
}>;
|
|
@@ -2559,9 +2572,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
|
|
|
2559
2572
|
key: z$1.ZodString;
|
|
2560
2573
|
aggregate: z$1.ZodEnum<{
|
|
2561
2574
|
avg: "avg";
|
|
2575
|
+
sum: "sum";
|
|
2562
2576
|
min: "min";
|
|
2563
2577
|
max: "max";
|
|
2564
|
-
sum: "sum";
|
|
2565
2578
|
latest: "latest";
|
|
2566
2579
|
passThresholdRate: "passThresholdRate";
|
|
2567
2580
|
}>;
|
|
@@ -2658,11 +2671,11 @@ declare const caseRowSchema$1: z$1.ZodObject<{
|
|
|
2658
2671
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2659
2672
|
status: z$1.ZodEnum<{
|
|
2660
2673
|
error: "error";
|
|
2661
|
-
pass: "pass";
|
|
2662
|
-
fail: "fail";
|
|
2674
|
+
pending: "pending";
|
|
2663
2675
|
running: "running";
|
|
2664
2676
|
cancelled: "cancelled";
|
|
2665
|
-
pending: "pending";
|
|
2677
|
+
pass: "pass";
|
|
2678
|
+
fail: "fail";
|
|
2666
2679
|
}>;
|
|
2667
2680
|
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
2668
2681
|
cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
|
|
@@ -2729,6 +2742,7 @@ declare const assertionFailureSchema: z$1.ZodObject<{
|
|
|
2729
2742
|
}, z$1.core.$strip>;
|
|
2730
2743
|
/** Assertion failure metadata captured for one case run. */
|
|
2731
2744
|
type AssertionFailure = z$1.infer<typeof assertionFailureSchema>;
|
|
2745
|
+
/** Pass/fail outcome for one recorded eval assertion. */
|
|
2732
2746
|
/** Severity level for one log captured during a case run. */
|
|
2733
2747
|
declare const runLogLevelSchema: z$1.ZodEnum<{
|
|
2734
2748
|
error: "error";
|
|
@@ -2848,8 +2862,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2848
2862
|
subtree: "subtree";
|
|
2849
2863
|
}>>;
|
|
2850
2864
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2851
|
-
sum: "sum";
|
|
2852
2865
|
all: "all";
|
|
2866
|
+
sum: "sum";
|
|
2853
2867
|
last: "last";
|
|
2854
2868
|
}>>;
|
|
2855
2869
|
}, z$1.core.$strip>>>;
|
|
@@ -2860,10 +2874,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
|
|
|
2860
2874
|
namespace: z$1.ZodString;
|
|
2861
2875
|
key: z$1.ZodString;
|
|
2862
2876
|
status: z$1.ZodEnum<{
|
|
2877
|
+
bypass: "bypass";
|
|
2878
|
+
refresh: "refresh";
|
|
2863
2879
|
hit: "hit";
|
|
2864
2880
|
miss: "miss";
|
|
2865
|
-
refresh: "refresh";
|
|
2866
|
-
bypass: "bypass";
|
|
2867
2881
|
}>;
|
|
2868
2882
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2869
2883
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -2882,11 +2896,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2882
2896
|
tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2883
2897
|
status: z$1.ZodEnum<{
|
|
2884
2898
|
error: "error";
|
|
2885
|
-
pass: "pass";
|
|
2886
|
-
fail: "fail";
|
|
2899
|
+
pending: "pending";
|
|
2887
2900
|
running: "running";
|
|
2888
2901
|
cancelled: "cancelled";
|
|
2889
|
-
pending: "pending";
|
|
2902
|
+
pass: "pass";
|
|
2903
|
+
fail: "fail";
|
|
2890
2904
|
}>;
|
|
2891
2905
|
input: z$1.ZodUnknown;
|
|
2892
2906
|
trace: z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -2951,8 +2965,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
2951
2965
|
subtree: "subtree";
|
|
2952
2966
|
}>>;
|
|
2953
2967
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2954
|
-
sum: "sum";
|
|
2955
2968
|
all: "all";
|
|
2969
|
+
sum: "sum";
|
|
2956
2970
|
last: "last";
|
|
2957
2971
|
}>>;
|
|
2958
2972
|
}, z$1.core.$strip>>>;
|
|
@@ -3020,8 +3034,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3020
3034
|
subtree: "subtree";
|
|
3021
3035
|
}>>;
|
|
3022
3036
|
mode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3023
|
-
sum: "sum";
|
|
3024
3037
|
all: "all";
|
|
3038
|
+
sum: "sum";
|
|
3025
3039
|
last: "last";
|
|
3026
3040
|
}>>;
|
|
3027
3041
|
}, z$1.core.$strip>>>;
|
|
@@ -3032,10 +3046,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3032
3046
|
namespace: z$1.ZodString;
|
|
3033
3047
|
key: z$1.ZodString;
|
|
3034
3048
|
status: z$1.ZodEnum<{
|
|
3049
|
+
bypass: "bypass";
|
|
3050
|
+
refresh: "refresh";
|
|
3035
3051
|
hit: "hit";
|
|
3036
3052
|
miss: "miss";
|
|
3037
|
-
refresh: "refresh";
|
|
3038
|
-
bypass: "bypass";
|
|
3039
3053
|
}>;
|
|
3040
3054
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3041
3055
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -3092,6 +3106,20 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3092
3106
|
right: "right";
|
|
3093
3107
|
}>>;
|
|
3094
3108
|
}, z$1.core.$strip>>>;
|
|
3109
|
+
assertions: z$1.ZodOptional<z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
|
|
3110
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
3111
|
+
message: z$1.ZodString;
|
|
3112
|
+
stack: z$1.ZodOptional<z$1.ZodString>;
|
|
3113
|
+
status: z$1.ZodEnum<{
|
|
3114
|
+
pass: "pass";
|
|
3115
|
+
fail: "fail";
|
|
3116
|
+
}>;
|
|
3117
|
+
}, z$1.core.$strip>, z$1.ZodPipe<z$1.ZodString, z$1.ZodTransform<{
|
|
3118
|
+
message: string;
|
|
3119
|
+
status: "pass" | "fail";
|
|
3120
|
+
name?: string | undefined;
|
|
3121
|
+
stack?: string | undefined;
|
|
3122
|
+
}, string>>]>>>;
|
|
3095
3123
|
assertionFailures: z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
|
|
3096
3124
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
3097
3125
|
message: z$1.ZodString;
|
|
@@ -3138,10 +3166,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
|
|
|
3138
3166
|
namespace: z$1.ZodString;
|
|
3139
3167
|
key: z$1.ZodString;
|
|
3140
3168
|
status: z$1.ZodEnum<{
|
|
3169
|
+
bypass: "bypass";
|
|
3170
|
+
refresh: "refresh";
|
|
3141
3171
|
hit: "hit";
|
|
3142
3172
|
miss: "miss";
|
|
3143
|
-
refresh: "refresh";
|
|
3144
|
-
bypass: "bypass";
|
|
3145
3173
|
}>;
|
|
3146
3174
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
3147
3175
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -3195,9 +3223,9 @@ type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
|
|
|
3195
3223
|
/** Reducer applied to a numeric column across all cases of a single run. */
|
|
3196
3224
|
declare const evalChartAggregateSchema: z$1.ZodEnum<{
|
|
3197
3225
|
avg: "avg";
|
|
3226
|
+
sum: "sum";
|
|
3198
3227
|
min: "min";
|
|
3199
3228
|
max: "max";
|
|
3200
|
-
sum: "sum";
|
|
3201
3229
|
latest: "latest";
|
|
3202
3230
|
passThresholdRate: "passThresholdRate";
|
|
3203
3231
|
}>;
|
|
@@ -3253,9 +3281,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
3253
3281
|
key: z$1.ZodString;
|
|
3254
3282
|
aggregate: z$1.ZodEnum<{
|
|
3255
3283
|
avg: "avg";
|
|
3284
|
+
sum: "sum";
|
|
3256
3285
|
min: "min";
|
|
3257
3286
|
max: "max";
|
|
3258
|
-
sum: "sum";
|
|
3259
3287
|
latest: "latest";
|
|
3260
3288
|
passThresholdRate: "passThresholdRate";
|
|
3261
3289
|
}>;
|
|
@@ -3288,9 +3316,9 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
|
|
|
3288
3316
|
key: z$1.ZodString;
|
|
3289
3317
|
aggregate: z$1.ZodEnum<{
|
|
3290
3318
|
avg: "avg";
|
|
3319
|
+
sum: "sum";
|
|
3291
3320
|
min: "min";
|
|
3292
3321
|
max: "max";
|
|
3293
|
-
sum: "sum";
|
|
3294
3322
|
latest: "latest";
|
|
3295
3323
|
passThresholdRate: "passThresholdRate";
|
|
3296
3324
|
}>;
|
|
@@ -3336,9 +3364,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3336
3364
|
key: z$1.ZodString;
|
|
3337
3365
|
aggregate: z$1.ZodEnum<{
|
|
3338
3366
|
avg: "avg";
|
|
3367
|
+
sum: "sum";
|
|
3339
3368
|
min: "min";
|
|
3340
3369
|
max: "max";
|
|
3341
|
-
sum: "sum";
|
|
3342
3370
|
latest: "latest";
|
|
3343
3371
|
passThresholdRate: "passThresholdRate";
|
|
3344
3372
|
}>;
|
|
@@ -3378,9 +3406,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
|
3378
3406
|
key: z$1.ZodString;
|
|
3379
3407
|
aggregate: z$1.ZodEnum<{
|
|
3380
3408
|
avg: "avg";
|
|
3409
|
+
sum: "sum";
|
|
3381
3410
|
min: "min";
|
|
3382
3411
|
max: "max";
|
|
3383
|
-
sum: "sum";
|
|
3384
3412
|
latest: "latest";
|
|
3385
3413
|
passThresholdRate: "passThresholdRate";
|
|
3386
3414
|
}>;
|
|
@@ -3426,9 +3454,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3426
3454
|
key: z$1.ZodString;
|
|
3427
3455
|
aggregate: z$1.ZodEnum<{
|
|
3428
3456
|
avg: "avg";
|
|
3457
|
+
sum: "sum";
|
|
3429
3458
|
min: "min";
|
|
3430
3459
|
max: "max";
|
|
3431
|
-
sum: "sum";
|
|
3432
3460
|
latest: "latest";
|
|
3433
3461
|
passThresholdRate: "passThresholdRate";
|
|
3434
3462
|
}>;
|
|
@@ -3468,9 +3496,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
|
3468
3496
|
key: z$1.ZodString;
|
|
3469
3497
|
aggregate: z$1.ZodEnum<{
|
|
3470
3498
|
avg: "avg";
|
|
3499
|
+
sum: "sum";
|
|
3471
3500
|
min: "min";
|
|
3472
3501
|
max: "max";
|
|
3473
|
-
sum: "sum";
|
|
3474
3502
|
latest: "latest";
|
|
3475
3503
|
passThresholdRate: "passThresholdRate";
|
|
3476
3504
|
}>;
|
|
@@ -3486,10 +3514,10 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3486
3514
|
shortId: z$1.ZodString;
|
|
3487
3515
|
status: z$1.ZodEnum<{
|
|
3488
3516
|
error: "error";
|
|
3489
|
-
running: "running";
|
|
3490
|
-
cancelled: "cancelled";
|
|
3491
3517
|
pending: "pending";
|
|
3518
|
+
running: "running";
|
|
3492
3519
|
completed: "completed";
|
|
3520
|
+
cancelled: "cancelled";
|
|
3493
3521
|
}>;
|
|
3494
3522
|
temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
|
|
3495
3523
|
startedAt: z$1.ZodString;
|
|
@@ -3498,9 +3526,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3498
3526
|
evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
|
|
3499
3527
|
target: z$1.ZodObject<{
|
|
3500
3528
|
mode: z$1.ZodEnum<{
|
|
3501
|
-
caseIds: "caseIds";
|
|
3502
3529
|
all: "all";
|
|
3503
3530
|
evalIds: "evalIds";
|
|
3531
|
+
caseIds: "caseIds";
|
|
3504
3532
|
}>;
|
|
3505
3533
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
3506
3534
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -3514,9 +3542,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
|
|
|
3514
3542
|
median: "median";
|
|
3515
3543
|
}>>>;
|
|
3516
3544
|
cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
|
|
3517
|
-
refresh: "refresh";
|
|
3518
|
-
bypass: "bypass";
|
|
3519
3545
|
use: "use";
|
|
3546
|
+
bypass: "bypass";
|
|
3547
|
+
refresh: "refresh";
|
|
3520
3548
|
}>>;
|
|
3521
3549
|
}, z$1.core.$strip>;
|
|
3522
3550
|
/** Persisted lifecycle metadata for a single eval run. */
|
|
@@ -3526,10 +3554,10 @@ declare const runSummarySchema$1: z$1.ZodObject<{
|
|
|
3526
3554
|
runId: z$1.ZodString;
|
|
3527
3555
|
status: z$1.ZodEnum<{
|
|
3528
3556
|
error: "error";
|
|
3529
|
-
running: "running";
|
|
3530
|
-
cancelled: "cancelled";
|
|
3531
3557
|
pending: "pending";
|
|
3558
|
+
running: "running";
|
|
3532
3559
|
completed: "completed";
|
|
3560
|
+
cancelled: "cancelled";
|
|
3533
3561
|
}>;
|
|
3534
3562
|
totalCases: z$1.ZodNumber;
|
|
3535
3563
|
passedCases: z$1.ZodNumber;
|
|
@@ -4226,7 +4254,12 @@ type AgentEvalsConfig$1 = {
|
|
|
4226
4254
|
* cache entries. Defaults to `5000`; non-positive or non-finite values use
|
|
4227
4255
|
* the default.
|
|
4228
4256
|
*/
|
|
4229
|
-
pruneIdleDelayMs?: number; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
|
|
4257
|
+
pruneIdleDelayMs?: number;
|
|
4258
|
+
/**
|
|
4259
|
+
* Minimum milliseconds between `lastAccessedAt` index rewrites for repeated
|
|
4260
|
+
* cache hits. Defaults to four hours. Set to `0` to record every hit.
|
|
4261
|
+
*/
|
|
4262
|
+
lastAccessedAtUpdateIntervalMs?: number; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
|
|
4230
4263
|
maxEntriesPerEval?: number;
|
|
4231
4264
|
};
|
|
4232
4265
|
};
|
|
@@ -4436,9 +4469,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
|
|
|
4436
4469
|
* - `refresh`: never read, always write (forces re-execution and overwrites).
|
|
4437
4470
|
*/
|
|
4438
4471
|
declare const cacheModeSchema: z$1.ZodEnum<{
|
|
4439
|
-
refresh: "refresh";
|
|
4440
|
-
bypass: "bypass";
|
|
4441
4472
|
use: "use";
|
|
4473
|
+
bypass: "bypass";
|
|
4474
|
+
refresh: "refresh";
|
|
4442
4475
|
}>;
|
|
4443
4476
|
/** Mode controlling how cached spans behave during a run. */
|
|
4444
4477
|
type CacheMode = z$1.infer<typeof cacheModeSchema>;
|
|
@@ -4459,10 +4492,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
|
|
|
4459
4492
|
type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
|
|
4460
4493
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4461
4494
|
declare const cacheStatusSchema: z$1.ZodEnum<{
|
|
4495
|
+
bypass: "bypass";
|
|
4496
|
+
refresh: "refresh";
|
|
4462
4497
|
hit: "hit";
|
|
4463
4498
|
miss: "miss";
|
|
4464
|
-
refresh: "refresh";
|
|
4465
|
-
bypass: "bypass";
|
|
4466
4499
|
}>;
|
|
4467
4500
|
/** Status of a cache lookup recorded on a span or case scope. */
|
|
4468
4501
|
type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
|
|
@@ -4479,10 +4512,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
4479
4512
|
namespace: z$1.ZodString;
|
|
4480
4513
|
key: z$1.ZodString;
|
|
4481
4514
|
status: z$1.ZodEnum<{
|
|
4515
|
+
bypass: "bypass";
|
|
4516
|
+
refresh: "refresh";
|
|
4482
4517
|
hit: "hit";
|
|
4483
4518
|
miss: "miss";
|
|
4484
|
-
refresh: "refresh";
|
|
4485
|
-
bypass: "bypass";
|
|
4486
4519
|
}>;
|
|
4487
4520
|
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
4488
4521
|
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
@@ -4496,7 +4529,7 @@ declare const cacheListItemSchema$1: z$1.ZodObject<{
|
|
|
4496
4529
|
key: z$1.ZodString;
|
|
4497
4530
|
namespace: z$1.ZodString;
|
|
4498
4531
|
storedAt: z$1.ZodString;
|
|
4499
|
-
lastAccessedAt: z$1.ZodString;
|
|
4532
|
+
lastAccessedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
4500
4533
|
}, z$1.core.$strip>;
|
|
4501
4534
|
/** Minimal summary row for a single cache entry. */
|
|
4502
4535
|
type CacheListItem = z$1.infer<typeof cacheListItemSchema$1>;
|
|
@@ -5434,9 +5467,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
|
|
|
5434
5467
|
declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
5435
5468
|
target: z$1.ZodObject<{
|
|
5436
5469
|
mode: z$1.ZodEnum<{
|
|
5437
|
-
caseIds: "caseIds";
|
|
5438
5470
|
all: "all";
|
|
5439
5471
|
evalIds: "evalIds";
|
|
5472
|
+
caseIds: "caseIds";
|
|
5440
5473
|
}>;
|
|
5441
5474
|
evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
5442
5475
|
files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -5448,9 +5481,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
|
|
|
5448
5481
|
temporary: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
5449
5482
|
cache: z$1.ZodOptional<z$1.ZodObject<{
|
|
5450
5483
|
mode: z$1.ZodDefault<z$1.ZodEnum<{
|
|
5451
|
-
refresh: "refresh";
|
|
5452
|
-
bypass: "bypass";
|
|
5453
5484
|
use: "use";
|
|
5485
|
+
bypass: "bypass";
|
|
5486
|
+
refresh: "refresh";
|
|
5454
5487
|
}>>;
|
|
5455
5488
|
}, z$1.core.$strip>>;
|
|
5456
5489
|
manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
|
|
@@ -6302,6 +6335,20 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
6302
6335
|
right: "right";
|
|
6303
6336
|
}>>;
|
|
6304
6337
|
}, z$1.core.$strip>>>;
|
|
6338
|
+
assertions: z$1.ZodOptional<z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
|
|
6339
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
6340
|
+
message: z$1.ZodString;
|
|
6341
|
+
stack: z$1.ZodOptional<z$1.ZodString>;
|
|
6342
|
+
status: z$1.ZodEnum<{
|
|
6343
|
+
pass: "pass";
|
|
6344
|
+
fail: "fail";
|
|
6345
|
+
}>;
|
|
6346
|
+
}, z$1.core.$strip>, z$1.ZodPipe<z$1.ZodString, z$1.ZodTransform<{
|
|
6347
|
+
message: string;
|
|
6348
|
+
status: "pass" | "fail";
|
|
6349
|
+
name?: string | undefined;
|
|
6350
|
+
stack?: string | undefined;
|
|
6351
|
+
}, string>>]>>>;
|
|
6305
6352
|
assertionFailures: z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
|
|
6306
6353
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
6307
6354
|
message: z$1.ZodString;
|
|
@@ -6590,7 +6637,7 @@ declare const cacheListItemSchema: z$1.ZodObject<{
|
|
|
6590
6637
|
key: z$1.ZodString;
|
|
6591
6638
|
namespace: z$1.ZodString;
|
|
6592
6639
|
storedAt: z$1.ZodString;
|
|
6593
|
-
lastAccessedAt: z$1.ZodString;
|
|
6640
|
+
lastAccessedAt: z$1.ZodNullable<z$1.ZodString>;
|
|
6594
6641
|
}, z$1.core.$strip>;
|
|
6595
6642
|
/** Minimal summary row for a single cache entry. */
|
|
6596
6643
|
type CacheListItem$1 = z$1.infer<typeof cacheListItemSchema>;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-BH7DlMXl.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Ck0mqxd-.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-B3iq-tuv.mjs";
|
|
1
|
+
import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-C4kAOhC1.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Cf37PZKi.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-303BocMW.mjs";
|
|
4
4
|
export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-BH7DlMXl.mjs";
|
|
2
|
-
import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-C1Ex9QI-.mjs";
|
|
1
|
+
import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-C4kAOhC1.mjs";
|
|
2
|
+
import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-5xEiQxiS.mjs";
|
|
3
3
|
import { z } from "zod/v4";
|
|
4
4
|
import { readFile } from "node:fs/promises";
|
|
5
5
|
import { relative } from "node:path";
|
|
@@ -141,7 +141,8 @@ async function main() {
|
|
|
141
141
|
workspaceRoot: context.workspaceRoot,
|
|
142
142
|
dir: config.cache?.dir,
|
|
143
143
|
maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
|
|
144
|
-
maxEntriesByNamespace: config.cache?.maxEntriesByNamespace
|
|
144
|
+
maxEntriesByNamespace: config.cache?.maxEntriesByNamespace,
|
|
145
|
+
lastAccessedAtUpdateIntervalMs: config.cache?.lastAccessedAtUpdateIntervalMs
|
|
145
146
|
});
|
|
146
147
|
const evalMetas = await discoverRunEvals({
|
|
147
148
|
config,
|