@ls-stack/agent-eval 0.57.0 → 0.58.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -235,6 +235,18 @@ declare const assertionFailureSchema$1: z$1.ZodObject<{
235
235
  }, z$1.core.$strip>;
236
236
  /** Assertion failure metadata captured for one case run. */
237
237
  type AssertionFailure$1 = z$1.infer<typeof assertionFailureSchema$1>;
238
+ /** Structured assertion result metadata captured for one case run. */
239
+ declare const assertionResultSchema: z$1.ZodObject<{
240
+ name: z$1.ZodOptional<z$1.ZodString>;
241
+ message: z$1.ZodString;
242
+ stack: z$1.ZodOptional<z$1.ZodString>;
243
+ status: z$1.ZodEnum<{
244
+ pass: "pass";
245
+ fail: "fail";
246
+ }>;
247
+ }, z$1.core.$strip>;
248
+ /** Assertion result metadata captured for one case run. */
249
+ type AssertionResult = z$1.infer<typeof assertionResultSchema>;
238
250
  /** Severity level for one log captured during a case run. */
239
251
  declare const runLogLevelSchema$1: z$1.ZodEnum<{
240
252
  error: "error";
@@ -1462,7 +1474,8 @@ type EvalCaseScope = {
1462
1474
  input?: unknown; /** Effective tags for the current case. */
1463
1475
  tags: string[];
1464
1476
  outputs: Record<string, unknown>; /** Runtime display overrides recorded by output helpers for this case. */
1465
- outputColumnOverrides: Record<string, EvalColumnOverride>; /** Structured assertion failures recorded for the current case. */
1477
+ outputColumnOverrides: Record<string, EvalColumnOverride>; /** Structured assertion results recorded for the current case. */
1478
+ assertions: AssertionResult[]; /** Structured assertion failures recorded for the current case. */
1466
1479
  assertionFailures: AssertionFailure$1[]; /** Logs captured from manual `evalLog(...)` calls and enabled console calls. */
1467
1480
  logs: RunLogEntry$1[];
1468
1481
  spans: EvalTraceSpan$2[];
@@ -2004,8 +2017,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
2004
2017
  subtree: "subtree";
2005
2018
  }>>;
2006
2019
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2007
- sum: "sum";
2008
2020
  all: "all";
2021
+ sum: "sum";
2009
2022
  last: "last";
2010
2023
  }>>;
2011
2024
  }, z$1.core.$strip>;
@@ -2040,8 +2053,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
2040
2053
  subtree: "subtree";
2041
2054
  }>>;
2042
2055
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2043
- sum: "sum";
2044
2056
  all: "all";
2057
+ sum: "sum";
2045
2058
  last: "last";
2046
2059
  }>>;
2047
2060
  }, z$1.core.$strip>>>;
@@ -2080,8 +2093,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
2080
2093
  subtree: "subtree";
2081
2094
  }>>;
2082
2095
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2083
- sum: "sum";
2084
2096
  all: "all";
2097
+ sum: "sum";
2085
2098
  last: "last";
2086
2099
  }>>;
2087
2100
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
@@ -2118,8 +2131,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
2118
2131
  subtree: "subtree";
2119
2132
  }>>;
2120
2133
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2121
- sum: "sum";
2122
2134
  all: "all";
2135
+ sum: "sum";
2123
2136
  last: "last";
2124
2137
  }>>;
2125
2138
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
@@ -2204,9 +2217,9 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
2204
2217
  */
2205
2218
  declare const evalStatAggregateSchema: z$1.ZodEnum<{
2206
2219
  avg: "avg";
2220
+ sum: "sum";
2207
2221
  min: "min";
2208
2222
  max: "max";
2209
- sum: "sum";
2210
2223
  best: "best";
2211
2224
  worst: "worst";
2212
2225
  }>;
@@ -2236,9 +2249,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2236
2249
  kind: z$1.ZodLiteral<"duration">;
2237
2250
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2238
2251
  avg: "avg";
2252
+ sum: "sum";
2239
2253
  min: "min";
2240
2254
  max: "max";
2241
- sum: "sum";
2242
2255
  best: "best";
2243
2256
  worst: "worst";
2244
2257
  }>>;
@@ -2247,9 +2260,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2247
2260
  kind: z$1.ZodLiteral<"cacheHits">;
2248
2261
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2249
2262
  avg: "avg";
2263
+ sum: "sum";
2250
2264
  min: "min";
2251
2265
  max: "max";
2252
- sum: "sum";
2253
2266
  best: "best";
2254
2267
  worst: "worst";
2255
2268
  }>>;
@@ -2260,9 +2273,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2260
2273
  label: z$1.ZodOptional<z$1.ZodString>;
2261
2274
  aggregate: z$1.ZodEnum<{
2262
2275
  avg: "avg";
2276
+ sum: "sum";
2263
2277
  min: "min";
2264
2278
  max: "max";
2265
- sum: "sum";
2266
2279
  best: "best";
2267
2280
  worst: "worst";
2268
2281
  }>;
@@ -2300,9 +2313,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2300
2313
  kind: z$1.ZodLiteral<"duration">;
2301
2314
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2302
2315
  avg: "avg";
2316
+ sum: "sum";
2303
2317
  min: "min";
2304
2318
  max: "max";
2305
- sum: "sum";
2306
2319
  best: "best";
2307
2320
  worst: "worst";
2308
2321
  }>>;
@@ -2311,9 +2324,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2311
2324
  kind: z$1.ZodLiteral<"cacheHits">;
2312
2325
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2313
2326
  avg: "avg";
2327
+ sum: "sum";
2314
2328
  min: "min";
2315
2329
  max: "max";
2316
- sum: "sum";
2317
2330
  best: "best";
2318
2331
  worst: "worst";
2319
2332
  }>>;
@@ -2324,9 +2337,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2324
2337
  label: z$1.ZodOptional<z$1.ZodString>;
2325
2338
  aggregate: z$1.ZodEnum<{
2326
2339
  avg: "avg";
2340
+ sum: "sum";
2327
2341
  min: "min";
2328
2342
  max: "max";
2329
- sum: "sum";
2330
2343
  best: "best";
2331
2344
  worst: "worst";
2332
2345
  }>;
@@ -2409,10 +2422,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2409
2422
  caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2410
2423
  lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
2411
2424
  error: "error";
2412
- pass: "pass";
2413
- fail: "fail";
2414
2425
  running: "running";
2415
2426
  cancelled: "cancelled";
2427
+ pass: "pass";
2428
+ fail: "fail";
2416
2429
  unscored: "unscored";
2417
2430
  }>>;
2418
2431
  stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
@@ -2427,9 +2440,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2427
2440
  kind: z$1.ZodLiteral<"duration">;
2428
2441
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2429
2442
  avg: "avg";
2443
+ sum: "sum";
2430
2444
  min: "min";
2431
2445
  max: "max";
2432
- sum: "sum";
2433
2446
  best: "best";
2434
2447
  worst: "worst";
2435
2448
  }>>;
@@ -2438,9 +2451,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2438
2451
  kind: z$1.ZodLiteral<"cacheHits">;
2439
2452
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2440
2453
  avg: "avg";
2454
+ sum: "sum";
2441
2455
  min: "min";
2442
2456
  max: "max";
2443
- sum: "sum";
2444
2457
  best: "best";
2445
2458
  worst: "worst";
2446
2459
  }>>;
@@ -2451,9 +2464,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2451
2464
  label: z$1.ZodOptional<z$1.ZodString>;
2452
2465
  aggregate: z$1.ZodEnum<{
2453
2466
  avg: "avg";
2467
+ sum: "sum";
2454
2468
  min: "min";
2455
2469
  max: "max";
2456
- sum: "sum";
2457
2470
  best: "best";
2458
2471
  worst: "worst";
2459
2472
  }>;
@@ -2478,9 +2491,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2478
2491
  }, z$1.core.$strip>], "kind">>>;
2479
2492
  defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
2480
2493
  avg: "avg";
2494
+ sum: "sum";
2481
2495
  min: "min";
2482
2496
  max: "max";
2483
- sum: "sum";
2484
2497
  best: "best";
2485
2498
  worst: "worst";
2486
2499
  }>>;
@@ -2517,9 +2530,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2517
2530
  key: z$1.ZodString;
2518
2531
  aggregate: z$1.ZodEnum<{
2519
2532
  avg: "avg";
2533
+ sum: "sum";
2520
2534
  min: "min";
2521
2535
  max: "max";
2522
- sum: "sum";
2523
2536
  latest: "latest";
2524
2537
  passThresholdRate: "passThresholdRate";
2525
2538
  }>;
@@ -2559,9 +2572,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2559
2572
  key: z$1.ZodString;
2560
2573
  aggregate: z$1.ZodEnum<{
2561
2574
  avg: "avg";
2575
+ sum: "sum";
2562
2576
  min: "min";
2563
2577
  max: "max";
2564
- sum: "sum";
2565
2578
  latest: "latest";
2566
2579
  passThresholdRate: "passThresholdRate";
2567
2580
  }>;
@@ -2658,11 +2671,11 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2658
2671
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2659
2672
  status: z$1.ZodEnum<{
2660
2673
  error: "error";
2661
- pass: "pass";
2662
- fail: "fail";
2674
+ pending: "pending";
2663
2675
  running: "running";
2664
2676
  cancelled: "cancelled";
2665
- pending: "pending";
2677
+ pass: "pass";
2678
+ fail: "fail";
2666
2679
  }>;
2667
2680
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
2668
2681
  cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
@@ -2729,6 +2742,7 @@ declare const assertionFailureSchema: z$1.ZodObject<{
2729
2742
  }, z$1.core.$strip>;
2730
2743
  /** Assertion failure metadata captured for one case run. */
2731
2744
  type AssertionFailure = z$1.infer<typeof assertionFailureSchema>;
2745
+ /** Pass/fail outcome for one recorded eval assertion. */
2732
2746
  /** Severity level for one log captured during a case run. */
2733
2747
  declare const runLogLevelSchema: z$1.ZodEnum<{
2734
2748
  error: "error";
@@ -2848,8 +2862,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2848
2862
  subtree: "subtree";
2849
2863
  }>>;
2850
2864
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2851
- sum: "sum";
2852
2865
  all: "all";
2866
+ sum: "sum";
2853
2867
  last: "last";
2854
2868
  }>>;
2855
2869
  }, z$1.core.$strip>>>;
@@ -2860,10 +2874,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2860
2874
  namespace: z$1.ZodString;
2861
2875
  key: z$1.ZodString;
2862
2876
  status: z$1.ZodEnum<{
2877
+ bypass: "bypass";
2878
+ refresh: "refresh";
2863
2879
  hit: "hit";
2864
2880
  miss: "miss";
2865
- refresh: "refresh";
2866
- bypass: "bypass";
2867
2881
  }>;
2868
2882
  read: z$1.ZodOptional<z$1.ZodBoolean>;
2869
2883
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2882,11 +2896,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2882
2896
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2883
2897
  status: z$1.ZodEnum<{
2884
2898
  error: "error";
2885
- pass: "pass";
2886
- fail: "fail";
2899
+ pending: "pending";
2887
2900
  running: "running";
2888
2901
  cancelled: "cancelled";
2889
- pending: "pending";
2902
+ pass: "pass";
2903
+ fail: "fail";
2890
2904
  }>;
2891
2905
  input: z$1.ZodUnknown;
2892
2906
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -2951,8 +2965,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2951
2965
  subtree: "subtree";
2952
2966
  }>>;
2953
2967
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2954
- sum: "sum";
2955
2968
  all: "all";
2969
+ sum: "sum";
2956
2970
  last: "last";
2957
2971
  }>>;
2958
2972
  }, z$1.core.$strip>>>;
@@ -3020,8 +3034,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3020
3034
  subtree: "subtree";
3021
3035
  }>>;
3022
3036
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3023
- sum: "sum";
3024
3037
  all: "all";
3038
+ sum: "sum";
3025
3039
  last: "last";
3026
3040
  }>>;
3027
3041
  }, z$1.core.$strip>>>;
@@ -3032,10 +3046,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3032
3046
  namespace: z$1.ZodString;
3033
3047
  key: z$1.ZodString;
3034
3048
  status: z$1.ZodEnum<{
3049
+ bypass: "bypass";
3050
+ refresh: "refresh";
3035
3051
  hit: "hit";
3036
3052
  miss: "miss";
3037
- refresh: "refresh";
3038
- bypass: "bypass";
3039
3053
  }>;
3040
3054
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3041
3055
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3092,6 +3106,20 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3092
3106
  right: "right";
3093
3107
  }>>;
3094
3108
  }, z$1.core.$strip>>>;
3109
+ assertions: z$1.ZodOptional<z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
3110
+ name: z$1.ZodOptional<z$1.ZodString>;
3111
+ message: z$1.ZodString;
3112
+ stack: z$1.ZodOptional<z$1.ZodString>;
3113
+ status: z$1.ZodEnum<{
3114
+ pass: "pass";
3115
+ fail: "fail";
3116
+ }>;
3117
+ }, z$1.core.$strip>, z$1.ZodPipe<z$1.ZodString, z$1.ZodTransform<{
3118
+ message: string;
3119
+ status: "pass" | "fail";
3120
+ name?: string | undefined;
3121
+ stack?: string | undefined;
3122
+ }, string>>]>>>;
3095
3123
  assertionFailures: z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
3096
3124
  name: z$1.ZodOptional<z$1.ZodString>;
3097
3125
  message: z$1.ZodString;
@@ -3138,10 +3166,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3138
3166
  namespace: z$1.ZodString;
3139
3167
  key: z$1.ZodString;
3140
3168
  status: z$1.ZodEnum<{
3169
+ bypass: "bypass";
3170
+ refresh: "refresh";
3141
3171
  hit: "hit";
3142
3172
  miss: "miss";
3143
- refresh: "refresh";
3144
- bypass: "bypass";
3145
3173
  }>;
3146
3174
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3147
3175
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3195,9 +3223,9 @@ type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
3195
3223
  /** Reducer applied to a numeric column across all cases of a single run. */
3196
3224
  declare const evalChartAggregateSchema: z$1.ZodEnum<{
3197
3225
  avg: "avg";
3226
+ sum: "sum";
3198
3227
  min: "min";
3199
3228
  max: "max";
3200
- sum: "sum";
3201
3229
  latest: "latest";
3202
3230
  passThresholdRate: "passThresholdRate";
3203
3231
  }>;
@@ -3253,9 +3281,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3253
3281
  key: z$1.ZodString;
3254
3282
  aggregate: z$1.ZodEnum<{
3255
3283
  avg: "avg";
3284
+ sum: "sum";
3256
3285
  min: "min";
3257
3286
  max: "max";
3258
- sum: "sum";
3259
3287
  latest: "latest";
3260
3288
  passThresholdRate: "passThresholdRate";
3261
3289
  }>;
@@ -3288,9 +3316,9 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
3288
3316
  key: z$1.ZodString;
3289
3317
  aggregate: z$1.ZodEnum<{
3290
3318
  avg: "avg";
3319
+ sum: "sum";
3291
3320
  min: "min";
3292
3321
  max: "max";
3293
- sum: "sum";
3294
3322
  latest: "latest";
3295
3323
  passThresholdRate: "passThresholdRate";
3296
3324
  }>;
@@ -3336,9 +3364,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3336
3364
  key: z$1.ZodString;
3337
3365
  aggregate: z$1.ZodEnum<{
3338
3366
  avg: "avg";
3367
+ sum: "sum";
3339
3368
  min: "min";
3340
3369
  max: "max";
3341
- sum: "sum";
3342
3370
  latest: "latest";
3343
3371
  passThresholdRate: "passThresholdRate";
3344
3372
  }>;
@@ -3378,9 +3406,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3378
3406
  key: z$1.ZodString;
3379
3407
  aggregate: z$1.ZodEnum<{
3380
3408
  avg: "avg";
3409
+ sum: "sum";
3381
3410
  min: "min";
3382
3411
  max: "max";
3383
- sum: "sum";
3384
3412
  latest: "latest";
3385
3413
  passThresholdRate: "passThresholdRate";
3386
3414
  }>;
@@ -3426,9 +3454,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3426
3454
  key: z$1.ZodString;
3427
3455
  aggregate: z$1.ZodEnum<{
3428
3456
  avg: "avg";
3457
+ sum: "sum";
3429
3458
  min: "min";
3430
3459
  max: "max";
3431
- sum: "sum";
3432
3460
  latest: "latest";
3433
3461
  passThresholdRate: "passThresholdRate";
3434
3462
  }>;
@@ -3468,9 +3496,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3468
3496
  key: z$1.ZodString;
3469
3497
  aggregate: z$1.ZodEnum<{
3470
3498
  avg: "avg";
3499
+ sum: "sum";
3471
3500
  min: "min";
3472
3501
  max: "max";
3473
- sum: "sum";
3474
3502
  latest: "latest";
3475
3503
  passThresholdRate: "passThresholdRate";
3476
3504
  }>;
@@ -3486,10 +3514,10 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3486
3514
  shortId: z$1.ZodString;
3487
3515
  status: z$1.ZodEnum<{
3488
3516
  error: "error";
3489
- running: "running";
3490
- cancelled: "cancelled";
3491
3517
  pending: "pending";
3518
+ running: "running";
3492
3519
  completed: "completed";
3520
+ cancelled: "cancelled";
3493
3521
  }>;
3494
3522
  temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
3495
3523
  startedAt: z$1.ZodString;
@@ -3498,9 +3526,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3498
3526
  evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
3499
3527
  target: z$1.ZodObject<{
3500
3528
  mode: z$1.ZodEnum<{
3501
- caseIds: "caseIds";
3502
3529
  all: "all";
3503
3530
  evalIds: "evalIds";
3531
+ caseIds: "caseIds";
3504
3532
  }>;
3505
3533
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
3506
3534
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -3514,9 +3542,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3514
3542
  median: "median";
3515
3543
  }>>>;
3516
3544
  cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
3517
- refresh: "refresh";
3518
- bypass: "bypass";
3519
3545
  use: "use";
3546
+ bypass: "bypass";
3547
+ refresh: "refresh";
3520
3548
  }>>;
3521
3549
  }, z$1.core.$strip>;
3522
3550
  /** Persisted lifecycle metadata for a single eval run. */
@@ -3526,10 +3554,10 @@ declare const runSummarySchema$1: z$1.ZodObject<{
3526
3554
  runId: z$1.ZodString;
3527
3555
  status: z$1.ZodEnum<{
3528
3556
  error: "error";
3529
- running: "running";
3530
- cancelled: "cancelled";
3531
3557
  pending: "pending";
3558
+ running: "running";
3532
3559
  completed: "completed";
3560
+ cancelled: "cancelled";
3533
3561
  }>;
3534
3562
  totalCases: z$1.ZodNumber;
3535
3563
  passedCases: z$1.ZodNumber;
@@ -4226,7 +4254,12 @@ type AgentEvalsConfig$1 = {
4226
4254
  * cache entries. Defaults to `5000`; non-positive or non-finite values use
4227
4255
  * the default.
4228
4256
  */
4229
- pruneIdleDelayMs?: number; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
4257
+ pruneIdleDelayMs?: number;
4258
+ /**
4259
+ * Minimum milliseconds between `lastAccessedAt` index rewrites for repeated
4260
+ * cache hits. Defaults to four hours. Set to `0` to record every hit.
4261
+ */
4262
+ lastAccessedAtUpdateIntervalMs?: number; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
4230
4263
  maxEntriesPerEval?: number;
4231
4264
  };
4232
4265
  };
@@ -4436,9 +4469,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4436
4469
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4437
4470
  */
4438
4471
  declare const cacheModeSchema: z$1.ZodEnum<{
4439
- refresh: "refresh";
4440
- bypass: "bypass";
4441
4472
  use: "use";
4473
+ bypass: "bypass";
4474
+ refresh: "refresh";
4442
4475
  }>;
4443
4476
  /** Mode controlling how cached spans behave during a run. */
4444
4477
  type CacheMode = z$1.infer<typeof cacheModeSchema>;
@@ -4459,10 +4492,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
4459
4492
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
4460
4493
  /** Status of a cache lookup recorded on a span or case scope. */
4461
4494
  declare const cacheStatusSchema: z$1.ZodEnum<{
4495
+ bypass: "bypass";
4496
+ refresh: "refresh";
4462
4497
  hit: "hit";
4463
4498
  miss: "miss";
4464
- refresh: "refresh";
4465
- bypass: "bypass";
4466
4499
  }>;
4467
4500
  /** Status of a cache lookup recorded on a span or case scope. */
4468
4501
  type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
@@ -4479,10 +4512,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
4479
4512
  namespace: z$1.ZodString;
4480
4513
  key: z$1.ZodString;
4481
4514
  status: z$1.ZodEnum<{
4515
+ bypass: "bypass";
4516
+ refresh: "refresh";
4482
4517
  hit: "hit";
4483
4518
  miss: "miss";
4484
- refresh: "refresh";
4485
- bypass: "bypass";
4486
4519
  }>;
4487
4520
  read: z$1.ZodOptional<z$1.ZodBoolean>;
4488
4521
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -4496,7 +4529,7 @@ declare const cacheListItemSchema$1: z$1.ZodObject<{
4496
4529
  key: z$1.ZodString;
4497
4530
  namespace: z$1.ZodString;
4498
4531
  storedAt: z$1.ZodString;
4499
- lastAccessedAt: z$1.ZodString;
4532
+ lastAccessedAt: z$1.ZodNullable<z$1.ZodString>;
4500
4533
  }, z$1.core.$strip>;
4501
4534
  /** Minimal summary row for a single cache entry. */
4502
4535
  type CacheListItem = z$1.infer<typeof cacheListItemSchema$1>;
@@ -5434,9 +5467,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
5434
5467
  declare const createRunRequestSchema$1: z$1.ZodObject<{
5435
5468
  target: z$1.ZodObject<{
5436
5469
  mode: z$1.ZodEnum<{
5437
- caseIds: "caseIds";
5438
5470
  all: "all";
5439
5471
  evalIds: "evalIds";
5472
+ caseIds: "caseIds";
5440
5473
  }>;
5441
5474
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
5442
5475
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -5448,9 +5481,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
5448
5481
  temporary: z$1.ZodOptional<z$1.ZodBoolean>;
5449
5482
  cache: z$1.ZodOptional<z$1.ZodObject<{
5450
5483
  mode: z$1.ZodDefault<z$1.ZodEnum<{
5451
- refresh: "refresh";
5452
- bypass: "bypass";
5453
5484
  use: "use";
5485
+ bypass: "bypass";
5486
+ refresh: "refresh";
5454
5487
  }>>;
5455
5488
  }, z$1.core.$strip>>;
5456
5489
  manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
@@ -6302,6 +6335,20 @@ declare const caseDetailSchema: z$1.ZodObject<{
6302
6335
  right: "right";
6303
6336
  }>>;
6304
6337
  }, z$1.core.$strip>>>;
6338
+ assertions: z$1.ZodOptional<z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
6339
+ name: z$1.ZodOptional<z$1.ZodString>;
6340
+ message: z$1.ZodString;
6341
+ stack: z$1.ZodOptional<z$1.ZodString>;
6342
+ status: z$1.ZodEnum<{
6343
+ pass: "pass";
6344
+ fail: "fail";
6345
+ }>;
6346
+ }, z$1.core.$strip>, z$1.ZodPipe<z$1.ZodString, z$1.ZodTransform<{
6347
+ message: string;
6348
+ status: "pass" | "fail";
6349
+ name?: string | undefined;
6350
+ stack?: string | undefined;
6351
+ }, string>>]>>>;
6305
6352
  assertionFailures: z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
6306
6353
  name: z$1.ZodOptional<z$1.ZodString>;
6307
6354
  message: z$1.ZodString;
@@ -6590,7 +6637,7 @@ declare const cacheListItemSchema: z$1.ZodObject<{
6590
6637
  key: z$1.ZodString;
6591
6638
  namespace: z$1.ZodString;
6592
6639
  storedAt: z$1.ZodString;
6593
- lastAccessedAt: z$1.ZodString;
6640
+ lastAccessedAt: z$1.ZodNullable<z$1.ZodString>;
6594
6641
  }, z$1.core.$strip>;
6595
6642
  /** Minimal summary row for a single cache entry. */
6596
6643
  type CacheListItem$1 = z$1.infer<typeof cacheListItemSchema>;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-BH7DlMXl.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Ck0mqxd-.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-B3iq-tuv.mjs";
1
+ import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-C4kAOhC1.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Cf37PZKi.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-303BocMW.mjs";
4
4
  export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-BH7DlMXl.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-C1Ex9QI-.mjs";
1
+ import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-C4kAOhC1.mjs";
2
+ import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-5xEiQxiS.mjs";
3
3
  import { z } from "zod/v4";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -141,7 +141,8 @@ async function main() {
141
141
  workspaceRoot: context.workspaceRoot,
142
142
  dir: config.cache?.dir,
143
143
  maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
144
- maxEntriesByNamespace: config.cache?.maxEntriesByNamespace
144
+ maxEntriesByNamespace: config.cache?.maxEntriesByNamespace,
145
+ lastAccessedAtUpdateIntervalMs: config.cache?.lastAccessedAtUpdateIntervalMs
145
146
  });
146
147
  const evalMetas = await discoverRunEvals({
147
148
  config,