@ls-stack/agent-eval 0.61.1 → 0.61.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -2061,9 +2061,9 @@ declare const traceAttributeDisplaySchema: z.ZodObject<{
2061
2061
  subtree: "subtree";
2062
2062
  }>>;
2063
2063
  mode: z.ZodOptional<z.ZodEnum<{
2064
- sum: "sum";
2065
2064
  all: "all";
2066
2065
  last: "last";
2066
+ sum: "sum";
2067
2067
  }>>;
2068
2068
  }, z.core.$strip>;
2069
2069
  /**
@@ -2097,9 +2097,9 @@ declare const traceDisplayConfigSchema: z.ZodObject<{
2097
2097
  subtree: "subtree";
2098
2098
  }>>;
2099
2099
  mode: z.ZodOptional<z.ZodEnum<{
2100
- sum: "sum";
2101
2100
  all: "all";
2102
2101
  last: "last";
2102
+ sum: "sum";
2103
2103
  }>>;
2104
2104
  }, z.core.$strip>>>;
2105
2105
  }, z.core.$strip>;
@@ -2137,9 +2137,9 @@ declare const traceAttributeDisplayInputSchema: z.ZodObject<{
2137
2137
  subtree: "subtree";
2138
2138
  }>>;
2139
2139
  mode: z.ZodOptional<z.ZodEnum<{
2140
- sum: "sum";
2141
2140
  all: "all";
2142
2141
  last: "last";
2142
+ sum: "sum";
2143
2143
  }>>;
2144
2144
  transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
2145
2145
  }, z.core.$strip>;
@@ -2175,9 +2175,9 @@ declare const traceDisplayInputConfigSchema: z.ZodObject<{
2175
2175
  subtree: "subtree";
2176
2176
  }>>;
2177
2177
  mode: z.ZodOptional<z.ZodEnum<{
2178
- sum: "sum";
2179
2178
  all: "all";
2180
2179
  last: "last";
2180
+ sum: "sum";
2181
2181
  }>>;
2182
2182
  transform: z.ZodOptional<z.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
2183
2183
  }, z.core.$strip>>>;
@@ -2214,8 +2214,8 @@ declare const traceSpanSchema$1: z.ZodObject<{
2214
2214
  status: z.ZodEnum<{
2215
2215
  error: "error";
2216
2216
  running: "running";
2217
- cancelled: "cancelled";
2218
2217
  ok: "ok";
2218
+ cancelled: "cancelled";
2219
2219
  }>;
2220
2220
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2221
2221
  error: z.ZodOptional<z.ZodObject<{
@@ -2260,10 +2260,10 @@ type EvalFreshnessStatus = z.infer<typeof evalFreshnessStatusSchema>;
2260
2260
  * `best` selects the highest finite value and `worst` selects the lowest.
2261
2261
  */
2262
2262
  declare const evalStatAggregateSchema: z.ZodEnum<{
2263
+ sum: "sum";
2263
2264
  avg: "avg";
2264
2265
  min: "min";
2265
2266
  max: "max";
2266
- sum: "sum";
2267
2267
  best: "best";
2268
2268
  worst: "worst";
2269
2269
  }>;
@@ -2292,10 +2292,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
2292
2292
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2293
2293
  kind: z.ZodLiteral<"duration">;
2294
2294
  aggregate: z.ZodOptional<z.ZodEnum<{
2295
+ sum: "sum";
2295
2296
  avg: "avg";
2296
2297
  min: "min";
2297
2298
  max: "max";
2298
- sum: "sum";
2299
2299
  best: "best";
2300
2300
  worst: "worst";
2301
2301
  }>>;
@@ -2303,10 +2303,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
2303
2303
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2304
2304
  kind: z.ZodLiteral<"cacheHits">;
2305
2305
  aggregate: z.ZodOptional<z.ZodEnum<{
2306
+ sum: "sum";
2306
2307
  avg: "avg";
2307
2308
  min: "min";
2308
2309
  max: "max";
2309
- sum: "sum";
2310
2310
  best: "best";
2311
2311
  worst: "worst";
2312
2312
  }>>;
@@ -2316,10 +2316,10 @@ declare const evalStatItemSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
2316
2316
  key: z.ZodString;
2317
2317
  label: z.ZodOptional<z.ZodString>;
2318
2318
  aggregate: z.ZodEnum<{
2319
+ sum: "sum";
2319
2320
  avg: "avg";
2320
2321
  min: "min";
2321
2322
  max: "max";
2322
- sum: "sum";
2323
2323
  best: "best";
2324
2324
  worst: "worst";
2325
2325
  }>;
@@ -2356,10 +2356,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
2356
2356
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2357
2357
  kind: z.ZodLiteral<"duration">;
2358
2358
  aggregate: z.ZodOptional<z.ZodEnum<{
2359
+ sum: "sum";
2359
2360
  avg: "avg";
2360
2361
  min: "min";
2361
2362
  max: "max";
2362
- sum: "sum";
2363
2363
  best: "best";
2364
2364
  worst: "worst";
2365
2365
  }>>;
@@ -2367,10 +2367,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
2367
2367
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2368
2368
  kind: z.ZodLiteral<"cacheHits">;
2369
2369
  aggregate: z.ZodOptional<z.ZodEnum<{
2370
+ sum: "sum";
2370
2371
  avg: "avg";
2371
2372
  min: "min";
2372
2373
  max: "max";
2373
- sum: "sum";
2374
2374
  best: "best";
2375
2375
  worst: "worst";
2376
2376
  }>>;
@@ -2380,10 +2380,10 @@ declare const evalStatsConfigSchema: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodOb
2380
2380
  key: z.ZodString;
2381
2381
  label: z.ZodOptional<z.ZodString>;
2382
2382
  aggregate: z.ZodEnum<{
2383
+ sum: "sum";
2383
2384
  avg: "avg";
2384
2385
  min: "min";
2385
2386
  max: "max";
2386
- sum: "sum";
2387
2387
  best: "best";
2388
2388
  worst: "worst";
2389
2389
  }>;
@@ -2466,10 +2466,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2466
2466
  caseIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
2467
2467
  lastRunStatus: z.ZodNullable<z.ZodEnum<{
2468
2468
  error: "error";
2469
- pass: "pass";
2470
- fail: "fail";
2471
2469
  running: "running";
2472
2470
  cancelled: "cancelled";
2471
+ pass: "pass";
2472
+ fail: "fail";
2473
2473
  unscored: "unscored";
2474
2474
  }>>;
2475
2475
  stats: z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
@@ -2483,10 +2483,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2483
2483
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2484
2484
  kind: z.ZodLiteral<"duration">;
2485
2485
  aggregate: z.ZodOptional<z.ZodEnum<{
2486
+ sum: "sum";
2486
2487
  avg: "avg";
2487
2488
  min: "min";
2488
2489
  max: "max";
2489
- sum: "sum";
2490
2490
  best: "best";
2491
2491
  worst: "worst";
2492
2492
  }>>;
@@ -2494,10 +2494,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2494
2494
  hideIfNoValue: z.ZodOptional<z.ZodBoolean>;
2495
2495
  kind: z.ZodLiteral<"cacheHits">;
2496
2496
  aggregate: z.ZodOptional<z.ZodEnum<{
2497
+ sum: "sum";
2497
2498
  avg: "avg";
2498
2499
  min: "min";
2499
2500
  max: "max";
2500
- sum: "sum";
2501
2501
  best: "best";
2502
2502
  worst: "worst";
2503
2503
  }>>;
@@ -2507,10 +2507,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2507
2507
  key: z.ZodString;
2508
2508
  label: z.ZodOptional<z.ZodString>;
2509
2509
  aggregate: z.ZodEnum<{
2510
+ sum: "sum";
2510
2511
  avg: "avg";
2511
2512
  min: "min";
2512
2513
  max: "max";
2513
- sum: "sum";
2514
2514
  best: "best";
2515
2515
  worst: "worst";
2516
2516
  }>;
@@ -2534,10 +2534,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2534
2534
  accent: z.ZodOptional<z.ZodBoolean>;
2535
2535
  }, z.core.$strip>], "kind">>>;
2536
2536
  defaultStatAggregate: z.ZodOptional<z.ZodEnum<{
2537
+ sum: "sum";
2537
2538
  avg: "avg";
2538
2539
  min: "min";
2539
2540
  max: "max";
2540
- sum: "sum";
2541
2541
  best: "best";
2542
2542
  worst: "worst";
2543
2543
  }>>;
@@ -2560,9 +2560,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
2560
2560
  color: z.ZodOptional<z.ZodEnum<{
2561
2561
  success: "success";
2562
2562
  error: "error";
2563
+ warning: "warning";
2563
2564
  accent: "accent";
2564
2565
  accentDim: "accentDim";
2565
- warning: "warning";
2566
2566
  textMuted: "textMuted";
2567
2567
  }>>;
2568
2568
  axis: z.ZodOptional<z.ZodEnum<{
@@ -2573,10 +2573,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2573
2573
  source: z.ZodLiteral<"column">;
2574
2574
  key: z.ZodString;
2575
2575
  aggregate: z.ZodEnum<{
2576
+ sum: "sum";
2576
2577
  avg: "avg";
2577
2578
  min: "min";
2578
2579
  max: "max";
2579
- sum: "sum";
2580
2580
  latest: "latest";
2581
2581
  passThresholdRate: "passThresholdRate";
2582
2582
  }>;
@@ -2584,9 +2584,9 @@ declare const evalSummarySchema$1: z.ZodObject<{
2584
2584
  color: z.ZodOptional<z.ZodEnum<{
2585
2585
  success: "success";
2586
2586
  error: "error";
2587
+ warning: "warning";
2587
2588
  accent: "accent";
2588
2589
  accentDim: "accentDim";
2589
- warning: "warning";
2590
2590
  textMuted: "textMuted";
2591
2591
  }>>;
2592
2592
  axis: z.ZodOptional<z.ZodEnum<{
@@ -2615,10 +2615,10 @@ declare const evalSummarySchema$1: z.ZodObject<{
2615
2615
  source: z.ZodLiteral<"column">;
2616
2616
  key: z.ZodString;
2617
2617
  aggregate: z.ZodEnum<{
2618
+ sum: "sum";
2618
2619
  avg: "avg";
2619
2620
  min: "min";
2620
2621
  max: "max";
2621
- sum: "sum";
2622
2622
  latest: "latest";
2623
2623
  passThresholdRate: "passThresholdRate";
2624
2624
  }>;
@@ -2715,10 +2715,10 @@ declare const caseRowSchema$1: z.ZodObject<{
2715
2715
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
2716
2716
  status: z.ZodEnum<{
2717
2717
  error: "error";
2718
- pass: "pass";
2719
- fail: "fail";
2720
2718
  running: "running";
2721
2719
  cancelled: "cancelled";
2720
+ pass: "pass";
2721
+ fail: "fail";
2722
2722
  pending: "pending";
2723
2723
  }>;
2724
2724
  durationMs: z.ZodNullable<z.ZodNumber>;
@@ -2857,8 +2857,8 @@ declare const scoreTraceSchema: z.ZodObject<{
2857
2857
  status: z.ZodEnum<{
2858
2858
  error: "error";
2859
2859
  running: "running";
2860
- cancelled: "cancelled";
2861
2860
  ok: "ok";
2861
+ cancelled: "cancelled";
2862
2862
  }>;
2863
2863
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2864
2864
  error: z.ZodOptional<z.ZodObject<{
@@ -2908,9 +2908,9 @@ declare const scoreTraceSchema: z.ZodObject<{
2908
2908
  subtree: "subtree";
2909
2909
  }>>;
2910
2910
  mode: z.ZodOptional<z.ZodEnum<{
2911
- sum: "sum";
2912
2911
  all: "all";
2913
2912
  last: "last";
2913
+ sum: "sum";
2914
2914
  }>>;
2915
2915
  }, z.core.$strip>>>;
2916
2916
  }, z.core.$strip>;
@@ -2942,10 +2942,10 @@ declare const caseDetailSchema$1: z.ZodObject<{
2942
2942
  tags: z.ZodOptional<z.ZodArray<z.ZodString>>;
2943
2943
  status: z.ZodEnum<{
2944
2944
  error: "error";
2945
- pass: "pass";
2946
- fail: "fail";
2947
2945
  running: "running";
2948
2946
  cancelled: "cancelled";
2947
+ pass: "pass";
2948
+ fail: "fail";
2949
2949
  pending: "pending";
2950
2950
  }>;
2951
2951
  input: z.ZodUnknown;
@@ -2960,8 +2960,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
2960
2960
  status: z.ZodEnum<{
2961
2961
  error: "error";
2962
2962
  running: "running";
2963
- cancelled: "cancelled";
2964
2963
  ok: "ok";
2964
+ cancelled: "cancelled";
2965
2965
  }>;
2966
2966
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
2967
2967
  error: z.ZodOptional<z.ZodObject<{
@@ -3011,9 +3011,9 @@ declare const caseDetailSchema$1: z.ZodObject<{
3011
3011
  subtree: "subtree";
3012
3012
  }>>;
3013
3013
  mode: z.ZodOptional<z.ZodEnum<{
3014
- sum: "sum";
3015
3014
  all: "all";
3016
3015
  last: "last";
3016
+ sum: "sum";
3017
3017
  }>>;
3018
3018
  }, z.core.$strip>>>;
3019
3019
  }, z.core.$strip>;
@@ -3029,8 +3029,8 @@ declare const caseDetailSchema$1: z.ZodObject<{
3029
3029
  status: z.ZodEnum<{
3030
3030
  error: "error";
3031
3031
  running: "running";
3032
- cancelled: "cancelled";
3033
3032
  ok: "ok";
3033
+ cancelled: "cancelled";
3034
3034
  }>;
3035
3035
  attributes: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
3036
3036
  error: z.ZodOptional<z.ZodObject<{
@@ -3080,9 +3080,9 @@ declare const caseDetailSchema$1: z.ZodObject<{
3080
3080
  subtree: "subtree";
3081
3081
  }>>;
3082
3082
  mode: z.ZodOptional<z.ZodEnum<{
3083
- sum: "sum";
3084
3083
  all: "all";
3085
3084
  last: "last";
3085
+ sum: "sum";
3086
3086
  }>>;
3087
3087
  }, z.core.$strip>>>;
3088
3088
  }, z.core.$strip>;
@@ -3269,10 +3269,10 @@ declare const evalChartBuiltinMetricSchema: z.ZodEnum<{
3269
3269
  type EvalChartBuiltinMetric = z.infer<typeof evalChartBuiltinMetricSchema>;
3270
3270
  /** Reducer applied to a numeric column across all cases of a single run. */
3271
3271
  declare const evalChartAggregateSchema: z.ZodEnum<{
3272
+ sum: "sum";
3272
3273
  avg: "avg";
3273
3274
  min: "min";
3274
3275
  max: "max";
3275
- sum: "sum";
3276
3276
  latest: "latest";
3277
3277
  passThresholdRate: "passThresholdRate";
3278
3278
  }>;
@@ -3285,9 +3285,9 @@ type EvalChartAggregate = z.infer<typeof evalChartAggregateSchema>;
3285
3285
  declare const evalChartColorSchema: z.ZodEnum<{
3286
3286
  success: "success";
3287
3287
  error: "error";
3288
+ warning: "warning";
3288
3289
  accent: "accent";
3289
3290
  accentDim: "accentDim";
3290
- warning: "warning";
3291
3291
  textMuted: "textMuted";
3292
3292
  }>;
3293
3293
  /** Semantic color token resolved to a theme color by the web UI. */
@@ -3314,9 +3314,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3314
3314
  color: z.ZodOptional<z.ZodEnum<{
3315
3315
  success: "success";
3316
3316
  error: "error";
3317
+ warning: "warning";
3317
3318
  accent: "accent";
3318
3319
  accentDim: "accentDim";
3319
- warning: "warning";
3320
3320
  textMuted: "textMuted";
3321
3321
  }>>;
3322
3322
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3327,10 +3327,10 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3327
3327
  source: z.ZodLiteral<"column">;
3328
3328
  key: z.ZodString;
3329
3329
  aggregate: z.ZodEnum<{
3330
+ sum: "sum";
3330
3331
  avg: "avg";
3331
3332
  min: "min";
3332
3333
  max: "max";
3333
- sum: "sum";
3334
3334
  latest: "latest";
3335
3335
  passThresholdRate: "passThresholdRate";
3336
3336
  }>;
@@ -3338,9 +3338,9 @@ declare const evalChartMetricSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
3338
3338
  color: z.ZodOptional<z.ZodEnum<{
3339
3339
  success: "success";
3340
3340
  error: "error";
3341
+ warning: "warning";
3341
3342
  accent: "accent";
3342
3343
  accentDim: "accentDim";
3343
- warning: "warning";
3344
3344
  textMuted: "textMuted";
3345
3345
  }>>;
3346
3346
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3362,10 +3362,10 @@ declare const evalChartTooltipExtraSchema: z.ZodDiscriminatedUnion<[z.ZodObject<
3362
3362
  source: z.ZodLiteral<"column">;
3363
3363
  key: z.ZodString;
3364
3364
  aggregate: z.ZodEnum<{
3365
+ sum: "sum";
3365
3366
  avg: "avg";
3366
3367
  min: "min";
3367
3368
  max: "max";
3368
- sum: "sum";
3369
3369
  latest: "latest";
3370
3370
  passThresholdRate: "passThresholdRate";
3371
3371
  }>;
@@ -3397,9 +3397,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
3397
3397
  color: z.ZodOptional<z.ZodEnum<{
3398
3398
  success: "success";
3399
3399
  error: "error";
3400
+ warning: "warning";
3400
3401
  accent: "accent";
3401
3402
  accentDim: "accentDim";
3402
- warning: "warning";
3403
3403
  textMuted: "textMuted";
3404
3404
  }>>;
3405
3405
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3410,10 +3410,10 @@ declare const evalChartConfigSchema: z.ZodObject<{
3410
3410
  source: z.ZodLiteral<"column">;
3411
3411
  key: z.ZodString;
3412
3412
  aggregate: z.ZodEnum<{
3413
+ sum: "sum";
3413
3414
  avg: "avg";
3414
3415
  min: "min";
3415
3416
  max: "max";
3416
- sum: "sum";
3417
3417
  latest: "latest";
3418
3418
  passThresholdRate: "passThresholdRate";
3419
3419
  }>;
@@ -3421,9 +3421,9 @@ declare const evalChartConfigSchema: z.ZodObject<{
3421
3421
  color: z.ZodOptional<z.ZodEnum<{
3422
3422
  success: "success";
3423
3423
  error: "error";
3424
+ warning: "warning";
3424
3425
  accent: "accent";
3425
3426
  accentDim: "accentDim";
3426
- warning: "warning";
3427
3427
  textMuted: "textMuted";
3428
3428
  }>>;
3429
3429
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3452,10 +3452,10 @@ declare const evalChartConfigSchema: z.ZodObject<{
3452
3452
  source: z.ZodLiteral<"column">;
3453
3453
  key: z.ZodString;
3454
3454
  aggregate: z.ZodEnum<{
3455
+ sum: "sum";
3455
3456
  avg: "avg";
3456
3457
  min: "min";
3457
3458
  max: "max";
3458
- sum: "sum";
3459
3459
  latest: "latest";
3460
3460
  passThresholdRate: "passThresholdRate";
3461
3461
  }>;
@@ -3487,9 +3487,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3487
3487
  color: z.ZodOptional<z.ZodEnum<{
3488
3488
  success: "success";
3489
3489
  error: "error";
3490
+ warning: "warning";
3490
3491
  accent: "accent";
3491
3492
  accentDim: "accentDim";
3492
- warning: "warning";
3493
3493
  textMuted: "textMuted";
3494
3494
  }>>;
3495
3495
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3500,10 +3500,10 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3500
3500
  source: z.ZodLiteral<"column">;
3501
3501
  key: z.ZodString;
3502
3502
  aggregate: z.ZodEnum<{
3503
+ sum: "sum";
3503
3504
  avg: "avg";
3504
3505
  min: "min";
3505
3506
  max: "max";
3506
- sum: "sum";
3507
3507
  latest: "latest";
3508
3508
  passThresholdRate: "passThresholdRate";
3509
3509
  }>;
@@ -3511,9 +3511,9 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3511
3511
  color: z.ZodOptional<z.ZodEnum<{
3512
3512
  success: "success";
3513
3513
  error: "error";
3514
+ warning: "warning";
3514
3515
  accent: "accent";
3515
3516
  accentDim: "accentDim";
3516
- warning: "warning";
3517
3517
  textMuted: "textMuted";
3518
3518
  }>>;
3519
3519
  axis: z.ZodOptional<z.ZodEnum<{
@@ -3542,10 +3542,10 @@ declare const evalChartsConfigSchema: z.ZodArray<z.ZodObject<{
3542
3542
  source: z.ZodLiteral<"column">;
3543
3543
  key: z.ZodString;
3544
3544
  aggregate: z.ZodEnum<{
3545
+ sum: "sum";
3545
3546
  avg: "avg";
3546
3547
  min: "min";
3547
3548
  max: "max";
3548
- sum: "sum";
3549
3549
  latest: "latest";
3550
3550
  passThresholdRate: "passThresholdRate";
3551
3551
  }>;
@@ -3573,8 +3573,8 @@ declare const runManifestSchema$1: z.ZodObject<{
3573
3573
  evalSourceFingerprints: z.ZodDefault<z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>>;
3574
3574
  target: z.ZodObject<{
3575
3575
  mode: z.ZodEnum<{
3576
- caseIds: "caseIds";
3577
3576
  all: "all";
3577
+ caseIds: "caseIds";
3578
3578
  evalIds: "evalIds";
3579
3579
  }>;
3580
3580
  evalKeys: z.ZodOptional<z.ZodArray<z.ZodString>>;
@@ -4729,8 +4729,8 @@ declare const cacheRecordingSchema: z.ZodObject<{
4729
4729
  finalStatus: z.ZodOptional<z.ZodEnum<{
4730
4730
  error: "error";
4731
4731
  running: "running";
4732
- cancelled: "cancelled";
4733
4732
  ok: "ok";
4733
+ cancelled: "cancelled";
4734
4734
  }>>;
4735
4735
  finalError: z.ZodOptional<z.ZodObject<{
4736
4736
  name: z.ZodOptional<z.ZodString>;
@@ -4830,8 +4830,8 @@ declare const cacheEntrySchema: z.ZodObject<{
4830
4830
  finalStatus: z.ZodOptional<z.ZodEnum<{
4831
4831
  error: "error";
4832
4832
  running: "running";
4833
- cancelled: "cancelled";
4834
4833
  ok: "ok";
4834
+ cancelled: "cancelled";
4835
4835
  }>>;
4836
4836
  finalError: z.ZodOptional<z.ZodObject<{
4837
4837
  name: z.ZodOptional<z.ZodString>;
@@ -4948,8 +4948,8 @@ declare const cacheDebugKeyEntrySchema: z.ZodObject<{
4948
4948
  finalStatus: z.ZodOptional<z.ZodEnum<{
4949
4949
  error: "error";
4950
4950
  running: "running";
4951
- cancelled: "cancelled";
4952
4951
  ok: "ok";
4952
+ cancelled: "cancelled";
4953
4953
  }>>;
4954
4954
  finalError: z.ZodOptional<z.ZodObject<{
4955
4955
  name: z.ZodOptional<z.ZodString>;
@@ -5055,8 +5055,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5055
5055
  finalStatus: z.ZodOptional<z.ZodEnum<{
5056
5056
  error: "error";
5057
5057
  running: "running";
5058
- cancelled: "cancelled";
5059
5058
  ok: "ok";
5059
+ cancelled: "cancelled";
5060
5060
  }>>;
5061
5061
  finalError: z.ZodOptional<z.ZodObject<{
5062
5062
  name: z.ZodOptional<z.ZodString>;
@@ -5164,8 +5164,8 @@ declare const cacheEntryWithDebugKeySchema$1: z.ZodObject<{
5164
5164
  finalStatus: z.ZodOptional<z.ZodEnum<{
5165
5165
  error: "error";
5166
5166
  running: "running";
5167
- cancelled: "cancelled";
5168
5167
  ok: "ok";
5168
+ cancelled: "cancelled";
5169
5169
  }>>;
5170
5170
  finalError: z.ZodOptional<z.ZodObject<{
5171
5171
  name: z.ZodOptional<z.ZodString>;
@@ -5271,8 +5271,8 @@ declare const cacheFileSchema: z.ZodObject<{
5271
5271
  finalStatus: z.ZodOptional<z.ZodEnum<{
5272
5272
  error: "error";
5273
5273
  running: "running";
5274
- cancelled: "cancelled";
5275
5274
  ok: "ok";
5275
+ cancelled: "cancelled";
5276
5276
  }>>;
5277
5277
  finalError: z.ZodOptional<z.ZodObject<{
5278
5278
  name: z.ZodOptional<z.ZodString>;
@@ -5388,8 +5388,8 @@ declare const cacheDebugKeyFileSchema: z.ZodObject<{
5388
5388
  finalStatus: z.ZodOptional<z.ZodEnum<{
5389
5389
  error: "error";
5390
5390
  running: "running";
5391
- cancelled: "cancelled";
5392
5391
  ok: "ok";
5392
+ cancelled: "cancelled";
5393
5393
  }>>;
5394
5394
  finalError: z.ZodOptional<z.ZodObject<{
5395
5395
  name: z.ZodOptional<z.ZodString>;
@@ -5573,8 +5573,8 @@ type ConfigReloadState = z.infer<typeof configReloadStateSchema$1>;
5573
5573
  declare const createRunRequestSchema$1: z.ZodObject<{
5574
5574
  target: z.ZodObject<{
5575
5575
  mode: z.ZodEnum<{
5576
- caseIds: "caseIds";
5577
5576
  all: "all";
5577
+ caseIds: "caseIds";
5578
5578
  evalIds: "evalIds";
5579
5579
  }>;
5580
5580
  evalKeys: z.ZodOptional<z.ZodArray<z.ZodString>>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.61.1",
3
+ "version": "0.61.2",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -212,11 +212,11 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape (forma
212
212
  - `tracingAssertions` is a single function that can be authored globally or locally on one eval when a finished-trace invariant should pass or fail the case without creating a fake score column. It receives the same `{ trace, input, case }` context as `deriveFromTracing`; call `evalAssert(...)` or `evalExpect(...)` inside it. Useful trace helpers include `trace.findSpan(name)`, `trace.findSpans(name)`, `trace.hasSpan(name)`, `trace.findSpansByKind(kind)`, `trace.findToolCallSpans()`, `trace.listToolCallSpanNames()`, `trace.hasToolCallSpan(name)`, `trace.getToolCallSpans(name)`, `trace.getToolCallSpanCount(toolName)`, `trace.hasToolCallSpanCount(toolName, expectedCalls)`, `trace.listSpanNames(kind?)`, `trace.listSpanNamesDfs(kind?)`, and `trace.flattenDfs()`. The tool-call helpers include both `kind: 'tool'` spans and imported execution spans recorded as `kind: 'tool_call'`. Tool-name checks and counts match the span `name` as well as GenAI/Mastra identity attributes such as `genAI["gen_ai.tool.name"]` and `mastra.entityName`; list helpers prefer those tool identity attributes when present. `getToolCallSpans(name)` returns one normalized object per matching call, including parsed `arguments`, parsed `result`, `description`, `toolType`, `attributes`, and the original `span`.
213
213
  - `traceDisplay` promotes selected span attributes into the trace tree and detail pane; it supports aggregation across subtrees (`scope`, `mode`) and user-defined `transform(...)` for derived views (e.g. currency conversion). See the `TraceDisplayInputConfig` type.
214
214
  - `llmCalls` (in `agent-evals.config.ts`) configures how LLM-call spans are summarized for review. Defaults to `kind: 'llm'` spans with `model`, `usage.*`, `latencyMs`, `input`, `output`, etc. read from conventional attribute paths. The default `steps` path reads an array from `span.attributes.steps`; if it is missing, direct child `model_step` spans are shown as that call's steps. Tool calls are aggregated from the configured `toolCalls` path plus step-level `toolCalls` on authored step arrays or direct `model_step` child spans, including Mastra's serialized `mastra.model_step.output` format, and child `tool_call` execution spans under each model step. `latencyMs` is time to first token; duration, total tokens, output tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter, override `attributes.<field>` for non-default primitive span shapes, configure model-keyed `pricing` to derive USD costs from token counts, with nested `providers` entries for provider-specific rates, add `costCurrencies` to show converted cost columns in the expanded breakdown table only, add `derivedAttributes` to persist computed values back onto matching LLM spans before trace consumers run, and add entries to `metrics` to surface arbitrary user metrics (`format: 'string' | 'number' | 'duration' | 'json' | 'boolean'`, `placements: ['header' | 'body']`). `derivedAttributes` can be a keyed map for one-off fields or one callback that returns multiple path/value pairs. Derived keys are dot-paths under `span.attributes`; return `undefined` to skip one span or one returned key.
215
- - Default usage config derives missing eval outputs from matching LLM/API spans before `outputsSchema` and scores run: `apiCalls`, `costUsd`, `llmTurns`, `inputTokens`, `outputTokens`, `totalTokens`, `cachedInputTokens`, `cacheCreationInputTokens`, `reasoningTokens`, and `llmDurationMs`. Authored outputs and column overrides win. Default usage columns, stats, and charts use `hideIfNoValue: true`. Default LLM usage charts configure cost, input tokens, and output tokens separately and use `dedupeConsecutiveValues: true` to skip repeated adjacent chart values. `totalTokens` is input + output only; cache read/write tokens stay separate and affect `costUsd` at their own rates. `llmTurns` is the maximum per-call turn count in the case run, using configured steps when available and otherwise one turn per matched LLM call span. Derived base input cost uses `inputTokens - cachedInputTokens - cacheCreationInputTokens` so cache details are not double-counted. `cacheCreationInputTokens` is the total cache-write count; optional `cacheCreationInput1hTokens` only splits that total for 1-hour write pricing via `cacheCreationInput1hUsdPerMillion`. `llmDurationMs` sums elapsed matched LLM span durations; it is not time-to-first-token latency. Remove defaults globally or per eval with `removeDefaultConfig: true` or a key list such as `removeDefaultConfig: ['apiCalls', 'reasoningTokens']`.
215
+ - Default usage config derives missing eval outputs from matching LLM/API spans before `outputsSchema` and scores run: `apiCalls`, `costUsd`, `llmTurns`, `inputTokens`, `outputTokens`, `totalTokens`, `cachedInputTokens`, `cacheCreationInputTokens`, `reasoningTokens`, and `llmDurationMs`. Authored outputs and column overrides win. The web UI fills in baseline run-health stats (`cases`, `passRate`, `duration`) and a pass-rate/duration history chart when an eval has not already authored equivalent run-health UI. If discovery metadata is missing but saved runs contain runtime columns such as `costUsd`, `inputTokens`, or `apiCalls`, the single-eval page can infer the standard usage stats and charts from those saved run values. Default usage columns, stats, and charts use `hideIfNoValue: true`. Default LLM usage charts configure cost, input tokens, and output tokens separately and use `dedupeConsecutiveValues: true` to skip repeated adjacent chart values. `totalTokens` is input + output only; cache read/write tokens stay separate and affect `costUsd` at their own rates. `llmTurns` is the maximum per-call turn count in the case run, using configured steps when available and otherwise one turn per matched LLM call span. Derived base input cost uses `inputTokens - cachedInputTokens - cacheCreationInputTokens` so cache details are not double-counted. `cacheCreationInputTokens` is the total cache-write count; optional `cacheCreationInput1hTokens` only splits that total for 1-hour write pricing via `cacheCreationInput1hUsdPerMillion`. `llmDurationMs` sums elapsed matched LLM span durations; it is not time-to-first-token latency. Remove defaults globally or per eval with `removeDefaultConfig: true` or a key list such as `removeDefaultConfig: ['apiCalls', 'reasoningTokens']`.
216
216
  - `apiCalls` (in `agent-evals.config.ts`) configures how API-call spans are summarized for review. Defaults to `kind: 'api'`, `'http'`, `'http.client'`, and `'fetch'` spans with `method`, `url`, `statusCode`, `request`, `routeAlias`, `response`, `requestBody`, `responseBody`, `headers`, `durationMs`, and `error` read from conventional attribute paths. Override `kinds` or `attributes.<field>` for external tracers. Set a per-span `routeAlias` attribute such as `/v3/tabs/:id` to group dynamic URL paths in API-call route labels and endpoint charts while preserving original URLs in row details. Add `derivedAttributes` as a keyed map or object-returning callback for computed persisted API span attributes, and add `metrics` with the same formats and placements as LLM-call metrics.
217
217
  - `runLogs` (in `agent-evals.config.ts`) controls case log capture. Use `runLogs: { captureConsole: false }` to keep console output in the terminal without persisting console calls to case details. Manual `evalLog(...)` calls are still captured. Captured log locations store the selected user-facing source frame and the full JavaScript stack so agents can inspect additional frames in persisted artifacts when diagnosing where a log came from.
218
218
 
219
- Stats rows and history charts can be authored via `stats` / `charts` on the eval definition. Global `stats` in `agent-evals.config.ts` combine with eval-level stats. Native stat kinds include `cases`, `passRate`, `duration`, and `cacheHits`; `cacheHits` shows Agent Eval operation-level cache hits over total cache operations (`hits/total`) from spans and `evalTracer.cache(...)` refs, not LLM provider prompt-cache read tokens such as `cachedInputTokens`. Cache-hit stats use a separate aggregate control and default to `sum`; `avg` is average per-case hit rate, and min/max/best/worst select cases by hit rate. `duration` aggregates per-case durations using the same modes as column stats. Usage stats and LLM usage charts are added by default unless removed with `removeDefaultConfig`. Column stats can override `format` and `numberFormat`, otherwise they inherit from the matching column. Duration and column stat aggregates support `avg`, `min`, `max`, `sum`, `best` (highest finite value), and `worst` (lowest finite value). Use `defaultStatAggregate` in `agent-evals.config.ts` to set the workspace-wide initial duration/column stat mode, or on an eval definition to override it for that eval. Number formats use `maxDecimalPlaces` to cap decimals and `minDecimalPlaces` to pad trailing zeroes. Without `maxDecimalPlaces`, the default cap is 3 decimal places. Stats and charts support `hideIfNoValue: true`. Charts support `dedupeConsecutiveValues: true` to omit consecutive points whose plotted metrics and tooltip extras match the previous kept point. Their shapes live in the types; no need to memorize the option set.
219
+ Stats rows and history charts can be authored via `stats` / `charts` on the eval definition. Global `stats` in `agent-evals.config.ts` combine with eval-level stats. The web UI automatically supplies missing `cases`, `passRate`, and `duration` stats plus a pass-rate/duration history chart, including for a single completed run. Native stat kinds include `cases`, `passRate`, `duration`, and `cacheHits`; `cacheHits` shows Agent Eval operation-level cache hits over total cache operations (`hits/total`) from spans and `evalTracer.cache(...)` refs, not LLM provider prompt-cache read tokens such as `cachedInputTokens`. Cache-hit stats use a separate aggregate control and default to `sum`; `avg` is average per-case hit rate, and min/max/best/worst select cases by hit rate. `duration` aggregates per-case durations using the same modes as column stats. Usage stats and LLM usage charts are added by default unless removed with `removeDefaultConfig`. Column stats can override `format` and `numberFormat`, otherwise they inherit from the matching column. Duration and column stat aggregates support `avg`, `min`, `max`, `sum`, `best` (highest finite value), and `worst` (lowest finite value). Use `defaultStatAggregate` in `agent-evals.config.ts` to set the workspace-wide initial duration/column stat mode, or on an eval definition to override it for that eval. Number formats use `maxDecimalPlaces` to cap decimals and `minDecimalPlaces` to pad trailing zeroes. Without `maxDecimalPlaces`, the default cap is 3 decimal places. Stats and charts support `hideIfNoValue: true`. Charts support `dedupeConsecutiveValues: true` to omit consecutive points whose plotted metrics and tooltip extras match the previous kept point. Rendered charts with no plottable values show an unavailable state instead of a blank frame. Their shapes live in the types; no need to memorize the option set.
220
220
 
221
221
  ## Cached operations
222
222