@ls-stack/agent-eval 0.58.3 → 0.58.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -405,10 +405,11 @@ type EvalTraceTree = {
405
405
  findSpan: (name: string) => EvalTraceSpan$2 | undefined; /** Return every span whose name exactly matches `name`. */
406
406
  findSpans: (name: string) => EvalTraceSpan$2[]; /** Return whether any span name exactly matches `name`. */
407
407
  hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
408
- findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'`. */
409
- findToolCallSpans: () => EvalTraceSpan$2[]; /** Return the names of every span with `kind: 'tool'`. */
410
- listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
411
- hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
408
+ findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'` or `kind: 'tool_call'`. */
409
+ findToolCallSpans: () => EvalTraceSpan$2[]; /** Return the names of every span with `kind: 'tool'` or `kind: 'tool_call'`. */
410
+ listToolCallSpanNames: () => string[]; /** Return whether a tool-call span has a name exactly matching `name`. */
411
+ hasToolCallSpan: (name: string) => boolean; /** Return whether a tool-call span name appears exactly `expectedCalls` times. */
412
+ hasNToolCallSpans: (toolName: string, expectedCalls: number) => boolean; /** Return span names in creation order, optionally filtered by kind. */
412
413
  listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
413
414
  listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
414
415
  flattenDfs: () => EvalTraceSpan$2[];
@@ -2242,9 +2243,9 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
2242
2243
  */
2243
2244
  declare const evalStatAggregateSchema: z$1.ZodEnum<{
2244
2245
  sum: "sum";
2246
+ avg: "avg";
2245
2247
  min: "min";
2246
2248
  max: "max";
2247
- avg: "avg";
2248
2249
  best: "best";
2249
2250
  worst: "worst";
2250
2251
  }>;
@@ -2274,9 +2275,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2274
2275
  kind: z$1.ZodLiteral<"duration">;
2275
2276
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2276
2277
  sum: "sum";
2278
+ avg: "avg";
2277
2279
  min: "min";
2278
2280
  max: "max";
2279
- avg: "avg";
2280
2281
  best: "best";
2281
2282
  worst: "worst";
2282
2283
  }>>;
@@ -2285,9 +2286,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2285
2286
  kind: z$1.ZodLiteral<"cacheHits">;
2286
2287
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2287
2288
  sum: "sum";
2289
+ avg: "avg";
2288
2290
  min: "min";
2289
2291
  max: "max";
2290
- avg: "avg";
2291
2292
  best: "best";
2292
2293
  worst: "worst";
2293
2294
  }>>;
@@ -2298,9 +2299,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2298
2299
  label: z$1.ZodOptional<z$1.ZodString>;
2299
2300
  aggregate: z$1.ZodEnum<{
2300
2301
  sum: "sum";
2302
+ avg: "avg";
2301
2303
  min: "min";
2302
2304
  max: "max";
2303
- avg: "avg";
2304
2305
  best: "best";
2305
2306
  worst: "worst";
2306
2307
  }>;
@@ -2338,9 +2339,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2338
2339
  kind: z$1.ZodLiteral<"duration">;
2339
2340
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2340
2341
  sum: "sum";
2342
+ avg: "avg";
2341
2343
  min: "min";
2342
2344
  max: "max";
2343
- avg: "avg";
2344
2345
  best: "best";
2345
2346
  worst: "worst";
2346
2347
  }>>;
@@ -2349,9 +2350,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2349
2350
  kind: z$1.ZodLiteral<"cacheHits">;
2350
2351
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2351
2352
  sum: "sum";
2353
+ avg: "avg";
2352
2354
  min: "min";
2353
2355
  max: "max";
2354
- avg: "avg";
2355
2356
  best: "best";
2356
2357
  worst: "worst";
2357
2358
  }>>;
@@ -2362,9 +2363,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2362
2363
  label: z$1.ZodOptional<z$1.ZodString>;
2363
2364
  aggregate: z$1.ZodEnum<{
2364
2365
  sum: "sum";
2366
+ avg: "avg";
2365
2367
  min: "min";
2366
2368
  max: "max";
2367
- avg: "avg";
2368
2369
  best: "best";
2369
2370
  worst: "worst";
2370
2371
  }>;
@@ -2465,9 +2466,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2465
2466
  kind: z$1.ZodLiteral<"duration">;
2466
2467
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2467
2468
  sum: "sum";
2469
+ avg: "avg";
2468
2470
  min: "min";
2469
2471
  max: "max";
2470
- avg: "avg";
2471
2472
  best: "best";
2472
2473
  worst: "worst";
2473
2474
  }>>;
@@ -2476,9 +2477,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2476
2477
  kind: z$1.ZodLiteral<"cacheHits">;
2477
2478
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2478
2479
  sum: "sum";
2480
+ avg: "avg";
2479
2481
  min: "min";
2480
2482
  max: "max";
2481
- avg: "avg";
2482
2483
  best: "best";
2483
2484
  worst: "worst";
2484
2485
  }>>;
@@ -2489,9 +2490,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2489
2490
  label: z$1.ZodOptional<z$1.ZodString>;
2490
2491
  aggregate: z$1.ZodEnum<{
2491
2492
  sum: "sum";
2493
+ avg: "avg";
2492
2494
  min: "min";
2493
2495
  max: "max";
2494
- avg: "avg";
2495
2496
  best: "best";
2496
2497
  worst: "worst";
2497
2498
  }>;
@@ -2516,9 +2517,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2516
2517
  }, z$1.core.$strip>], "kind">>>;
2517
2518
  defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
2518
2519
  sum: "sum";
2520
+ avg: "avg";
2519
2521
  min: "min";
2520
2522
  max: "max";
2521
- avg: "avg";
2522
2523
  best: "best";
2523
2524
  worst: "worst";
2524
2525
  }>>;
@@ -2539,8 +2540,8 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2539
2540
  }>;
2540
2541
  label: z$1.ZodOptional<z$1.ZodString>;
2541
2542
  color: z$1.ZodOptional<z$1.ZodEnum<{
2542
- error: "error";
2543
2543
  success: "success";
2544
+ error: "error";
2544
2545
  warning: "warning";
2545
2546
  accent: "accent";
2546
2547
  accentDim: "accentDim";
@@ -2555,16 +2556,16 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2555
2556
  key: z$1.ZodString;
2556
2557
  aggregate: z$1.ZodEnum<{
2557
2558
  sum: "sum";
2559
+ avg: "avg";
2558
2560
  min: "min";
2559
2561
  max: "max";
2560
- avg: "avg";
2561
2562
  latest: "latest";
2562
2563
  passThresholdRate: "passThresholdRate";
2563
2564
  }>;
2564
2565
  label: z$1.ZodOptional<z$1.ZodString>;
2565
2566
  color: z$1.ZodOptional<z$1.ZodEnum<{
2566
- error: "error";
2567
2567
  success: "success";
2568
+ error: "error";
2568
2569
  warning: "warning";
2569
2570
  accent: "accent";
2570
2571
  accentDim: "accentDim";
@@ -2597,9 +2598,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2597
2598
  key: z$1.ZodString;
2598
2599
  aggregate: z$1.ZodEnum<{
2599
2600
  sum: "sum";
2601
+ avg: "avg";
2600
2602
  min: "min";
2601
2603
  max: "max";
2602
- avg: "avg";
2603
2604
  latest: "latest";
2604
2605
  passThresholdRate: "passThresholdRate";
2605
2606
  }>;
@@ -2698,9 +2699,9 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2698
2699
  error: "error";
2699
2700
  running: "running";
2700
2701
  cancelled: "cancelled";
2701
- pending: "pending";
2702
2702
  pass: "pass";
2703
2703
  fail: "fail";
2704
+ pending: "pending";
2704
2705
  }>;
2705
2706
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
2706
2707
  cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
@@ -2901,10 +2902,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2901
2902
  namespace: z$1.ZodString;
2902
2903
  key: z$1.ZodString;
2903
2904
  status: z$1.ZodEnum<{
2904
- bypass: "bypass";
2905
- refresh: "refresh";
2906
2905
  hit: "hit";
2907
2906
  miss: "miss";
2907
+ refresh: "refresh";
2908
+ bypass: "bypass";
2908
2909
  }>;
2909
2910
  read: z$1.ZodOptional<z$1.ZodBoolean>;
2910
2911
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2925,9 +2926,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2925
2926
  error: "error";
2926
2927
  running: "running";
2927
2928
  cancelled: "cancelled";
2928
- pending: "pending";
2929
2929
  pass: "pass";
2930
2930
  fail: "fail";
2931
+ pending: "pending";
2931
2932
  }>;
2932
2933
  input: z$1.ZodUnknown;
2933
2934
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -3073,10 +3074,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3073
3074
  namespace: z$1.ZodString;
3074
3075
  key: z$1.ZodString;
3075
3076
  status: z$1.ZodEnum<{
3076
- bypass: "bypass";
3077
- refresh: "refresh";
3078
3077
  hit: "hit";
3079
3078
  miss: "miss";
3079
+ refresh: "refresh";
3080
+ bypass: "bypass";
3080
3081
  }>;
3081
3082
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3082
3083
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3194,10 +3195,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3194
3195
  namespace: z$1.ZodString;
3195
3196
  key: z$1.ZodString;
3196
3197
  status: z$1.ZodEnum<{
3197
- bypass: "bypass";
3198
- refresh: "refresh";
3199
3198
  hit: "hit";
3200
3199
  miss: "miss";
3200
+ refresh: "refresh";
3201
+ bypass: "bypass";
3201
3202
  }>;
3202
3203
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3203
3204
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3251,9 +3252,9 @@ type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
3251
3252
  /** Reducer applied to a numeric column across all cases of a single run. */
3252
3253
  declare const evalChartAggregateSchema: z$1.ZodEnum<{
3253
3254
  sum: "sum";
3255
+ avg: "avg";
3254
3256
  min: "min";
3255
3257
  max: "max";
3256
- avg: "avg";
3257
3258
  latest: "latest";
3258
3259
  passThresholdRate: "passThresholdRate";
3259
3260
  }>;
@@ -3264,8 +3265,8 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
3264
3265
  * not emit raw hex so authored evals stay decoupled from the web theme.
3265
3266
  */
3266
3267
  declare const evalChartColorSchema: z$1.ZodEnum<{
3267
- error: "error";
3268
3268
  success: "success";
3269
+ error: "error";
3269
3270
  warning: "warning";
3270
3271
  accent: "accent";
3271
3272
  accentDim: "accentDim";
@@ -3293,8 +3294,8 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3293
3294
  }>;
3294
3295
  label: z$1.ZodOptional<z$1.ZodString>;
3295
3296
  color: z$1.ZodOptional<z$1.ZodEnum<{
3296
- error: "error";
3297
3297
  success: "success";
3298
+ error: "error";
3298
3299
  warning: "warning";
3299
3300
  accent: "accent";
3300
3301
  accentDim: "accentDim";
@@ -3309,16 +3310,16 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3309
3310
  key: z$1.ZodString;
3310
3311
  aggregate: z$1.ZodEnum<{
3311
3312
  sum: "sum";
3313
+ avg: "avg";
3312
3314
  min: "min";
3313
3315
  max: "max";
3314
- avg: "avg";
3315
3316
  latest: "latest";
3316
3317
  passThresholdRate: "passThresholdRate";
3317
3318
  }>;
3318
3319
  label: z$1.ZodOptional<z$1.ZodString>;
3319
3320
  color: z$1.ZodOptional<z$1.ZodEnum<{
3320
- error: "error";
3321
3321
  success: "success";
3322
+ error: "error";
3322
3323
  warning: "warning";
3323
3324
  accent: "accent";
3324
3325
  accentDim: "accentDim";
@@ -3344,9 +3345,9 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
3344
3345
  key: z$1.ZodString;
3345
3346
  aggregate: z$1.ZodEnum<{
3346
3347
  sum: "sum";
3348
+ avg: "avg";
3347
3349
  min: "min";
3348
3350
  max: "max";
3349
- avg: "avg";
3350
3351
  latest: "latest";
3351
3352
  passThresholdRate: "passThresholdRate";
3352
3353
  }>;
@@ -3376,8 +3377,8 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3376
3377
  }>;
3377
3378
  label: z$1.ZodOptional<z$1.ZodString>;
3378
3379
  color: z$1.ZodOptional<z$1.ZodEnum<{
3379
- error: "error";
3380
3380
  success: "success";
3381
+ error: "error";
3381
3382
  warning: "warning";
3382
3383
  accent: "accent";
3383
3384
  accentDim: "accentDim";
@@ -3392,16 +3393,16 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3392
3393
  key: z$1.ZodString;
3393
3394
  aggregate: z$1.ZodEnum<{
3394
3395
  sum: "sum";
3396
+ avg: "avg";
3395
3397
  min: "min";
3396
3398
  max: "max";
3397
- avg: "avg";
3398
3399
  latest: "latest";
3399
3400
  passThresholdRate: "passThresholdRate";
3400
3401
  }>;
3401
3402
  label: z$1.ZodOptional<z$1.ZodString>;
3402
3403
  color: z$1.ZodOptional<z$1.ZodEnum<{
3403
- error: "error";
3404
3404
  success: "success";
3405
+ error: "error";
3405
3406
  warning: "warning";
3406
3407
  accent: "accent";
3407
3408
  accentDim: "accentDim";
@@ -3434,9 +3435,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3434
3435
  key: z$1.ZodString;
3435
3436
  aggregate: z$1.ZodEnum<{
3436
3437
  sum: "sum";
3438
+ avg: "avg";
3437
3439
  min: "min";
3438
3440
  max: "max";
3439
- avg: "avg";
3440
3441
  latest: "latest";
3441
3442
  passThresholdRate: "passThresholdRate";
3442
3443
  }>;
@@ -3466,8 +3467,8 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3466
3467
  }>;
3467
3468
  label: z$1.ZodOptional<z$1.ZodString>;
3468
3469
  color: z$1.ZodOptional<z$1.ZodEnum<{
3469
- error: "error";
3470
3470
  success: "success";
3471
+ error: "error";
3471
3472
  warning: "warning";
3472
3473
  accent: "accent";
3473
3474
  accentDim: "accentDim";
@@ -3482,16 +3483,16 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3482
3483
  key: z$1.ZodString;
3483
3484
  aggregate: z$1.ZodEnum<{
3484
3485
  sum: "sum";
3486
+ avg: "avg";
3485
3487
  min: "min";
3486
3488
  max: "max";
3487
- avg: "avg";
3488
3489
  latest: "latest";
3489
3490
  passThresholdRate: "passThresholdRate";
3490
3491
  }>;
3491
3492
  label: z$1.ZodOptional<z$1.ZodString>;
3492
3493
  color: z$1.ZodOptional<z$1.ZodEnum<{
3493
- error: "error";
3494
3494
  success: "success";
3495
+ error: "error";
3495
3496
  warning: "warning";
3496
3497
  accent: "accent";
3497
3498
  accentDim: "accentDim";
@@ -3524,9 +3525,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3524
3525
  key: z$1.ZodString;
3525
3526
  aggregate: z$1.ZodEnum<{
3526
3527
  sum: "sum";
3528
+ avg: "avg";
3527
3529
  min: "min";
3528
3530
  max: "max";
3529
- avg: "avg";
3530
3531
  latest: "latest";
3531
3532
  passThresholdRate: "passThresholdRate";
3532
3533
  }>;
@@ -3555,8 +3556,8 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3555
3556
  target: z$1.ZodObject<{
3556
3557
  mode: z$1.ZodEnum<{
3557
3558
  all: "all";
3558
- evalIds: "evalIds";
3559
3559
  caseIds: "caseIds";
3560
+ evalIds: "evalIds";
3560
3561
  }>;
3561
3562
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
3562
3563
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -3570,9 +3571,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3570
3571
  median: "median";
3571
3572
  }>>>;
3572
3573
  cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
3573
- use: "use";
3574
- bypass: "bypass";
3575
3574
  refresh: "refresh";
3575
+ bypass: "bypass";
3576
+ use: "use";
3576
3577
  }>>;
3577
3578
  }, z$1.core.$strip>;
3578
3579
  /** Persisted lifecycle metadata for a single eval run. */
@@ -3694,10 +3695,11 @@ type EvalTraceTree$1 = {
3694
3695
  findSpan: (name: string) => EvalTraceSpan$1 | undefined; /** Return every span whose name exactly matches `name`. */
3695
3696
  findSpans: (name: string) => EvalTraceSpan$1[]; /** Return whether any span name exactly matches `name`. */
3696
3697
  hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
3697
- findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'`. */
3698
- findToolCallSpans: () => EvalTraceSpan$1[]; /** Return the names of every span with `kind: 'tool'`. */
3699
- listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
3700
- hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
3698
+ findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'` or `kind: 'tool_call'`. */
3699
+ findToolCallSpans: () => EvalTraceSpan$1[]; /** Return the names of every span with `kind: 'tool'` or `kind: 'tool_call'`. */
3700
+ listToolCallSpanNames: () => string[]; /** Return whether a tool-call span has a name exactly matching `name`. */
3701
+ hasToolCallSpan: (name: string) => boolean; /** Return whether a tool-call span name appears exactly `expectedCalls` times. */
3702
+ hasNToolCallSpans: (toolName: string, expectedCalls: number) => boolean; /** Return span names in creation order, optionally filtered by kind. */
3701
3703
  listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
3702
3704
  listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
3703
3705
  flattenDfs: () => EvalTraceSpan$1[];
@@ -4290,15 +4292,26 @@ type AgentEvalsConfig$1 = {
4290
4292
  /** Disable the cache entirely; spans with `cache` options execute as if uncached. */enabled?: boolean; /** Override the directory used to persist cache entries. */
4291
4293
  dir?: string;
4292
4294
  /**
4293
- * Default maximum entries retained for each cache namespace. Defaults to
4294
- * `100`; non-positive or non-finite values fall back to the default.
4295
- */
4296
- maxEntriesPerNamespace?: number;
4297
- /**
4298
- * Exact namespace-specific retention caps. Values override
4299
- * `maxEntriesPerNamespace` for matching namespaces.
4295
+ * Maximum entries retained per cache namespace.
4296
+ *
4297
+ * Pass a number to set the default cap for every namespace. Pass an object
4298
+ * to set a default cap plus exact namespace-specific caps. Non-positive or
4299
+ * non-finite values fall back to the default.
4300
+ *
4301
+ * @example
4302
+ * ```ts
4303
+ * cache: {
4304
+ * maxEntries: {
4305
+ * default: 50,
4306
+ * namespaces: { 'receipt-audit.receipt-audit-context': 200 },
4307
+ * },
4308
+ * }
4309
+ * ```
4300
4310
  */
4301
- maxEntriesByNamespace?: Record<string, number>;
4311
+ maxEntries?: number | {
4312
+ default?: number;
4313
+ namespaces?: Record<string, number>;
4314
+ };
4302
4315
  /**
4303
4316
  * Milliseconds the runner waits after becoming idle before pruning indexed
4304
4317
  * cache entries. Defaults to `5000`; non-positive or non-finite values use
@@ -4309,8 +4322,7 @@ type AgentEvalsConfig$1 = {
4309
4322
  * Minimum milliseconds between `lastAccessedAt` index rewrites for repeated
4310
4323
  * cache hits. Defaults to four hours. Set to `0` to record every hit.
4311
4324
  */
4312
- lastAccessedAtUpdateIntervalMs?: number; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
4313
- maxEntriesPerEval?: number;
4325
+ lastAccessedAtUpdateIntervalMs?: number;
4314
4326
  };
4315
4327
  };
4316
4328
  /** Zod schema for validating `agent-evals.config.ts` input. */
@@ -4519,9 +4531,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4519
4531
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4520
4532
  */
4521
4533
  declare const cacheModeSchema: z$1.ZodEnum<{
4522
- use: "use";
4523
- bypass: "bypass";
4524
4534
  refresh: "refresh";
4535
+ bypass: "bypass";
4536
+ use: "use";
4525
4537
  }>;
4526
4538
  /** Mode controlling how cached spans behave during a run. */
4527
4539
  type CacheMode = z$1.infer<typeof cacheModeSchema>;
@@ -4535,17 +4547,17 @@ declare const spanCacheOptionsSchema: z$1.ZodObject<{
4535
4547
  type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
4536
4548
  /** Category of operation stored in the eval cache. */
4537
4549
  declare const cacheOperationTypeSchema: z$1.ZodEnum<{
4538
- span: "span";
4539
4550
  value: "value";
4551
+ span: "span";
4540
4552
  }>;
4541
4553
  /** Category of operation stored in the eval cache. */
4542
4554
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
4543
4555
  /** Status of a cache lookup recorded on a span or case scope. */
4544
4556
  declare const cacheStatusSchema: z$1.ZodEnum<{
4545
- bypass: "bypass";
4546
- refresh: "refresh";
4547
4557
  hit: "hit";
4548
4558
  miss: "miss";
4559
+ refresh: "refresh";
4560
+ bypass: "bypass";
4549
4561
  }>;
4550
4562
  /** Status of a cache lookup recorded on a span or case scope. */
4551
4563
  type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
@@ -4562,10 +4574,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
4562
4574
  namespace: z$1.ZodString;
4563
4575
  key: z$1.ZodString;
4564
4576
  status: z$1.ZodEnum<{
4565
- bypass: "bypass";
4566
- refresh: "refresh";
4567
4577
  hit: "hit";
4568
4578
  miss: "miss";
4579
+ refresh: "refresh";
4580
+ bypass: "bypass";
4569
4581
  }>;
4570
4582
  read: z$1.ZodOptional<z$1.ZodBoolean>;
4571
4583
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -4761,8 +4773,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
4761
4773
  key: z$1.ZodString;
4762
4774
  namespace: z$1.ZodString;
4763
4775
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4764
- span: "span";
4765
4776
  value: "value";
4777
+ span: "span";
4766
4778
  }>>;
4767
4779
  operationName: z$1.ZodOptional<z$1.ZodString>;
4768
4780
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4868,8 +4880,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4868
4880
  key: z$1.ZodString;
4869
4881
  namespace: z$1.ZodString;
4870
4882
  operationType: z$1.ZodEnum<{
4871
- span: "span";
4872
4883
  value: "value";
4884
+ span: "span";
4873
4885
  }>;
4874
4886
  operationName: z$1.ZodString;
4875
4887
  storedAt: z$1.ZodString;
@@ -4879,8 +4891,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4879
4891
  key: z$1.ZodString;
4880
4892
  namespace: z$1.ZodString;
4881
4893
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4882
- span: "span";
4883
4894
  value: "value";
4895
+ span: "span";
4884
4896
  }>>;
4885
4897
  operationName: z$1.ZodOptional<z$1.ZodString>;
4886
4898
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4986,8 +4998,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4986
4998
  key: z$1.ZodString;
4987
4999
  namespace: z$1.ZodString;
4988
5000
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4989
- span: "span";
4990
5001
  value: "value";
5002
+ span: "span";
4991
5003
  }>>;
4992
5004
  operationName: z$1.ZodOptional<z$1.ZodString>;
4993
5005
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -5084,8 +5096,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
5084
5096
  key: z$1.ZodString;
5085
5097
  namespace: z$1.ZodString;
5086
5098
  operationType: z$1.ZodEnum<{
5087
- span: "span";
5088
5099
  value: "value";
5100
+ span: "span";
5089
5101
  }>;
5090
5102
  operationName: z$1.ZodString;
5091
5103
  storedAt: z$1.ZodString;
@@ -5095,8 +5107,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
5095
5107
  key: z$1.ZodString;
5096
5108
  namespace: z$1.ZodString;
5097
5109
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
5098
- span: "span";
5099
5110
  value: "value";
5111
+ span: "span";
5100
5112
  }>>;
5101
5113
  operationName: z$1.ZodOptional<z$1.ZodString>;
5102
5114
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -5202,8 +5214,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
5202
5214
  key: z$1.ZodString;
5203
5215
  namespace: z$1.ZodString;
5204
5216
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
5205
- span: "span";
5206
5217
  value: "value";
5218
+ span: "span";
5207
5219
  }>>;
5208
5220
  operationName: z$1.ZodOptional<z$1.ZodString>;
5209
5221
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -5308,8 +5320,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
5308
5320
  key: z$1.ZodString;
5309
5321
  namespace: z$1.ZodString;
5310
5322
  operationType: z$1.ZodEnum<{
5311
- span: "span";
5312
5323
  value: "value";
5324
+ span: "span";
5313
5325
  }>;
5314
5326
  operationName: z$1.ZodString;
5315
5327
  storedAt: z$1.ZodString;
@@ -5319,8 +5331,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
5319
5331
  key: z$1.ZodString;
5320
5332
  namespace: z$1.ZodString;
5321
5333
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
5322
- span: "span";
5323
5334
  value: "value";
5335
+ span: "span";
5324
5336
  }>>;
5325
5337
  operationName: z$1.ZodOptional<z$1.ZodString>;
5326
5338
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -5494,8 +5506,8 @@ type SseEnvelope = z$1.infer<typeof sseEnvelopeSchema$1>; //#endregion
5494
5506
  //#region src/schemas/api.d.ts
5495
5507
  /** Lifecycle state for an app config reload triggered by `agent-evals.config.ts`. */
5496
5508
  declare const configReloadStatusSchema: z$1.ZodEnum<{
5497
- idle: "idle";
5498
5509
  pending: "pending";
5510
+ idle: "idle";
5499
5511
  reloading: "reloading";
5500
5512
  }>;
5501
5513
  /** Status for config reloads in the long-running app server. */
@@ -5503,8 +5515,8 @@ type ConfigReloadStatus = z$1.infer<typeof configReloadStatusSchema>;
5503
5515
  /** UI/API-visible state for config reloads in `agent-evals app`. */
5504
5516
  declare const configReloadStateSchema$1: z$1.ZodObject<{
5505
5517
  status: z$1.ZodEnum<{
5506
- idle: "idle";
5507
5518
  pending: "pending";
5519
+ idle: "idle";
5508
5520
  reloading: "reloading";
5509
5521
  }>;
5510
5522
  activeRunCount: z$1.ZodNumber;
@@ -5518,8 +5530,8 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
5518
5530
  target: z$1.ZodObject<{
5519
5531
  mode: z$1.ZodEnum<{
5520
5532
  all: "all";
5521
- evalIds: "evalIds";
5522
5533
  caseIds: "caseIds";
5534
+ evalIds: "evalIds";
5523
5535
  }>;
5524
5536
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
5525
5537
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -5531,9 +5543,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
5531
5543
  temporary: z$1.ZodOptional<z$1.ZodBoolean>;
5532
5544
  cache: z$1.ZodOptional<z$1.ZodObject<{
5533
5545
  mode: z$1.ZodDefault<z$1.ZodEnum<{
5534
- use: "use";
5535
- bypass: "bypass";
5536
5546
  refresh: "refresh";
5547
+ bypass: "bypass";
5548
+ use: "use";
5537
5549
  }>>;
5538
5550
  }, z$1.core.$strip>>;
5539
5551
  manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as setScopeCacheContext, A as repoFile, B as evalTime, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as EvalRuntimeUsageError, Ft as getEvalRegistry, H as getEvalCaseInput, I as appendToEvalOutput, J as runInEvalRuntimeScope, K as mergeEvalOutput, M as readManualInputFile, N as evalExpect, O as serializeCacheRecording, P as EvalAssertionError, Q as setEvalOutput, R as evalAssert, S as evalSpan, T as hashCacheKeySync, U as incrementEvalOutput, V as getCurrentScope, W as isInEvalScope, X as runInExistingEvalScope, Y as runInEvalScope, at as extractApiCalls, b as buildTraceTree, ct as simulateTokenAllocation, et as startEvalBackgroundJob, it as extractCacheHits, j as manualInputFileValueSchema, k as serializeCacheValue, ot as extractLlmCalls, q as nextEvalId, rt as extractCacheEntries, st as simulateLlmCallCost, ut as getNestedAttribute, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalLog } from "./runExecution-CFw0MQFs.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-SP4kEtYL.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-7GbQj1sb.mjs";
1
+ import { $ as setEvalOutput, A as serializeCacheValue, B as evalLog, C as evalSpan, D as deserializeCacheRecording, E as hashCacheKeySync, F as EvalAssertionError, G as isInEvalScope, H as getCurrentScope, I as EvalRuntimeUsageError, It as getEvalRegistry, J as nextEvalId, L as appendToEvalOutput, M as manualInputFileValueSchema, N as readManualInputFile, O as deserializeCacheValue, P as evalExpect, S as captureEvalSpanError, T as hashCacheKey, U as getEvalCaseInput, V as evalTime, W as incrementEvalOutput, X as runInEvalScope, Y as runInEvalRuntimeScope, Z as runInExistingEvalScope, at as extractCacheHits, b as z, ct as simulateLlmCallCost, dt as getNestedAttribute, et as setScopeCacheContext, it as extractCacheEntries, j as repoFile, k as serializeCacheRecording, lt as simulateTokenAllocation, ot as extractApiCalls, q as mergeEvalOutput, st as extractLlmCalls, tt as startEvalBackgroundJob, w as evalTracer, x as buildTraceTree, z as evalAssert } from "./runExecution-BMnJXWhN.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-ClAkjTvo.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-DfzidkYr.mjs";
4
4
  export { EvalAssertionError, EvalRuntimeUsageError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,5 +1,5 @@
1
- import { At as evalStatsConfigSchema, L as configureEvalRunLogs, Mt as evalChartsConfigSchema, Nt as columnDefSchema, Tt as buildEvalKey, bt as runManifestSchema, jt as manualInputDescriptorSchema, kt as evalStatAggregateSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, tt as createRunRequestSchema, v as createFsCacheStore, xt as runSummarySchema } from "./runExecution-CFw0MQFs.mjs";
2
- import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CxjiQmof.mjs";
1
+ import { At as evalStatAggregateSchema, Et as buildEvalKey, Mt as manualInputDescriptorSchema, Nt as evalChartsConfigSchema, Pt as columnDefSchema, R as configureEvalRunLogs, St as runSummarySchema, jt as evalStatsConfigSchema, l as registerAgentEvalsPackageResolutionHooks, nt as createRunRequestSchema, p as loadConfig, v as createFsCacheStore, xt as runManifestSchema, y as getCacheRetentionOptions } from "./runExecution-BMnJXWhN.mjs";
2
+ import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-CvbTAoEb.mjs";
3
3
  import { z } from "zod/v4";
4
4
  import { readFile } from "node:fs/promises";
5
5
  import { relative } from "node:path";
@@ -137,11 +137,12 @@ async function main() {
137
137
  registerAgentEvalsPackageResolutionHooks();
138
138
  const config = await loadConfig();
139
139
  configureEvalRunLogs({ captureConsole: config.runLogs?.captureConsole !== false });
140
+ const cacheRetentionOptions = getCacheRetentionOptions(config.cache);
140
141
  const cacheStore = createFsCacheStore({
141
142
  workspaceRoot: context.workspaceRoot,
142
143
  dir: config.cache?.dir,
143
- maxEntriesPerNamespace: config.cache?.maxEntriesPerNamespace ?? config.cache?.maxEntriesPerEval,
144
- maxEntriesByNamespace: config.cache?.maxEntriesByNamespace,
144
+ maxEntriesPerNamespace: cacheRetentionOptions.maxEntriesPerNamespace,
145
+ maxEntriesByNamespace: cacheRetentionOptions.maxEntriesByNamespace,
145
146
  lastAccessedAtUpdateIntervalMs: config.cache?.lastAccessedAtUpdateIntervalMs
146
147
  });
147
148
  const evalMetas = await discoverRunEvals({