@ls-stack/agent-eval 0.58.1 → 0.58.3

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -268,6 +268,7 @@ declare const runLogEntrySchema$1: z$1.ZodObject<{
268
268
  phase: z$1.ZodEnum<{
269
269
  eval: "eval";
270
270
  derive: "derive";
271
+ tracingAssertions: "tracingAssertions";
271
272
  outputsSchema: "outputsSchema";
272
273
  scorer: "scorer";
273
274
  }>;
@@ -399,10 +400,17 @@ type EvalCase$1$1<TInput = unknown> = {
399
400
  };
400
401
  /** Query helpers built from the flattened trace recorded for one eval case. */
401
402
  type EvalTraceTree = {
402
- spans: EvalTraceSpan$2[];
403
- rootSpans: EvalTraceSpan$2[];
404
- findSpan: (name: string) => EvalTraceSpan$2 | undefined;
405
- findSpansByKind: (kind: string) => EvalTraceSpan$2[];
403
+ /** Flat span list in creation order. */spans: EvalTraceSpan$2[]; /** Top-level spans whose `parentId` is `null`. */
404
+ rootSpans: EvalTraceSpan$2[]; /** Return the first span whose name exactly matches `name`. */
405
+ findSpan: (name: string) => EvalTraceSpan$2 | undefined; /** Return every span whose name exactly matches `name`. */
406
+ findSpans: (name: string) => EvalTraceSpan$2[]; /** Return whether any span name exactly matches `name`. */
407
+ hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
408
+ findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'`. */
409
+ findToolCallSpans: () => EvalTraceSpan$2[]; /** Return the names of every span with `kind: 'tool'`. */
410
+ listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
411
+ hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
412
+ listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
413
+ listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
406
414
  flattenDfs: () => EvalTraceSpan$2[];
407
415
  checkpoints: Map<string, unknown>;
408
416
  };
@@ -421,6 +429,10 @@ type EvalDeriveMap<TInput = unknown> = Record<string, EvalDeriveValueFn<TInput>>
421
429
  type EvalDeriveFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => Record<string, unknown> | Promise<Record<string, unknown>>;
422
430
  /** Trace-derived output config accepted globally and on eval definitions. */
423
431
  type EvalDeriveConfig<TInput = unknown> = EvalDeriveMap<TInput> | EvalDeriveFn<TInput>;
432
+ /** Function that records trace-derived assertions for one case. */
433
+ type EvalTracingAssertionsFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => MaybePromise$1<void>;
434
+ /** Trace-derived assertion config accepted globally and on eval definitions. */
435
+ type EvalTracingAssertionsConfig<TInput = unknown> = EvalTracingAssertionsFn<TInput>;
424
436
  /** UI overrides for a derived or scored column emitted by an eval. */
425
437
  type EvalColumnOverride = {
426
438
  /** Display label shown for the column in tables and detail views. */label?: string;
@@ -1142,9 +1154,18 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
1142
1154
  *
1143
1155
  * Prefer the keyed map form when each key has one derivation. The
1144
1156
  * object-returning callback form is also supported. Derived values only fill
1145
- * keys not already recorded during execution.
1157
+ * keys not already recorded during execution. Assertion helpers are not
1158
+ * allowed here; use `tracingAssertions` for trace-derived pass/fail checks.
1146
1159
  */
1147
1160
  deriveFromTracing?: EvalDeriveConfig<TInput>;
1161
+ /**
1162
+ * Record assertions from the finished execution trace.
1163
+ *
1164
+ * Runs after `deriveFromTracing` and before output schema validation and
1165
+ * scores. Use `evalAssert(...)` or `evalExpect(...)` inside the callback to
1166
+ * write normal assertion results without creating score columns.
1167
+ */
1168
+ tracingAssertions?: EvalTracingAssertionsConfig<TInput>;
1148
1169
  /**
1149
1170
  * Computed score columns for each case.
1150
1171
  *
@@ -1503,12 +1524,16 @@ type EvalCaseScope = {
1503
1524
  * covers run-time module/environment loading, including top-level code in
1504
1525
  * modules imported while a run is being prepared.
1505
1526
  */
1506
- type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'outputsSchema' | 'scorer';
1527
+ type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'tracingAssertions' | 'outputsSchema' | 'scorer';
1507
1528
  type EvalLogLevelInput = RunLogLevel$1 | 'warning';
1508
1529
  /** Error thrown when an eval assertion fails during case execution. */
1509
1530
  declare class EvalAssertionError extends Error {
1510
1531
  constructor(message: string);
1511
1532
  }
1533
+ /** Error thrown when an SDK helper is used in an unsupported runner phase. */
1534
+ declare class EvalRuntimeUsageError extends Error {
1535
+ constructor(message: string);
1536
+ }
1512
1537
  /** Return the host process clock, bypassing the eval Date shim. */
1513
1538
  /**
1514
1539
  * Eval time helpers for reading and moving the active eval clock.
@@ -1539,8 +1564,10 @@ declare function getCurrentScope(): EvalCaseScope | undefined;
1539
1564
  *
1540
1565
  * Returns `null` outside eval-owned work, `env` while the runner is loading
1541
1566
  * eval modules for a run, `cases` while generating cases, `eval` while running
1542
- * case `execute`, `derive` while deriving outputs from traces, `outputsSchema`
1543
- * while validating outputs, and `scorer` while computing scores.
1567
+ * case `execute`, `derive` while deriving outputs from traces,
1568
+ * `tracingAssertions` while checking trace-derived assertions,
1569
+ * `outputsSchema` while validating outputs, and `scorer` while computing
1570
+ * scores.
1544
1571
  */
1545
1572
  declare function isInEvalScope(): EvalRuntimeScope | null;
1546
1573
  /**
@@ -1656,7 +1683,8 @@ declare function incrementEvalOutput(key: string, delta: number): void;
1656
1683
  * Calls made outside `runInEvalScope(...)` are ignored so shared workflow code
1657
1684
  * can safely reuse `evalAssert(...)` when it also runs outside an eval. The
1658
1685
  * TypeScript assertion signature still narrows the checked value after the
1659
- * call.
1686
+ * call. Calls inside `deriveFromTracing` throw because derivations must only
1687
+ * write outputs; use `tracingAssertions` for trace-derived pass/fail checks.
1660
1688
  */
1661
1689
  declare function evalAssert(condition: unknown, message: string): asserts condition; //#endregion
1662
1690
  //#region src/valueCache.d.ts
@@ -2014,9 +2042,9 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
2014
2042
  subtree: "subtree";
2015
2043
  }>>;
2016
2044
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2017
- sum: "sum";
2018
2045
  all: "all";
2019
2046
  last: "last";
2047
+ sum: "sum";
2020
2048
  }>>;
2021
2049
  }, z$1.core.$strip>;
2022
2050
  /**
@@ -2050,9 +2078,9 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
2050
2078
  subtree: "subtree";
2051
2079
  }>>;
2052
2080
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2053
- sum: "sum";
2054
2081
  all: "all";
2055
2082
  last: "last";
2083
+ sum: "sum";
2056
2084
  }>>;
2057
2085
  }, z$1.core.$strip>>>;
2058
2086
  }, z$1.core.$strip>;
@@ -2090,9 +2118,9 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
2090
2118
  subtree: "subtree";
2091
2119
  }>>;
2092
2120
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2093
- sum: "sum";
2094
2121
  all: "all";
2095
2122
  last: "last";
2123
+ sum: "sum";
2096
2124
  }>>;
2097
2125
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
2098
2126
  }, z$1.core.$strip>;
@@ -2128,9 +2156,9 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
2128
2156
  subtree: "subtree";
2129
2157
  }>>;
2130
2158
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2131
- sum: "sum";
2132
2159
  all: "all";
2133
2160
  last: "last";
2161
+ sum: "sum";
2134
2162
  }>>;
2135
2163
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
2136
2164
  }, z$1.core.$strip>>>;
@@ -2167,8 +2195,8 @@ declare const traceSpanSchema$1: z$1.ZodObject<{
2167
2195
  status: z$1.ZodEnum<{
2168
2196
  error: "error";
2169
2197
  running: "running";
2170
- cancelled: "cancelled";
2171
2198
  ok: "ok";
2199
+ cancelled: "cancelled";
2172
2200
  }>;
2173
2201
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2174
2202
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2213,10 +2241,10 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
2213
2241
  * `best` selects the highest finite value and `worst` selects the lowest.
2214
2242
  */
2215
2243
  declare const evalStatAggregateSchema: z$1.ZodEnum<{
2216
- avg: "avg";
2244
+ sum: "sum";
2217
2245
  min: "min";
2218
2246
  max: "max";
2219
- sum: "sum";
2247
+ avg: "avg";
2220
2248
  best: "best";
2221
2249
  worst: "worst";
2222
2250
  }>;
@@ -2245,10 +2273,10 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2245
2273
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2246
2274
  kind: z$1.ZodLiteral<"duration">;
2247
2275
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2248
- avg: "avg";
2276
+ sum: "sum";
2249
2277
  min: "min";
2250
2278
  max: "max";
2251
- sum: "sum";
2279
+ avg: "avg";
2252
2280
  best: "best";
2253
2281
  worst: "worst";
2254
2282
  }>>;
@@ -2256,10 +2284,10 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2256
2284
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2257
2285
  kind: z$1.ZodLiteral<"cacheHits">;
2258
2286
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2259
- avg: "avg";
2287
+ sum: "sum";
2260
2288
  min: "min";
2261
2289
  max: "max";
2262
- sum: "sum";
2290
+ avg: "avg";
2263
2291
  best: "best";
2264
2292
  worst: "worst";
2265
2293
  }>>;
@@ -2269,10 +2297,10 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2269
2297
  key: z$1.ZodString;
2270
2298
  label: z$1.ZodOptional<z$1.ZodString>;
2271
2299
  aggregate: z$1.ZodEnum<{
2272
- avg: "avg";
2300
+ sum: "sum";
2273
2301
  min: "min";
2274
2302
  max: "max";
2275
- sum: "sum";
2303
+ avg: "avg";
2276
2304
  best: "best";
2277
2305
  worst: "worst";
2278
2306
  }>;
@@ -2309,10 +2337,10 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2309
2337
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2310
2338
  kind: z$1.ZodLiteral<"duration">;
2311
2339
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2312
- avg: "avg";
2340
+ sum: "sum";
2313
2341
  min: "min";
2314
2342
  max: "max";
2315
- sum: "sum";
2343
+ avg: "avg";
2316
2344
  best: "best";
2317
2345
  worst: "worst";
2318
2346
  }>>;
@@ -2320,10 +2348,10 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2320
2348
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2321
2349
  kind: z$1.ZodLiteral<"cacheHits">;
2322
2350
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2323
- avg: "avg";
2351
+ sum: "sum";
2324
2352
  min: "min";
2325
2353
  max: "max";
2326
- sum: "sum";
2354
+ avg: "avg";
2327
2355
  best: "best";
2328
2356
  worst: "worst";
2329
2357
  }>>;
@@ -2333,10 +2361,10 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2333
2361
  key: z$1.ZodString;
2334
2362
  label: z$1.ZodOptional<z$1.ZodString>;
2335
2363
  aggregate: z$1.ZodEnum<{
2336
- avg: "avg";
2364
+ sum: "sum";
2337
2365
  min: "min";
2338
2366
  max: "max";
2339
- sum: "sum";
2367
+ avg: "avg";
2340
2368
  best: "best";
2341
2369
  worst: "worst";
2342
2370
  }>;
@@ -2419,10 +2447,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2419
2447
  caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2420
2448
  lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
2421
2449
  error: "error";
2422
- pass: "pass";
2423
- fail: "fail";
2424
2450
  running: "running";
2425
2451
  cancelled: "cancelled";
2452
+ pass: "pass";
2453
+ fail: "fail";
2426
2454
  unscored: "unscored";
2427
2455
  }>>;
2428
2456
  stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
@@ -2436,10 +2464,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2436
2464
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2437
2465
  kind: z$1.ZodLiteral<"duration">;
2438
2466
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2439
- avg: "avg";
2467
+ sum: "sum";
2440
2468
  min: "min";
2441
2469
  max: "max";
2442
- sum: "sum";
2470
+ avg: "avg";
2443
2471
  best: "best";
2444
2472
  worst: "worst";
2445
2473
  }>>;
@@ -2447,10 +2475,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2447
2475
  hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2448
2476
  kind: z$1.ZodLiteral<"cacheHits">;
2449
2477
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2450
- avg: "avg";
2478
+ sum: "sum";
2451
2479
  min: "min";
2452
2480
  max: "max";
2453
- sum: "sum";
2481
+ avg: "avg";
2454
2482
  best: "best";
2455
2483
  worst: "worst";
2456
2484
  }>>;
@@ -2460,10 +2488,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2460
2488
  key: z$1.ZodString;
2461
2489
  label: z$1.ZodOptional<z$1.ZodString>;
2462
2490
  aggregate: z$1.ZodEnum<{
2463
- avg: "avg";
2491
+ sum: "sum";
2464
2492
  min: "min";
2465
2493
  max: "max";
2466
- sum: "sum";
2494
+ avg: "avg";
2467
2495
  best: "best";
2468
2496
  worst: "worst";
2469
2497
  }>;
@@ -2487,10 +2515,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2487
2515
  accent: z$1.ZodOptional<z$1.ZodBoolean>;
2488
2516
  }, z$1.core.$strip>], "kind">>>;
2489
2517
  defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
2490
- avg: "avg";
2518
+ sum: "sum";
2491
2519
  min: "min";
2492
2520
  max: "max";
2493
- sum: "sum";
2521
+ avg: "avg";
2494
2522
  best: "best";
2495
2523
  worst: "worst";
2496
2524
  }>>;
@@ -2511,11 +2539,11 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2511
2539
  }>;
2512
2540
  label: z$1.ZodOptional<z$1.ZodString>;
2513
2541
  color: z$1.ZodOptional<z$1.ZodEnum<{
2514
- success: "success";
2515
2542
  error: "error";
2543
+ success: "success";
2544
+ warning: "warning";
2516
2545
  accent: "accent";
2517
2546
  accentDim: "accentDim";
2518
- warning: "warning";
2519
2547
  textMuted: "textMuted";
2520
2548
  }>>;
2521
2549
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2526,20 +2554,20 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2526
2554
  source: z$1.ZodLiteral<"column">;
2527
2555
  key: z$1.ZodString;
2528
2556
  aggregate: z$1.ZodEnum<{
2529
- avg: "avg";
2557
+ sum: "sum";
2530
2558
  min: "min";
2531
2559
  max: "max";
2532
- sum: "sum";
2560
+ avg: "avg";
2533
2561
  latest: "latest";
2534
2562
  passThresholdRate: "passThresholdRate";
2535
2563
  }>;
2536
2564
  label: z$1.ZodOptional<z$1.ZodString>;
2537
2565
  color: z$1.ZodOptional<z$1.ZodEnum<{
2538
- success: "success";
2539
2566
  error: "error";
2567
+ success: "success";
2568
+ warning: "warning";
2540
2569
  accent: "accent";
2541
2570
  accentDim: "accentDim";
2542
- warning: "warning";
2543
2571
  textMuted: "textMuted";
2544
2572
  }>>;
2545
2573
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -2568,10 +2596,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2568
2596
  source: z$1.ZodLiteral<"column">;
2569
2597
  key: z$1.ZodString;
2570
2598
  aggregate: z$1.ZodEnum<{
2571
- avg: "avg";
2599
+ sum: "sum";
2572
2600
  min: "min";
2573
2601
  max: "max";
2574
- sum: "sum";
2602
+ avg: "avg";
2575
2603
  latest: "latest";
2576
2604
  passThresholdRate: "passThresholdRate";
2577
2605
  }>;
@@ -2668,11 +2696,11 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2668
2696
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2669
2697
  status: z$1.ZodEnum<{
2670
2698
  error: "error";
2671
- pass: "pass";
2672
- fail: "fail";
2673
2699
  running: "running";
2674
2700
  cancelled: "cancelled";
2675
2701
  pending: "pending";
2702
+ pass: "pass";
2703
+ fail: "fail";
2676
2704
  }>;
2677
2705
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
2678
2706
  cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
@@ -2753,6 +2781,7 @@ type RunLogLevel = z$1.infer<typeof runLogLevelSchema>;
2753
2781
  declare const runLogPhaseSchema: z$1.ZodEnum<{
2754
2782
  eval: "eval";
2755
2783
  derive: "derive";
2784
+ tracingAssertions: "tracingAssertions";
2756
2785
  outputsSchema: "outputsSchema";
2757
2786
  scorer: "scorer";
2758
2787
  }>;
@@ -2779,6 +2808,7 @@ declare const runLogEntrySchema: z$1.ZodObject<{
2779
2808
  phase: z$1.ZodEnum<{
2780
2809
  eval: "eval";
2781
2810
  derive: "derive";
2811
+ tracingAssertions: "tracingAssertions";
2782
2812
  outputsSchema: "outputsSchema";
2783
2813
  scorer: "scorer";
2784
2814
  }>;
@@ -2808,8 +2838,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2808
2838
  status: z$1.ZodEnum<{
2809
2839
  error: "error";
2810
2840
  running: "running";
2811
- cancelled: "cancelled";
2812
2841
  ok: "ok";
2842
+ cancelled: "cancelled";
2813
2843
  }>;
2814
2844
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2815
2845
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2859,9 +2889,9 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2859
2889
  subtree: "subtree";
2860
2890
  }>>;
2861
2891
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2862
- sum: "sum";
2863
2892
  all: "all";
2864
2893
  last: "last";
2894
+ sum: "sum";
2865
2895
  }>>;
2866
2896
  }, z$1.core.$strip>>>;
2867
2897
  }, z$1.core.$strip>;
@@ -2871,10 +2901,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2871
2901
  namespace: z$1.ZodString;
2872
2902
  key: z$1.ZodString;
2873
2903
  status: z$1.ZodEnum<{
2904
+ bypass: "bypass";
2905
+ refresh: "refresh";
2874
2906
  hit: "hit";
2875
2907
  miss: "miss";
2876
- refresh: "refresh";
2877
- bypass: "bypass";
2878
2908
  }>;
2879
2909
  read: z$1.ZodOptional<z$1.ZodBoolean>;
2880
2910
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2893,11 +2923,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2893
2923
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2894
2924
  status: z$1.ZodEnum<{
2895
2925
  error: "error";
2896
- pass: "pass";
2897
- fail: "fail";
2898
2926
  running: "running";
2899
2927
  cancelled: "cancelled";
2900
2928
  pending: "pending";
2929
+ pass: "pass";
2930
+ fail: "fail";
2901
2931
  }>;
2902
2932
  input: z$1.ZodUnknown;
2903
2933
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -2911,8 +2941,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2911
2941
  status: z$1.ZodEnum<{
2912
2942
  error: "error";
2913
2943
  running: "running";
2914
- cancelled: "cancelled";
2915
2944
  ok: "ok";
2945
+ cancelled: "cancelled";
2916
2946
  }>;
2917
2947
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2918
2948
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -2962,9 +2992,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2962
2992
  subtree: "subtree";
2963
2993
  }>>;
2964
2994
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2965
- sum: "sum";
2966
2995
  all: "all";
2967
2996
  last: "last";
2997
+ sum: "sum";
2968
2998
  }>>;
2969
2999
  }, z$1.core.$strip>>>;
2970
3000
  }, z$1.core.$strip>;
@@ -2980,8 +3010,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2980
3010
  status: z$1.ZodEnum<{
2981
3011
  error: "error";
2982
3012
  running: "running";
2983
- cancelled: "cancelled";
2984
3013
  ok: "ok";
3014
+ cancelled: "cancelled";
2985
3015
  }>;
2986
3016
  attributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
2987
3017
  error: z$1.ZodOptional<z$1.ZodObject<{
@@ -3031,9 +3061,9 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3031
3061
  subtree: "subtree";
3032
3062
  }>>;
3033
3063
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3034
- sum: "sum";
3035
3064
  all: "all";
3036
3065
  last: "last";
3066
+ sum: "sum";
3037
3067
  }>>;
3038
3068
  }, z$1.core.$strip>>>;
3039
3069
  }, z$1.core.$strip>;
@@ -3043,10 +3073,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3043
3073
  namespace: z$1.ZodString;
3044
3074
  key: z$1.ZodString;
3045
3075
  status: z$1.ZodEnum<{
3076
+ bypass: "bypass";
3077
+ refresh: "refresh";
3046
3078
  hit: "hit";
3047
3079
  miss: "miss";
3048
- refresh: "refresh";
3049
- bypass: "bypass";
3050
3080
  }>;
3051
3081
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3052
3082
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3137,6 +3167,7 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3137
3167
  phase: z$1.ZodEnum<{
3138
3168
  eval: "eval";
3139
3169
  derive: "derive";
3170
+ tracingAssertions: "tracingAssertions";
3140
3171
  outputsSchema: "outputsSchema";
3141
3172
  scorer: "scorer";
3142
3173
  }>;
@@ -3163,10 +3194,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3163
3194
  namespace: z$1.ZodString;
3164
3195
  key: z$1.ZodString;
3165
3196
  status: z$1.ZodEnum<{
3197
+ bypass: "bypass";
3198
+ refresh: "refresh";
3166
3199
  hit: "hit";
3167
3200
  miss: "miss";
3168
- refresh: "refresh";
3169
- bypass: "bypass";
3170
3201
  }>;
3171
3202
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3172
3203
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3219,10 +3250,10 @@ declare const evalChartBuiltinMetricSchema: z$1.ZodEnum<{
3219
3250
  type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
3220
3251
  /** Reducer applied to a numeric column across all cases of a single run. */
3221
3252
  declare const evalChartAggregateSchema: z$1.ZodEnum<{
3222
- avg: "avg";
3253
+ sum: "sum";
3223
3254
  min: "min";
3224
3255
  max: "max";
3225
- sum: "sum";
3256
+ avg: "avg";
3226
3257
  latest: "latest";
3227
3258
  passThresholdRate: "passThresholdRate";
3228
3259
  }>;
@@ -3233,11 +3264,11 @@ type EvalChartAggregate = z$1.infer<typeof evalChartAggregateSchema>;
3233
3264
  * not emit raw hex so authored evals stay decoupled from the web theme.
3234
3265
  */
3235
3266
  declare const evalChartColorSchema: z$1.ZodEnum<{
3236
- success: "success";
3237
3267
  error: "error";
3268
+ success: "success";
3269
+ warning: "warning";
3238
3270
  accent: "accent";
3239
3271
  accentDim: "accentDim";
3240
- warning: "warning";
3241
3272
  textMuted: "textMuted";
3242
3273
  }>;
3243
3274
  /** Semantic color token resolved to a theme color by the web UI. */
@@ -3262,11 +3293,11 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3262
3293
  }>;
3263
3294
  label: z$1.ZodOptional<z$1.ZodString>;
3264
3295
  color: z$1.ZodOptional<z$1.ZodEnum<{
3265
- success: "success";
3266
3296
  error: "error";
3297
+ success: "success";
3298
+ warning: "warning";
3267
3299
  accent: "accent";
3268
3300
  accentDim: "accentDim";
3269
- warning: "warning";
3270
3301
  textMuted: "textMuted";
3271
3302
  }>>;
3272
3303
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3277,20 +3308,20 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3277
3308
  source: z$1.ZodLiteral<"column">;
3278
3309
  key: z$1.ZodString;
3279
3310
  aggregate: z$1.ZodEnum<{
3280
- avg: "avg";
3311
+ sum: "sum";
3281
3312
  min: "min";
3282
3313
  max: "max";
3283
- sum: "sum";
3314
+ avg: "avg";
3284
3315
  latest: "latest";
3285
3316
  passThresholdRate: "passThresholdRate";
3286
3317
  }>;
3287
3318
  label: z$1.ZodOptional<z$1.ZodString>;
3288
3319
  color: z$1.ZodOptional<z$1.ZodEnum<{
3289
- success: "success";
3290
3320
  error: "error";
3321
+ success: "success";
3322
+ warning: "warning";
3291
3323
  accent: "accent";
3292
3324
  accentDim: "accentDim";
3293
- warning: "warning";
3294
3325
  textMuted: "textMuted";
3295
3326
  }>>;
3296
3327
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3312,10 +3343,10 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
3312
3343
  source: z$1.ZodLiteral<"column">;
3313
3344
  key: z$1.ZodString;
3314
3345
  aggregate: z$1.ZodEnum<{
3315
- avg: "avg";
3346
+ sum: "sum";
3316
3347
  min: "min";
3317
3348
  max: "max";
3318
- sum: "sum";
3349
+ avg: "avg";
3319
3350
  latest: "latest";
3320
3351
  passThresholdRate: "passThresholdRate";
3321
3352
  }>;
@@ -3345,11 +3376,11 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3345
3376
  }>;
3346
3377
  label: z$1.ZodOptional<z$1.ZodString>;
3347
3378
  color: z$1.ZodOptional<z$1.ZodEnum<{
3348
- success: "success";
3349
3379
  error: "error";
3380
+ success: "success";
3381
+ warning: "warning";
3350
3382
  accent: "accent";
3351
3383
  accentDim: "accentDim";
3352
- warning: "warning";
3353
3384
  textMuted: "textMuted";
3354
3385
  }>>;
3355
3386
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3360,20 +3391,20 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3360
3391
  source: z$1.ZodLiteral<"column">;
3361
3392
  key: z$1.ZodString;
3362
3393
  aggregate: z$1.ZodEnum<{
3363
- avg: "avg";
3394
+ sum: "sum";
3364
3395
  min: "min";
3365
3396
  max: "max";
3366
- sum: "sum";
3397
+ avg: "avg";
3367
3398
  latest: "latest";
3368
3399
  passThresholdRate: "passThresholdRate";
3369
3400
  }>;
3370
3401
  label: z$1.ZodOptional<z$1.ZodString>;
3371
3402
  color: z$1.ZodOptional<z$1.ZodEnum<{
3372
- success: "success";
3373
3403
  error: "error";
3404
+ success: "success";
3405
+ warning: "warning";
3374
3406
  accent: "accent";
3375
3407
  accentDim: "accentDim";
3376
- warning: "warning";
3377
3408
  textMuted: "textMuted";
3378
3409
  }>>;
3379
3410
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3402,10 +3433,10 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3402
3433
  source: z$1.ZodLiteral<"column">;
3403
3434
  key: z$1.ZodString;
3404
3435
  aggregate: z$1.ZodEnum<{
3405
- avg: "avg";
3436
+ sum: "sum";
3406
3437
  min: "min";
3407
3438
  max: "max";
3408
- sum: "sum";
3439
+ avg: "avg";
3409
3440
  latest: "latest";
3410
3441
  passThresholdRate: "passThresholdRate";
3411
3442
  }>;
@@ -3435,11 +3466,11 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3435
3466
  }>;
3436
3467
  label: z$1.ZodOptional<z$1.ZodString>;
3437
3468
  color: z$1.ZodOptional<z$1.ZodEnum<{
3438
- success: "success";
3439
3469
  error: "error";
3470
+ success: "success";
3471
+ warning: "warning";
3440
3472
  accent: "accent";
3441
3473
  accentDim: "accentDim";
3442
- warning: "warning";
3443
3474
  textMuted: "textMuted";
3444
3475
  }>>;
3445
3476
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3450,20 +3481,20 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3450
3481
  source: z$1.ZodLiteral<"column">;
3451
3482
  key: z$1.ZodString;
3452
3483
  aggregate: z$1.ZodEnum<{
3453
- avg: "avg";
3484
+ sum: "sum";
3454
3485
  min: "min";
3455
3486
  max: "max";
3456
- sum: "sum";
3487
+ avg: "avg";
3457
3488
  latest: "latest";
3458
3489
  passThresholdRate: "passThresholdRate";
3459
3490
  }>;
3460
3491
  label: z$1.ZodOptional<z$1.ZodString>;
3461
3492
  color: z$1.ZodOptional<z$1.ZodEnum<{
3462
- success: "success";
3463
3493
  error: "error";
3494
+ success: "success";
3495
+ warning: "warning";
3464
3496
  accent: "accent";
3465
3497
  accentDim: "accentDim";
3466
- warning: "warning";
3467
3498
  textMuted: "textMuted";
3468
3499
  }>>;
3469
3500
  axis: z$1.ZodOptional<z$1.ZodEnum<{
@@ -3492,10 +3523,10 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3492
3523
  source: z$1.ZodLiteral<"column">;
3493
3524
  key: z$1.ZodString;
3494
3525
  aggregate: z$1.ZodEnum<{
3495
- avg: "avg";
3526
+ sum: "sum";
3496
3527
  min: "min";
3497
3528
  max: "max";
3498
- sum: "sum";
3529
+ avg: "avg";
3499
3530
  latest: "latest";
3500
3531
  passThresholdRate: "passThresholdRate";
3501
3532
  }>;
@@ -3523,9 +3554,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3523
3554
  evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
3524
3555
  target: z$1.ZodObject<{
3525
3556
  mode: z$1.ZodEnum<{
3526
- caseIds: "caseIds";
3527
3557
  all: "all";
3528
3558
  evalIds: "evalIds";
3559
+ caseIds: "caseIds";
3529
3560
  }>;
3530
3561
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
3531
3562
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -3539,9 +3570,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3539
3570
  median: "median";
3540
3571
  }>>>;
3541
3572
  cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
3542
- refresh: "refresh";
3543
- bypass: "bypass";
3544
3573
  use: "use";
3574
+ bypass: "bypass";
3575
+ refresh: "refresh";
3545
3576
  }>>;
3546
3577
  }, z$1.core.$strip>;
3547
3578
  /** Persisted lifecycle metadata for a single eval run. */
@@ -3610,7 +3641,7 @@ type ScopedCaseSummary = {
3610
3641
  //#endregion
3611
3642
  //#region src/evalStatus.d.ts
3612
3643
  /** Display status used for eval, file, and folder UI surfaces. */
3613
- type EvalDisplayStatus = DerivedStatus | 'stale' | 'outdated' | 'unscored';
3644
+ type EvalDisplayStatus = DerivedStatus | 'enqueued' | 'stale' | 'outdated' | 'unscored';
3614
3645
  /**
3615
3646
  * Derive the user-facing eval status from the raw latest run result plus
3616
3647
  * freshness state.
@@ -3658,10 +3689,17 @@ type EvalCase$1<TInput = unknown> = {
3658
3689
  };
3659
3690
  /** Query helpers built from the flattened trace recorded for one eval case. */
3660
3691
  type EvalTraceTree$1 = {
3661
- spans: EvalTraceSpan$1[];
3662
- rootSpans: EvalTraceSpan$1[];
3663
- findSpan: (name: string) => EvalTraceSpan$1 | undefined;
3664
- findSpansByKind: (kind: string) => EvalTraceSpan$1[];
3692
+ /** Flat span list in creation order. */spans: EvalTraceSpan$1[]; /** Top-level spans whose `parentId` is `null`. */
3693
+ rootSpans: EvalTraceSpan$1[]; /** Return the first span whose name exactly matches `name`. */
3694
+ findSpan: (name: string) => EvalTraceSpan$1 | undefined; /** Return every span whose name exactly matches `name`. */
3695
+ findSpans: (name: string) => EvalTraceSpan$1[]; /** Return whether any span name exactly matches `name`. */
3696
+ hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
3697
+ findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'`. */
3698
+ findToolCallSpans: () => EvalTraceSpan$1[]; /** Return the names of every span with `kind: 'tool'`. */
3699
+ listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
3700
+ hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
3701
+ listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
3702
+ listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
3665
3703
  flattenDfs: () => EvalTraceSpan$1[];
3666
3704
  checkpoints: Map<string, unknown>;
3667
3705
  };
@@ -3681,6 +3719,11 @@ type EvalDeriveFn$1<TInput = unknown> = (ctx: EvalDeriveContext$1<TInput>) => Re
3681
3719
  /** Trace-derived output config accepted globally and on eval definitions. */
3682
3720
  type EvalDeriveConfig$1<TInput = unknown> = EvalDeriveMap$1<TInput> | EvalDeriveFn$1<TInput>;
3683
3721
  /** Schema for keyed or object-returning trace-derived output config. */
3722
+ /** Function that records trace-derived assertions for one case. */
3723
+ type EvalTracingAssertionsFn$1<TInput = unknown> = (ctx: EvalDeriveContext$1<TInput>) => MaybePromise<void>;
3724
+ /** Trace-derived assertion config accepted globally and on eval definitions. */
3725
+ type EvalTracingAssertionsConfig$1<TInput = unknown> = EvalTracingAssertionsFn$1<TInput>;
3726
+ /** Schema for trace-derived assertion config. */
3684
3727
  /** UI overrides for a derived or scored column emitted by an eval. */
3685
3728
  type EvalColumnOverride$1 = {
3686
3729
  /** Display label shown for the column in tables and detail views. */label?: string;
@@ -4133,9 +4176,19 @@ type AgentEvalsConfig$1 = {
4133
4176
  * Prefer the keyed map form for shared metrics:
4134
4177
  * `{ toolCalls: ({ trace }) => trace.findSpansByKind('tool').length }`.
4135
4178
  * The object-returning function form is also supported. Derived outputs
4136
- * only fill keys that were not already recorded by eval execution.
4179
+ * only fill keys that were not already recorded by eval execution. Do not
4180
+ * call assertion helpers here; use `tracingAssertions` for trace-derived
4181
+ * pass/fail checks.
4137
4182
  */
4138
4183
  deriveFromTracing?: EvalDeriveConfig$1;
4184
+ /**
4185
+ * Workspace-wide assertions derived from the finished execution trace.
4186
+ *
4187
+ * These run after `deriveFromTracing` and before output schema validation and
4188
+ * scores. Use `evalAssert(...)` or `evalExpect(...)` inside the callback to
4189
+ * record normal assertion results without creating fake score columns.
4190
+ */
4191
+ tracingAssertions?: EvalTracingAssertionsConfig$1;
4139
4192
  /**
4140
4193
  * Workspace-wide stats prepended to every eval's stats row.
4141
4194
  *
@@ -4466,9 +4519,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4466
4519
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4467
4520
  */
4468
4521
  declare const cacheModeSchema: z$1.ZodEnum<{
4469
- refresh: "refresh";
4470
- bypass: "bypass";
4471
4522
  use: "use";
4523
+ bypass: "bypass";
4524
+ refresh: "refresh";
4472
4525
  }>;
4473
4526
  /** Mode controlling how cached spans behave during a run. */
4474
4527
  type CacheMode = z$1.infer<typeof cacheModeSchema>;
@@ -4482,17 +4535,17 @@ declare const spanCacheOptionsSchema: z$1.ZodObject<{
4482
4535
  type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
4483
4536
  /** Category of operation stored in the eval cache. */
4484
4537
  declare const cacheOperationTypeSchema: z$1.ZodEnum<{
4485
- value: "value";
4486
4538
  span: "span";
4539
+ value: "value";
4487
4540
  }>;
4488
4541
  /** Category of operation stored in the eval cache. */
4489
4542
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
4490
4543
  /** Status of a cache lookup recorded on a span or case scope. */
4491
4544
  declare const cacheStatusSchema: z$1.ZodEnum<{
4545
+ bypass: "bypass";
4546
+ refresh: "refresh";
4492
4547
  hit: "hit";
4493
4548
  miss: "miss";
4494
- refresh: "refresh";
4495
- bypass: "bypass";
4496
4549
  }>;
4497
4550
  /** Status of a cache lookup recorded on a span or case scope. */
4498
4551
  type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
@@ -4509,10 +4562,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
4509
4562
  namespace: z$1.ZodString;
4510
4563
  key: z$1.ZodString;
4511
4564
  status: z$1.ZodEnum<{
4565
+ bypass: "bypass";
4566
+ refresh: "refresh";
4512
4567
  hit: "hit";
4513
4568
  miss: "miss";
4514
- refresh: "refresh";
4515
- bypass: "bypass";
4516
4569
  }>;
4517
4570
  read: z$1.ZodOptional<z$1.ZodBoolean>;
4518
4571
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -4620,8 +4673,8 @@ declare const cacheRecordingSchema: z$1.ZodObject<{
4620
4673
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4621
4674
  error: "error";
4622
4675
  running: "running";
4623
- cancelled: "cancelled";
4624
4676
  ok: "ok";
4677
+ cancelled: "cancelled";
4625
4678
  }>>;
4626
4679
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4627
4680
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4708,8 +4761,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
4708
4761
  key: z$1.ZodString;
4709
4762
  namespace: z$1.ZodString;
4710
4763
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4711
- value: "value";
4712
4764
  span: "span";
4765
+ value: "value";
4713
4766
  }>>;
4714
4767
  operationName: z$1.ZodOptional<z$1.ZodString>;
4715
4768
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4721,8 +4774,8 @@ declare const cacheEntrySchema: z$1.ZodObject<{
4721
4774
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4722
4775
  error: "error";
4723
4776
  running: "running";
4724
- cancelled: "cancelled";
4725
4777
  ok: "ok";
4778
+ cancelled: "cancelled";
4726
4779
  }>>;
4727
4780
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4728
4781
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4815,8 +4868,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4815
4868
  key: z$1.ZodString;
4816
4869
  namespace: z$1.ZodString;
4817
4870
  operationType: z$1.ZodEnum<{
4818
- value: "value";
4819
4871
  span: "span";
4872
+ value: "value";
4820
4873
  }>;
4821
4874
  operationName: z$1.ZodString;
4822
4875
  storedAt: z$1.ZodString;
@@ -4826,8 +4879,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4826
4879
  key: z$1.ZodString;
4827
4880
  namespace: z$1.ZodString;
4828
4881
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4829
- value: "value";
4830
4882
  span: "span";
4883
+ value: "value";
4831
4884
  }>>;
4832
4885
  operationName: z$1.ZodOptional<z$1.ZodString>;
4833
4886
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4839,8 +4892,8 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
4839
4892
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4840
4893
  error: "error";
4841
4894
  running: "running";
4842
- cancelled: "cancelled";
4843
4895
  ok: "ok";
4896
+ cancelled: "cancelled";
4844
4897
  }>>;
4845
4898
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4846
4899
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -4933,8 +4986,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4933
4986
  key: z$1.ZodString;
4934
4987
  namespace: z$1.ZodString;
4935
4988
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
4936
- value: "value";
4937
4989
  span: "span";
4990
+ value: "value";
4938
4991
  }>>;
4939
4992
  operationName: z$1.ZodOptional<z$1.ZodString>;
4940
4993
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -4946,8 +4999,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
4946
4999
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
4947
5000
  error: "error";
4948
5001
  running: "running";
4949
- cancelled: "cancelled";
4950
5002
  ok: "ok";
5003
+ cancelled: "cancelled";
4951
5004
  }>>;
4952
5005
  finalError: z$1.ZodOptional<z$1.ZodObject<{
4953
5006
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -5031,8 +5084,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
5031
5084
  key: z$1.ZodString;
5032
5085
  namespace: z$1.ZodString;
5033
5086
  operationType: z$1.ZodEnum<{
5034
- value: "value";
5035
5087
  span: "span";
5088
+ value: "value";
5036
5089
  }>;
5037
5090
  operationName: z$1.ZodString;
5038
5091
  storedAt: z$1.ZodString;
@@ -5042,8 +5095,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
5042
5095
  key: z$1.ZodString;
5043
5096
  namespace: z$1.ZodString;
5044
5097
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
5045
- value: "value";
5046
5098
  span: "span";
5099
+ value: "value";
5047
5100
  }>>;
5048
5101
  operationName: z$1.ZodOptional<z$1.ZodString>;
5049
5102
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -5055,8 +5108,8 @@ declare const cacheEntryWithDebugKeySchema$1: z$1.ZodObject<{
5055
5108
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
5056
5109
  error: "error";
5057
5110
  running: "running";
5058
- cancelled: "cancelled";
5059
5111
  ok: "ok";
5112
+ cancelled: "cancelled";
5060
5113
  }>>;
5061
5114
  finalError: z$1.ZodOptional<z$1.ZodObject<{
5062
5115
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -5149,8 +5202,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
5149
5202
  key: z$1.ZodString;
5150
5203
  namespace: z$1.ZodString;
5151
5204
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
5152
- value: "value";
5153
5205
  span: "span";
5206
+ value: "value";
5154
5207
  }>>;
5155
5208
  operationName: z$1.ZodOptional<z$1.ZodString>;
5156
5209
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -5162,8 +5215,8 @@ declare const cacheFileSchema: z$1.ZodObject<{
5162
5215
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
5163
5216
  error: "error";
5164
5217
  running: "running";
5165
- cancelled: "cancelled";
5166
5218
  ok: "ok";
5219
+ cancelled: "cancelled";
5167
5220
  }>>;
5168
5221
  finalError: z$1.ZodOptional<z$1.ZodObject<{
5169
5222
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -5255,8 +5308,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
5255
5308
  key: z$1.ZodString;
5256
5309
  namespace: z$1.ZodString;
5257
5310
  operationType: z$1.ZodEnum<{
5258
- value: "value";
5259
5311
  span: "span";
5312
+ value: "value";
5260
5313
  }>;
5261
5314
  operationName: z$1.ZodString;
5262
5315
  storedAt: z$1.ZodString;
@@ -5266,8 +5319,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
5266
5319
  key: z$1.ZodString;
5267
5320
  namespace: z$1.ZodString;
5268
5321
  operationType: z$1.ZodOptional<z$1.ZodEnum<{
5269
- value: "value";
5270
5322
  span: "span";
5323
+ value: "value";
5271
5324
  }>>;
5272
5325
  operationName: z$1.ZodOptional<z$1.ZodString>;
5273
5326
  spanName: z$1.ZodOptional<z$1.ZodString>;
@@ -5279,8 +5332,8 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
5279
5332
  finalStatus: z$1.ZodOptional<z$1.ZodEnum<{
5280
5333
  error: "error";
5281
5334
  running: "running";
5282
- cancelled: "cancelled";
5283
5335
  ok: "ok";
5336
+ cancelled: "cancelled";
5284
5337
  }>>;
5285
5338
  finalError: z$1.ZodOptional<z$1.ZodObject<{
5286
5339
  name: z$1.ZodOptional<z$1.ZodString>;
@@ -5441,8 +5494,8 @@ type SseEnvelope = z$1.infer<typeof sseEnvelopeSchema$1>; //#endregion
5441
5494
  //#region src/schemas/api.d.ts
5442
5495
  /** Lifecycle state for an app config reload triggered by `agent-evals.config.ts`. */
5443
5496
  declare const configReloadStatusSchema: z$1.ZodEnum<{
5444
- pending: "pending";
5445
5497
  idle: "idle";
5498
+ pending: "pending";
5446
5499
  reloading: "reloading";
5447
5500
  }>;
5448
5501
  /** Status for config reloads in the long-running app server. */
@@ -5450,8 +5503,8 @@ type ConfigReloadStatus = z$1.infer<typeof configReloadStatusSchema>;
5450
5503
  /** UI/API-visible state for config reloads in `agent-evals app`. */
5451
5504
  declare const configReloadStateSchema$1: z$1.ZodObject<{
5452
5505
  status: z$1.ZodEnum<{
5453
- pending: "pending";
5454
5506
  idle: "idle";
5507
+ pending: "pending";
5455
5508
  reloading: "reloading";
5456
5509
  }>;
5457
5510
  activeRunCount: z$1.ZodNumber;
@@ -5464,9 +5517,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
5464
5517
  declare const createRunRequestSchema$1: z$1.ZodObject<{
5465
5518
  target: z$1.ZodObject<{
5466
5519
  mode: z$1.ZodEnum<{
5467
- caseIds: "caseIds";
5468
5520
  all: "all";
5469
5521
  evalIds: "evalIds";
5522
+ caseIds: "caseIds";
5470
5523
  }>;
5471
5524
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
5472
5525
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -5478,9 +5531,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
5478
5531
  temporary: z$1.ZodOptional<z$1.ZodBoolean>;
5479
5532
  cache: z$1.ZodOptional<z$1.ZodObject<{
5480
5533
  mode: z$1.ZodDefault<z$1.ZodEnum<{
5481
- refresh: "refresh";
5482
- bypass: "bypass";
5483
5534
  use: "use";
5535
+ bypass: "bypass";
5536
+ refresh: "refresh";
5484
5537
  }>>;
5485
5538
  }, z$1.core.$strip>>;
5486
5539
  manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
@@ -6366,6 +6419,7 @@ declare const caseDetailSchema: z$1.ZodObject<{
6366
6419
  phase: z$1.ZodEnum<{
6367
6420
  eval: "eval";
6368
6421
  derive: "derive";
6422
+ tracingAssertions: "tracingAssertions";
6369
6423
  outputsSchema: "outputsSchema";
6370
6424
  scorer: "scorer";
6371
6425
  }>;
@@ -6992,7 +7046,8 @@ type EvalRunner = {
6992
7046
  getEvals(): EvalSummary$1[]; /** Look up one discovered eval by id. */
6993
7047
  getEval(id: string): EvalSummary$1 | undefined; /** Return discovery errors that should be shown before running evals. */
6994
7048
  getDiscoveryIssues(): DiscoveryIssue$1[]; /** Return current config-reload state for the long-running app server. */
6995
- getConfigReloadState(): ConfigReloadState$1; /** Re-scan configured eval files and emit a discovery update to listeners. */
7049
+ getConfigReloadState(): ConfigReloadState$1; /** Return the effective per-run case concurrency after applying defaults. */
7050
+ getConfiguredConcurrency(): number; /** Re-scan configured eval files and emit a discovery update to listeners. */
6996
7051
  refreshDiscovery(): Promise<void>;
6997
7052
  startRun(request: CreateRunRequest$1): Promise<{
6998
7053
  manifest: RunManifest$1;
@@ -7227,4 +7282,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
7227
7282
  /** Return whether the active eval case has tags matching the typed input. */
7228
7283
  declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
7229
7284
  //#endregion
7230
- export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, 
EvalTagMatchInput, type EvalTraceTree, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, 
getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
7285
+ export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, 
type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, 
extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };