@ls-stack/agent-eval 0.58.0 → 0.58.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -268,6 +268,7 @@ declare const runLogEntrySchema$1: z$1.ZodObject<{
268
268
  phase: z$1.ZodEnum<{
269
269
  eval: "eval";
270
270
  derive: "derive";
271
+ tracingAssertions: "tracingAssertions";
271
272
  outputsSchema: "outputsSchema";
272
273
  scorer: "scorer";
273
274
  }>;
@@ -399,10 +400,17 @@ type EvalCase$1$1<TInput = unknown> = {
399
400
  };
400
401
  /** Query helpers built from the flattened trace recorded for one eval case. */
401
402
  type EvalTraceTree = {
402
- spans: EvalTraceSpan$2[];
403
- rootSpans: EvalTraceSpan$2[];
404
- findSpan: (name: string) => EvalTraceSpan$2 | undefined;
405
- findSpansByKind: (kind: string) => EvalTraceSpan$2[];
403
+ /** Flat span list in creation order. */spans: EvalTraceSpan$2[]; /** Top-level spans whose `parentId` is `null`. */
404
+ rootSpans: EvalTraceSpan$2[]; /** Return the first span whose name exactly matches `name`. */
405
+ findSpan: (name: string) => EvalTraceSpan$2 | undefined; /** Return every span whose name exactly matches `name`. */
406
+ findSpans: (name: string) => EvalTraceSpan$2[]; /** Return whether any span name exactly matches `name`. */
407
+ hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
408
+ findSpansByKind: (kind: string) => EvalTraceSpan$2[]; /** Return every span with `kind: 'tool'`. */
409
+ findToolCallSpans: () => EvalTraceSpan$2[]; /** Return the names of every span with `kind: 'tool'`. */
410
+ listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
411
+ hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
412
+ listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
413
+ listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
406
414
  flattenDfs: () => EvalTraceSpan$2[];
407
415
  checkpoints: Map<string, unknown>;
408
416
  };
@@ -421,6 +429,12 @@ type EvalDeriveMap<TInput = unknown> = Record<string, EvalDeriveValueFn<TInput>>
421
429
  type EvalDeriveFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => Record<string, unknown> | Promise<Record<string, unknown>>;
422
430
  /** Trace-derived output config accepted globally and on eval definitions. */
423
431
  type EvalDeriveConfig<TInput = unknown> = EvalDeriveMap<TInput> | EvalDeriveFn<TInput>;
432
+ /** Function that records trace-derived assertions for one case. */
433
+ type EvalTracingAssertionsFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => MaybePromise$1<void>;
434
+ /** Keyed trace-derived assertion config for grouping related checks. */
435
+ type EvalTracingAssertionsMap<TInput = unknown> = Record<string, EvalTracingAssertionsFn<TInput>>;
436
+ /** Trace-derived assertion config accepted globally and on eval definitions. */
437
+ type EvalTracingAssertionsConfig<TInput = unknown> = EvalTracingAssertionsMap<TInput> | EvalTracingAssertionsFn<TInput>;
424
438
  /** UI overrides for a derived or scored column emitted by an eval. */
425
439
  type EvalColumnOverride = {
426
440
  /** Display label shown for the column in tables and detail views. */label?: string;
@@ -1142,9 +1156,18 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
1142
1156
  *
1143
1157
  * Prefer the keyed map form when each key has one derivation. The
1144
1158
  * object-returning callback form is also supported. Derived values only fill
1145
- * keys not already recorded during execution.
1159
+ * keys not already recorded during execution. Assertion helpers are not
1160
+ * allowed here; use `tracingAssertions` for trace-derived pass/fail checks.
1146
1161
  */
1147
1162
  deriveFromTracing?: EvalDeriveConfig<TInput>;
1163
+ /**
1164
+ * Record assertions from the finished execution trace.
1165
+ *
1166
+ * Runs after `deriveFromTracing` and before output schema validation and
1167
+ * scores. Use `evalAssert(...)` or `evalExpect(...)` inside the callback to
1168
+ * write normal assertion results without creating score columns.
1169
+ */
1170
+ tracingAssertions?: EvalTracingAssertionsConfig<TInput>;
1148
1171
  /**
1149
1172
  * Computed score columns for each case.
1150
1173
  *
@@ -1455,7 +1478,9 @@ type CacheScopeContext = {
1455
1478
  /** Active recording frame captured while a cached operation body executes. */
1456
1479
  type CacheRecordingFrame = {
1457
1480
  /** Length of `scope.spans` immediately before the cached body started. */baseSpanIndex: number; /** Parent id used when recording and replaying direct child spans. */
1458
- replayParentSpanId: string | null; /** Ordered observable effects recorded during the cached body. */
1481
+ replayParentSpanId: string | null; /** Spans created by this cache body's async execution branch. */
1482
+ spanIds: Set<string>; /** Non-cache attributes written to the replay parent by this async branch. */
1483
+ finalAttributes: Record<string, unknown>; /** Ordered observable effects recorded during the cached body. */
1459
1484
  ops: CacheRecordingOp$1[];
1460
1485
  };
1461
1486
  /** Mutable per-case runtime state stored in async local storage. */
@@ -1480,11 +1505,6 @@ type EvalCaseScope = {
1480
1505
  logs: RunLogEntry$1[];
1481
1506
  spans: EvalTraceSpan$2[];
1482
1507
  checkpoints: Map<string, unknown>;
1483
- /**
1484
- * Stack of active cache recorders. Ops are written to the top-most frame
1485
- * when it exists and `replayingDepth === 0`.
1486
- */
1487
- recordingStack: CacheRecordingFrame[];
1488
1508
  /**
1489
1509
  * Incremented while replaying a cached operation, so nested SDK calls do not
1490
1510
  * accidentally double-record ops into outer recorders.
@@ -1506,12 +1526,16 @@ type EvalCaseScope = {
1506
1526
  * covers run-time module/environment loading, including top-level code in
1507
1527
  * modules imported while a run is being prepared.
1508
1528
  */
1509
- type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'outputsSchema' | 'scorer';
1529
+ type EvalRuntimeScope = 'env' | 'cases' | 'eval' | 'derive' | 'tracingAssertions' | 'outputsSchema' | 'scorer';
1510
1530
  type EvalLogLevelInput = RunLogLevel$1 | 'warning';
1511
1531
  /** Error thrown when an eval assertion fails during case execution. */
1512
1532
  declare class EvalAssertionError extends Error {
1513
1533
  constructor(message: string);
1514
1534
  }
1535
+ /** Error thrown when an SDK helper is used in an unsupported runner phase. */
1536
+ declare class EvalRuntimeUsageError extends Error {
1537
+ constructor(message: string);
1538
+ }
1515
1539
  /** Return the host process clock, bypassing the eval Date shim. */
1516
1540
  /**
1517
1541
  * Eval time helpers for reading and moving the active eval clock.
@@ -1542,8 +1566,10 @@ declare function getCurrentScope(): EvalCaseScope | undefined;
1542
1566
  *
1543
1567
  * Returns `null` outside eval-owned work, `env` while the runner is loading
1544
1568
  * eval modules for a run, `cases` while generating cases, `eval` while running
1545
- * case `execute`, `derive` while deriving outputs from traces, `outputsSchema`
1546
- * while validating outputs, and `scorer` while computing scores.
1569
+ * case `execute`, `derive` while deriving outputs from traces,
1570
+ * `tracingAssertions` while checking trace-derived assertions,
1571
+ * `outputsSchema` while validating outputs, and `scorer` while computing
1572
+ * scores.
1547
1573
  */
1548
1574
  declare function isInEvalScope(): EvalRuntimeScope | null;
1549
1575
  /**
@@ -1659,7 +1685,8 @@ declare function incrementEvalOutput(key: string, delta: number): void;
1659
1685
  * Calls made outside `runInEvalScope(...)` are ignored so shared workflow code
1660
1686
  * can safely reuse `evalAssert(...)` when it also runs outside an eval. The
1661
1687
  * TypeScript assertion signature still narrows the checked value after the
1662
- * call.
1688
+ * call. Calls inside `deriveFromTracing` throw because derivations must only
1689
+ * write outputs; use `tracingAssertions` for trace-derived pass/fail checks.
1663
1690
  */
1664
1691
  declare function evalAssert(condition: unknown, message: string): asserts condition; //#endregion
1665
1692
  //#region src/valueCache.d.ts
@@ -2017,8 +2044,8 @@ declare const traceAttributeDisplaySchema: z$1.ZodObject<{
2017
2044
  subtree: "subtree";
2018
2045
  }>>;
2019
2046
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2020
- all: "all";
2021
2047
  sum: "sum";
2048
+ all: "all";
2022
2049
  last: "last";
2023
2050
  }>>;
2024
2051
  }, z$1.core.$strip>;
@@ -2053,8 +2080,8 @@ declare const traceDisplayConfigSchema: z$1.ZodObject<{
2053
2080
  subtree: "subtree";
2054
2081
  }>>;
2055
2082
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2056
- all: "all";
2057
2083
  sum: "sum";
2084
+ all: "all";
2058
2085
  last: "last";
2059
2086
  }>>;
2060
2087
  }, z$1.core.$strip>>>;
@@ -2093,8 +2120,8 @@ declare const traceAttributeDisplayInputSchema: z$1.ZodObject<{
2093
2120
  subtree: "subtree";
2094
2121
  }>>;
2095
2122
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2096
- all: "all";
2097
2123
  sum: "sum";
2124
+ all: "all";
2098
2125
  last: "last";
2099
2126
  }>>;
2100
2127
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
@@ -2131,8 +2158,8 @@ declare const traceDisplayInputConfigSchema: z$1.ZodObject<{
2131
2158
  subtree: "subtree";
2132
2159
  }>>;
2133
2160
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2134
- all: "all";
2135
2161
  sum: "sum";
2162
+ all: "all";
2136
2163
  last: "last";
2137
2164
  }>>;
2138
2165
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
@@ -2217,9 +2244,9 @@ type EvalFreshnessStatus = z$1.infer<typeof evalFreshnessStatusSchema>;
2217
2244
  */
2218
2245
  declare const evalStatAggregateSchema: z$1.ZodEnum<{
2219
2246
  avg: "avg";
2220
- sum: "sum";
2221
2247
  min: "min";
2222
2248
  max: "max";
2249
+ sum: "sum";
2223
2250
  best: "best";
2224
2251
  worst: "worst";
2225
2252
  }>;
@@ -2249,9 +2276,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2249
2276
  kind: z$1.ZodLiteral<"duration">;
2250
2277
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2251
2278
  avg: "avg";
2252
- sum: "sum";
2253
2279
  min: "min";
2254
2280
  max: "max";
2281
+ sum: "sum";
2255
2282
  best: "best";
2256
2283
  worst: "worst";
2257
2284
  }>>;
@@ -2260,9 +2287,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2260
2287
  kind: z$1.ZodLiteral<"cacheHits">;
2261
2288
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2262
2289
  avg: "avg";
2263
- sum: "sum";
2264
2290
  min: "min";
2265
2291
  max: "max";
2292
+ sum: "sum";
2266
2293
  best: "best";
2267
2294
  worst: "worst";
2268
2295
  }>>;
@@ -2273,9 +2300,9 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2273
2300
  label: z$1.ZodOptional<z$1.ZodString>;
2274
2301
  aggregate: z$1.ZodEnum<{
2275
2302
  avg: "avg";
2276
- sum: "sum";
2277
2303
  min: "min";
2278
2304
  max: "max";
2305
+ sum: "sum";
2279
2306
  best: "best";
2280
2307
  worst: "worst";
2281
2308
  }>;
@@ -2313,9 +2340,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2313
2340
  kind: z$1.ZodLiteral<"duration">;
2314
2341
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2315
2342
  avg: "avg";
2316
- sum: "sum";
2317
2343
  min: "min";
2318
2344
  max: "max";
2345
+ sum: "sum";
2319
2346
  best: "best";
2320
2347
  worst: "worst";
2321
2348
  }>>;
@@ -2324,9 +2351,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2324
2351
  kind: z$1.ZodLiteral<"cacheHits">;
2325
2352
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2326
2353
  avg: "avg";
2327
- sum: "sum";
2328
2354
  min: "min";
2329
2355
  max: "max";
2356
+ sum: "sum";
2330
2357
  best: "best";
2331
2358
  worst: "worst";
2332
2359
  }>>;
@@ -2337,9 +2364,9 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
2337
2364
  label: z$1.ZodOptional<z$1.ZodString>;
2338
2365
  aggregate: z$1.ZodEnum<{
2339
2366
  avg: "avg";
2340
- sum: "sum";
2341
2367
  min: "min";
2342
2368
  max: "max";
2369
+ sum: "sum";
2343
2370
  best: "best";
2344
2371
  worst: "worst";
2345
2372
  }>;
@@ -2422,10 +2449,10 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2422
2449
  caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2423
2450
  lastRunStatus: z$1.ZodNullable<z$1.ZodEnum<{
2424
2451
  error: "error";
2425
- running: "running";
2426
- cancelled: "cancelled";
2427
2452
  pass: "pass";
2428
2453
  fail: "fail";
2454
+ running: "running";
2455
+ cancelled: "cancelled";
2429
2456
  unscored: "unscored";
2430
2457
  }>>;
2431
2458
  stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
@@ -2440,9 +2467,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2440
2467
  kind: z$1.ZodLiteral<"duration">;
2441
2468
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2442
2469
  avg: "avg";
2443
- sum: "sum";
2444
2470
  min: "min";
2445
2471
  max: "max";
2472
+ sum: "sum";
2446
2473
  best: "best";
2447
2474
  worst: "worst";
2448
2475
  }>>;
@@ -2451,9 +2478,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2451
2478
  kind: z$1.ZodLiteral<"cacheHits">;
2452
2479
  aggregate: z$1.ZodOptional<z$1.ZodEnum<{
2453
2480
  avg: "avg";
2454
- sum: "sum";
2455
2481
  min: "min";
2456
2482
  max: "max";
2483
+ sum: "sum";
2457
2484
  best: "best";
2458
2485
  worst: "worst";
2459
2486
  }>>;
@@ -2464,9 +2491,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2464
2491
  label: z$1.ZodOptional<z$1.ZodString>;
2465
2492
  aggregate: z$1.ZodEnum<{
2466
2493
  avg: "avg";
2467
- sum: "sum";
2468
2494
  min: "min";
2469
2495
  max: "max";
2496
+ sum: "sum";
2470
2497
  best: "best";
2471
2498
  worst: "worst";
2472
2499
  }>;
@@ -2491,9 +2518,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2491
2518
  }, z$1.core.$strip>], "kind">>>;
2492
2519
  defaultStatAggregate: z$1.ZodOptional<z$1.ZodEnum<{
2493
2520
  avg: "avg";
2494
- sum: "sum";
2495
2521
  min: "min";
2496
2522
  max: "max";
2523
+ sum: "sum";
2497
2524
  best: "best";
2498
2525
  worst: "worst";
2499
2526
  }>>;
@@ -2530,9 +2557,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2530
2557
  key: z$1.ZodString;
2531
2558
  aggregate: z$1.ZodEnum<{
2532
2559
  avg: "avg";
2533
- sum: "sum";
2534
2560
  min: "min";
2535
2561
  max: "max";
2562
+ sum: "sum";
2536
2563
  latest: "latest";
2537
2564
  passThresholdRate: "passThresholdRate";
2538
2565
  }>;
@@ -2572,9 +2599,9 @@ declare const evalSummarySchema$1: z$1.ZodObject<{
2572
2599
  key: z$1.ZodString;
2573
2600
  aggregate: z$1.ZodEnum<{
2574
2601
  avg: "avg";
2575
- sum: "sum";
2576
2602
  min: "min";
2577
2603
  max: "max";
2604
+ sum: "sum";
2578
2605
  latest: "latest";
2579
2606
  passThresholdRate: "passThresholdRate";
2580
2607
  }>;
@@ -2671,11 +2698,11 @@ declare const caseRowSchema$1: z$1.ZodObject<{
2671
2698
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2672
2699
  status: z$1.ZodEnum<{
2673
2700
  error: "error";
2674
- pending: "pending";
2675
- running: "running";
2676
- cancelled: "cancelled";
2677
2701
  pass: "pass";
2678
2702
  fail: "fail";
2703
+ running: "running";
2704
+ cancelled: "cancelled";
2705
+ pending: "pending";
2679
2706
  }>;
2680
2707
  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
2681
2708
  cacheHits: z$1.ZodOptional<z$1.ZodNumber>;
@@ -2756,6 +2783,7 @@ type RunLogLevel = z$1.infer<typeof runLogLevelSchema>;
2756
2783
  declare const runLogPhaseSchema: z$1.ZodEnum<{
2757
2784
  eval: "eval";
2758
2785
  derive: "derive";
2786
+ tracingAssertions: "tracingAssertions";
2759
2787
  outputsSchema: "outputsSchema";
2760
2788
  scorer: "scorer";
2761
2789
  }>;
@@ -2782,6 +2810,7 @@ declare const runLogEntrySchema: z$1.ZodObject<{
2782
2810
  phase: z$1.ZodEnum<{
2783
2811
  eval: "eval";
2784
2812
  derive: "derive";
2813
+ tracingAssertions: "tracingAssertions";
2785
2814
  outputsSchema: "outputsSchema";
2786
2815
  scorer: "scorer";
2787
2816
  }>;
@@ -2862,8 +2891,8 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2862
2891
  subtree: "subtree";
2863
2892
  }>>;
2864
2893
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2865
- all: "all";
2866
2894
  sum: "sum";
2895
+ all: "all";
2867
2896
  last: "last";
2868
2897
  }>>;
2869
2898
  }, z$1.core.$strip>>>;
@@ -2874,10 +2903,10 @@ declare const scoreTraceSchema: z$1.ZodObject<{
2874
2903
  namespace: z$1.ZodString;
2875
2904
  key: z$1.ZodString;
2876
2905
  status: z$1.ZodEnum<{
2877
- bypass: "bypass";
2878
- refresh: "refresh";
2879
2906
  hit: "hit";
2880
2907
  miss: "miss";
2908
+ refresh: "refresh";
2909
+ bypass: "bypass";
2881
2910
  }>;
2882
2911
  read: z$1.ZodOptional<z$1.ZodBoolean>;
2883
2912
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -2896,11 +2925,11 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2896
2925
  tags: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2897
2926
  status: z$1.ZodEnum<{
2898
2927
  error: "error";
2899
- pending: "pending";
2900
- running: "running";
2901
- cancelled: "cancelled";
2902
2928
  pass: "pass";
2903
2929
  fail: "fail";
2930
+ running: "running";
2931
+ cancelled: "cancelled";
2932
+ pending: "pending";
2904
2933
  }>;
2905
2934
  input: z$1.ZodUnknown;
2906
2935
  trace: z$1.ZodArray<z$1.ZodObject<{
@@ -2965,8 +2994,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
2965
2994
  subtree: "subtree";
2966
2995
  }>>;
2967
2996
  mode: z$1.ZodOptional<z$1.ZodEnum<{
2968
- all: "all";
2969
2997
  sum: "sum";
2998
+ all: "all";
2970
2999
  last: "last";
2971
3000
  }>>;
2972
3001
  }, z$1.core.$strip>>>;
@@ -3034,8 +3063,8 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3034
3063
  subtree: "subtree";
3035
3064
  }>>;
3036
3065
  mode: z$1.ZodOptional<z$1.ZodEnum<{
3037
- all: "all";
3038
3066
  sum: "sum";
3067
+ all: "all";
3039
3068
  last: "last";
3040
3069
  }>>;
3041
3070
  }, z$1.core.$strip>>>;
@@ -3046,10 +3075,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3046
3075
  namespace: z$1.ZodString;
3047
3076
  key: z$1.ZodString;
3048
3077
  status: z$1.ZodEnum<{
3049
- bypass: "bypass";
3050
- refresh: "refresh";
3051
3078
  hit: "hit";
3052
3079
  miss: "miss";
3080
+ refresh: "refresh";
3081
+ bypass: "bypass";
3053
3082
  }>;
3054
3083
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3055
3084
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3140,6 +3169,7 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3140
3169
  phase: z$1.ZodEnum<{
3141
3170
  eval: "eval";
3142
3171
  derive: "derive";
3172
+ tracingAssertions: "tracingAssertions";
3143
3173
  outputsSchema: "outputsSchema";
3144
3174
  scorer: "scorer";
3145
3175
  }>;
@@ -3166,10 +3196,10 @@ declare const caseDetailSchema$1: z$1.ZodObject<{
3166
3196
  namespace: z$1.ZodString;
3167
3197
  key: z$1.ZodString;
3168
3198
  status: z$1.ZodEnum<{
3169
- bypass: "bypass";
3170
- refresh: "refresh";
3171
3199
  hit: "hit";
3172
3200
  miss: "miss";
3201
+ refresh: "refresh";
3202
+ bypass: "bypass";
3173
3203
  }>;
3174
3204
  read: z$1.ZodOptional<z$1.ZodBoolean>;
3175
3205
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -3223,9 +3253,9 @@ type EvalChartBuiltinMetric = z$1.infer<typeof evalChartBuiltinMetricSchema>;
3223
3253
  /** Reducer applied to a numeric column across all cases of a single run. */
3224
3254
  declare const evalChartAggregateSchema: z$1.ZodEnum<{
3225
3255
  avg: "avg";
3226
- sum: "sum";
3227
3256
  min: "min";
3228
3257
  max: "max";
3258
+ sum: "sum";
3229
3259
  latest: "latest";
3230
3260
  passThresholdRate: "passThresholdRate";
3231
3261
  }>;
@@ -3281,9 +3311,9 @@ declare const evalChartMetricSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
3281
3311
  key: z$1.ZodString;
3282
3312
  aggregate: z$1.ZodEnum<{
3283
3313
  avg: "avg";
3284
- sum: "sum";
3285
3314
  min: "min";
3286
3315
  max: "max";
3316
+ sum: "sum";
3287
3317
  latest: "latest";
3288
3318
  passThresholdRate: "passThresholdRate";
3289
3319
  }>;
@@ -3316,9 +3346,9 @@ declare const evalChartTooltipExtraSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObj
3316
3346
  key: z$1.ZodString;
3317
3347
  aggregate: z$1.ZodEnum<{
3318
3348
  avg: "avg";
3319
- sum: "sum";
3320
3349
  min: "min";
3321
3350
  max: "max";
3351
+ sum: "sum";
3322
3352
  latest: "latest";
3323
3353
  passThresholdRate: "passThresholdRate";
3324
3354
  }>;
@@ -3364,9 +3394,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3364
3394
  key: z$1.ZodString;
3365
3395
  aggregate: z$1.ZodEnum<{
3366
3396
  avg: "avg";
3367
- sum: "sum";
3368
3397
  min: "min";
3369
3398
  max: "max";
3399
+ sum: "sum";
3370
3400
  latest: "latest";
3371
3401
  passThresholdRate: "passThresholdRate";
3372
3402
  }>;
@@ -3406,9 +3436,9 @@ declare const evalChartConfigSchema: z$1.ZodObject<{
3406
3436
  key: z$1.ZodString;
3407
3437
  aggregate: z$1.ZodEnum<{
3408
3438
  avg: "avg";
3409
- sum: "sum";
3410
3439
  min: "min";
3411
3440
  max: "max";
3441
+ sum: "sum";
3412
3442
  latest: "latest";
3413
3443
  passThresholdRate: "passThresholdRate";
3414
3444
  }>;
@@ -3454,9 +3484,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3454
3484
  key: z$1.ZodString;
3455
3485
  aggregate: z$1.ZodEnum<{
3456
3486
  avg: "avg";
3457
- sum: "sum";
3458
3487
  min: "min";
3459
3488
  max: "max";
3489
+ sum: "sum";
3460
3490
  latest: "latest";
3461
3491
  passThresholdRate: "passThresholdRate";
3462
3492
  }>;
@@ -3496,9 +3526,9 @@ declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
3496
3526
  key: z$1.ZodString;
3497
3527
  aggregate: z$1.ZodEnum<{
3498
3528
  avg: "avg";
3499
- sum: "sum";
3500
3529
  min: "min";
3501
3530
  max: "max";
3531
+ sum: "sum";
3502
3532
  latest: "latest";
3503
3533
  passThresholdRate: "passThresholdRate";
3504
3534
  }>;
@@ -3514,10 +3544,10 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3514
3544
  shortId: z$1.ZodString;
3515
3545
  status: z$1.ZodEnum<{
3516
3546
  error: "error";
3517
- pending: "pending";
3518
3547
  running: "running";
3519
- completed: "completed";
3520
3548
  cancelled: "cancelled";
3549
+ pending: "pending";
3550
+ completed: "completed";
3521
3551
  }>;
3522
3552
  temporary: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodBoolean>>;
3523
3553
  startedAt: z$1.ZodString;
@@ -3526,9 +3556,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3526
3556
  evalSourceFingerprints: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodString>>>;
3527
3557
  target: z$1.ZodObject<{
3528
3558
  mode: z$1.ZodEnum<{
3559
+ caseIds: "caseIds";
3529
3560
  all: "all";
3530
3561
  evalIds: "evalIds";
3531
- caseIds: "caseIds";
3532
3562
  }>;
3533
3563
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
3534
3564
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -3542,9 +3572,9 @@ declare const runManifestSchema$1: z$1.ZodObject<{
3542
3572
  median: "median";
3543
3573
  }>>>;
3544
3574
  cacheMode: z$1.ZodOptional<z$1.ZodEnum<{
3545
- use: "use";
3546
- bypass: "bypass";
3547
3575
  refresh: "refresh";
3576
+ bypass: "bypass";
3577
+ use: "use";
3548
3578
  }>>;
3549
3579
  }, z$1.core.$strip>;
3550
3580
  /** Persisted lifecycle metadata for a single eval run. */
@@ -3554,10 +3584,10 @@ declare const runSummarySchema$1: z$1.ZodObject<{
3554
3584
  runId: z$1.ZodString;
3555
3585
  status: z$1.ZodEnum<{
3556
3586
  error: "error";
3557
- pending: "pending";
3558
3587
  running: "running";
3559
- completed: "completed";
3560
3588
  cancelled: "cancelled";
3589
+ pending: "pending";
3590
+ completed: "completed";
3561
3591
  }>;
3562
3592
  totalCases: z$1.ZodNumber;
3563
3593
  passedCases: z$1.ZodNumber;
@@ -3613,7 +3643,7 @@ type ScopedCaseSummary = {
3613
3643
  //#endregion
3614
3644
  //#region src/evalStatus.d.ts
3615
3645
  /** Display status used for eval, file, and folder UI surfaces. */
3616
- type EvalDisplayStatus = DerivedStatus | 'stale' | 'outdated' | 'unscored';
3646
+ type EvalDisplayStatus = DerivedStatus | 'enqueued' | 'stale' | 'outdated' | 'unscored';
3617
3647
  /**
3618
3648
  * Derive the user-facing eval status from the raw latest run result plus
3619
3649
  * freshness state.
@@ -3661,10 +3691,17 @@ type EvalCase$1<TInput = unknown> = {
3661
3691
  };
3662
3692
  /** Query helpers built from the flattened trace recorded for one eval case. */
3663
3693
  type EvalTraceTree$1 = {
3664
- spans: EvalTraceSpan$1[];
3665
- rootSpans: EvalTraceSpan$1[];
3666
- findSpan: (name: string) => EvalTraceSpan$1 | undefined;
3667
- findSpansByKind: (kind: string) => EvalTraceSpan$1[];
3694
+ /** Flat span list in creation order. */spans: EvalTraceSpan$1[]; /** Top-level spans whose `parentId` is `null`. */
3695
+ rootSpans: EvalTraceSpan$1[]; /** Return the first span whose name exactly matches `name`. */
3696
+ findSpan: (name: string) => EvalTraceSpan$1 | undefined; /** Return every span whose name exactly matches `name`. */
3697
+ findSpans: (name: string) => EvalTraceSpan$1[]; /** Return whether any span name exactly matches `name`. */
3698
+ hasSpan: (name: string) => boolean; /** Return every span whose kind exactly matches `kind`. */
3699
+ findSpansByKind: (kind: string) => EvalTraceSpan$1[]; /** Return every span with `kind: 'tool'`. */
3700
+ findToolCallSpans: () => EvalTraceSpan$1[]; /** Return the names of every span with `kind: 'tool'`. */
3701
+ listToolCallSpanNames: () => string[]; /** Return whether a `kind: 'tool'` span has a name exactly matching `name`. */
3702
+ hasToolCallSpan: (name: string) => boolean; /** Return span names in creation order, optionally filtered by kind. */
3703
+ listSpanNames: (kind?: string) => string[]; /** Return span names in depth-first tree order, optionally filtered by kind. */
3704
+ listSpanNamesDfs: (kind?: string) => string[]; /** Return all spans in depth-first tree order. */
3668
3705
  flattenDfs: () => EvalTraceSpan$1[];
3669
3706
  checkpoints: Map<string, unknown>;
3670
3707
  };
@@ -3684,6 +3721,13 @@ type EvalDeriveFn$1<TInput = unknown> = (ctx: EvalDeriveContext$1<TInput>) => Re
3684
3721
  /** Trace-derived output config accepted globally and on eval definitions. */
3685
3722
  type EvalDeriveConfig$1<TInput = unknown> = EvalDeriveMap$1<TInput> | EvalDeriveFn$1<TInput>;
3686
3723
  /** Schema for keyed or object-returning trace-derived output config. */
3724
+ /** Function that records trace-derived assertions for one case. */
3725
+ type EvalTracingAssertionsFn$1<TInput = unknown> = (ctx: EvalDeriveContext$1<TInput>) => MaybePromise<void>;
3726
+ /** Keyed trace-derived assertion config for grouping related checks. */
3727
+ type EvalTracingAssertionsMap$1<TInput = unknown> = Record<string, EvalTracingAssertionsFn$1<TInput>>;
3728
+ /** Trace-derived assertion config accepted globally and on eval definitions. */
3729
+ type EvalTracingAssertionsConfig$1<TInput = unknown> = EvalTracingAssertionsMap$1<TInput> | EvalTracingAssertionsFn$1<TInput>;
3730
+ /** Schema for function or keyed trace-derived assertion config. */
3687
3731
  /** UI overrides for a derived or scored column emitted by an eval. */
3688
3732
  type EvalColumnOverride$1 = {
3689
3733
  /** Display label shown for the column in tables and detail views. */label?: string;
@@ -4136,9 +4180,19 @@ type AgentEvalsConfig$1 = {
4136
4180
  * Prefer the keyed map form for shared metrics:
4137
4181
  * `{ toolCalls: ({ trace }) => trace.findSpansByKind('tool').length }`.
4138
4182
  * The object-returning function form is also supported. Derived outputs
4139
- * only fill keys that were not already recorded by eval execution.
4183
+ * only fill keys that were not already recorded by eval execution. Do not
4184
+ * call assertion helpers here; use `tracingAssertions` for trace-derived
4185
+ * pass/fail checks.
4140
4186
  */
4141
4187
  deriveFromTracing?: EvalDeriveConfig$1;
4188
+ /**
4189
+ * Workspace-wide assertions derived from the finished execution trace.
4190
+ *
4191
+ * These run after `deriveFromTracing` and before output schema validation and
4192
+ * scores. Use `evalAssert(...)` or `evalExpect(...)` inside the callback to
4193
+ * record normal assertion results without creating fake score columns.
4194
+ */
4195
+ tracingAssertions?: EvalTracingAssertionsConfig$1;
4142
4196
  /**
4143
4197
  * Workspace-wide stats prepended to every eval's stats row.
4144
4198
  *
@@ -4469,9 +4523,9 @@ declare function extractApiCalls(spans: EvalTraceSpan$1[], config: ResolvedApiCa
4469
4523
  * - `refresh`: never read, always write (forces re-execution and overwrites).
4470
4524
  */
4471
4525
  declare const cacheModeSchema: z$1.ZodEnum<{
4472
- use: "use";
4473
- bypass: "bypass";
4474
4526
  refresh: "refresh";
4527
+ bypass: "bypass";
4528
+ use: "use";
4475
4529
  }>;
4476
4530
  /** Mode controlling how cached spans behave during a run. */
4477
4531
  type CacheMode = z$1.infer<typeof cacheModeSchema>;
@@ -4492,10 +4546,10 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
4492
4546
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
4493
4547
  /** Status of a cache lookup recorded on a span or case scope. */
4494
4548
  declare const cacheStatusSchema: z$1.ZodEnum<{
4495
- bypass: "bypass";
4496
- refresh: "refresh";
4497
4549
  hit: "hit";
4498
4550
  miss: "miss";
4551
+ refresh: "refresh";
4552
+ bypass: "bypass";
4499
4553
  }>;
4500
4554
  /** Status of a cache lookup recorded on a span or case scope. */
4501
4555
  type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
@@ -4512,10 +4566,10 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
4512
4566
  namespace: z$1.ZodString;
4513
4567
  key: z$1.ZodString;
4514
4568
  status: z$1.ZodEnum<{
4515
- bypass: "bypass";
4516
- refresh: "refresh";
4517
4569
  hit: "hit";
4518
4570
  miss: "miss";
4571
+ refresh: "refresh";
4572
+ bypass: "bypass";
4519
4573
  }>;
4520
4574
  read: z$1.ZodOptional<z$1.ZodBoolean>;
4521
4575
  stored: z$1.ZodOptional<z$1.ZodBoolean>;
@@ -5467,9 +5521,9 @@ type ConfigReloadState = z$1.infer<typeof configReloadStateSchema$1>;
5467
5521
  declare const createRunRequestSchema$1: z$1.ZodObject<{
5468
5522
  target: z$1.ZodObject<{
5469
5523
  mode: z$1.ZodEnum<{
5524
+ caseIds: "caseIds";
5470
5525
  all: "all";
5471
5526
  evalIds: "evalIds";
5472
- caseIds: "caseIds";
5473
5527
  }>;
5474
5528
  evalKeys: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
5475
5529
  files: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -5481,9 +5535,9 @@ declare const createRunRequestSchema$1: z$1.ZodObject<{
5481
5535
  temporary: z$1.ZodOptional<z$1.ZodBoolean>;
5482
5536
  cache: z$1.ZodOptional<z$1.ZodObject<{
5483
5537
  mode: z$1.ZodDefault<z$1.ZodEnum<{
5484
- use: "use";
5485
- bypass: "bypass";
5486
5538
  refresh: "refresh";
5539
+ bypass: "bypass";
5540
+ use: "use";
5487
5541
  }>>;
5488
5542
  }, z$1.core.$strip>>;
5489
5543
  manualInputs: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>>;
@@ -6369,6 +6423,7 @@ declare const caseDetailSchema: z$1.ZodObject<{
6369
6423
  phase: z$1.ZodEnum<{
6370
6424
  eval: "eval";
6371
6425
  derive: "derive";
6426
+ tracingAssertions: "tracingAssertions";
6372
6427
  outputsSchema: "outputsSchema";
6373
6428
  scorer: "scorer";
6374
6429
  }>;
@@ -6995,7 +7050,8 @@ type EvalRunner = {
6995
7050
  getEvals(): EvalSummary$1[]; /** Look up one discovered eval by id. */
6996
7051
  getEval(id: string): EvalSummary$1 | undefined; /** Return discovery errors that should be shown before running evals. */
6997
7052
  getDiscoveryIssues(): DiscoveryIssue$1[]; /** Return current config-reload state for the long-running app server. */
6998
- getConfigReloadState(): ConfigReloadState$1; /** Re-scan configured eval files and emit a discovery update to listeners. */
7053
+ getConfigReloadState(): ConfigReloadState$1; /** Return the effective per-run case concurrency after applying defaults. */
7054
+ getConfiguredConcurrency(): number; /** Re-scan configured eval files and emit a discovery update to listeners. */
6999
7055
  refreshDiscovery(): Promise<void>;
7000
7056
  startRun(request: CreateRunRequest$1): Promise<{
7001
7057
  manifest: RunManifest$1;
@@ -7230,4 +7286,4 @@ declare function defineEval<TInput = unknown, TOutputs extends EvalOutputs = Eva
7230
7286
  /** Return whether the active eval case has tags matching the typed input. */
7231
7287
  declare function matchesEvalTags(input: EvalTagMatchInput): boolean;
7232
7288
  //#endregion
7233
- export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
7289
+ export { AgentEvalTagRegistry, AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheRepairSummary, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CallDerivedAttributesConfig, type CallDerivedAttributesFn, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type ConfigReloadState, type ConfigReloadStatus, type CreateRunRequest, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualInputConfig, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, EvalRuntimeUsageError, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, EvalTag, EvalTagMatchInput, type EvalTraceTree, type EvalTracingAssertionsConfig, type EvalTracingAssertionsFn, type EvalTracingAssertionsMap, type JsonCell, type LlmCallCostBreakdown, type LlmCallCostCurrency, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallSimulatedTokens, type LlmCallsConfigInput, type LlmCostScenario, type ManualInputDescriptor, type ManualInputFieldDescriptor, type ManualInputFieldKind, type ManualInputFieldOverride, type ManualInputFieldsConfig, type ManualInputFileValue, type ManualInputSelectOption, type MaterializeManualInputFilesResult, type NumberDisplayOptions, type ReadManualInputFileResult, type RemoveDefaultConfig, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallCostCurrency, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };