@ls-stack/agent-eval 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -99,6 +99,7 @@ declare const columnDefSchema: z$1.ZodObject<{
99
99
  passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
100
100
  maxStars: z$1.ZodOptional<z$1.ZodNumber>;
101
101
  hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
102
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
102
103
  align: z$1.ZodOptional<z$1.ZodEnum<{
103
104
  left: "left";
104
105
  center: "center";
@@ -381,13 +382,17 @@ type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
381
382
  * `column` aggregates a score or numeric output column across the latest run.
382
383
  */
383
384
  declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
385
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
384
386
  kind: z$1.ZodLiteral<"cases">;
385
387
  }, z$1.core.$strip>, z$1.ZodObject<{
388
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
386
389
  kind: z$1.ZodLiteral<"passRate">;
387
390
  accent: z$1.ZodOptional<z$1.ZodBoolean>;
388
391
  }, z$1.core.$strip>, z$1.ZodObject<{
392
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
389
393
  kind: z$1.ZodLiteral<"duration">;
390
394
  }, z$1.core.$strip>, z$1.ZodObject<{
395
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
391
396
  kind: z$1.ZodLiteral<"column">;
392
397
  key: z$1.ZodString;
393
398
  label: z$1.ZodOptional<z$1.ZodString>;
@@ -419,13 +424,17 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
419
424
  type EvalStatItem = z$1.infer<typeof evalStatItemSchema>;
420
425
  /** Ordered list of stats rendered in the EvalCard stats row. */
421
426
  declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
427
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
422
428
  kind: z$1.ZodLiteral<"cases">;
423
429
  }, z$1.core.$strip>, z$1.ZodObject<{
430
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
424
431
  kind: z$1.ZodLiteral<"passRate">;
425
432
  accent: z$1.ZodOptional<z$1.ZodBoolean>;
426
433
  }, z$1.core.$strip>, z$1.ZodObject<{
434
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
427
435
  kind: z$1.ZodLiteral<"duration">;
428
436
  }, z$1.core.$strip>, z$1.ZodObject<{
437
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
429
438
  kind: z$1.ZodLiteral<"column">;
430
439
  key: z$1.ZodString;
431
440
  label: z$1.ZodOptional<z$1.ZodString>;
@@ -499,6 +508,7 @@ declare const evalSummarySchema: z$1.ZodObject<{
499
508
  passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
500
509
  maxStars: z$1.ZodOptional<z$1.ZodNumber>;
501
510
  hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
511
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
502
512
  align: z$1.ZodOptional<z$1.ZodEnum<{
503
513
  left: "left";
504
514
  center: "center";
@@ -515,13 +525,17 @@ declare const evalSummarySchema: z$1.ZodObject<{
515
525
  unscored: "unscored";
516
526
  }>>;
517
527
  stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
528
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
518
529
  kind: z$1.ZodLiteral<"cases">;
519
530
  }, z$1.core.$strip>, z$1.ZodObject<{
531
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
520
532
  kind: z$1.ZodLiteral<"passRate">;
521
533
  accent: z$1.ZodOptional<z$1.ZodBoolean>;
522
534
  }, z$1.core.$strip>, z$1.ZodObject<{
535
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
523
536
  kind: z$1.ZodLiteral<"duration">;
524
537
  }, z$1.core.$strip>, z$1.ZodObject<{
538
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
525
539
  kind: z$1.ZodLiteral<"column">;
526
540
  key: z$1.ZodString;
527
541
  label: z$1.ZodOptional<z$1.ZodString>;
@@ -551,6 +565,7 @@ declare const evalSummarySchema: z$1.ZodObject<{
551
565
  }, z$1.core.$strip>], "kind">>>;
552
566
  charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
553
567
  heading: z$1.ZodOptional<z$1.ZodString>;
568
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
554
569
  type: z$1.ZodEnum<{
555
570
  area: "area";
556
571
  line: "line";
@@ -666,6 +681,7 @@ declare const caseRowSchema: z$1.ZodObject<{
666
681
  type CaseRow = z$1.infer<typeof caseRowSchema>;
667
682
  /** Structured assertion failure metadata captured for one case run. */
668
683
  declare const assertionFailureSchema: z$1.ZodObject<{
684
+ name: z$1.ZodOptional<z$1.ZodString>;
669
685
  message: z$1.ZodString;
670
686
  stack: z$1.ZodOptional<z$1.ZodString>;
671
687
  }, z$1.core.$strip>;
@@ -961,10 +977,12 @@ declare const caseDetailSchema: z$1.ZodObject<{
961
977
  fileName: z$1.ZodOptional<z$1.ZodString>;
962
978
  }, z$1.core.$strip>]>]>>;
963
979
  assertionFailures: z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
980
+ name: z$1.ZodOptional<z$1.ZodString>;
964
981
  message: z$1.ZodString;
965
982
  stack: z$1.ZodOptional<z$1.ZodString>;
966
983
  }, z$1.core.$strip>, z$1.ZodPipe<z$1.ZodString, z$1.ZodTransform<{
967
984
  message: string;
985
+ name?: string | undefined;
968
986
  stack?: string | undefined;
969
987
  }, string>>]>>;
970
988
  logs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
@@ -1008,6 +1026,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
1008
1026
  refresh: "refresh";
1009
1027
  bypass: "bypass";
1010
1028
  }>;
1029
+ read: z$1.ZodOptional<z$1.ZodBoolean>;
1030
+ stored: z$1.ZodOptional<z$1.ZodBoolean>;
1011
1031
  storedAt: z$1.ZodOptional<z$1.ZodString>;
1012
1032
  age: z$1.ZodOptional<z$1.ZodNumber>;
1013
1033
  }, z$1.core.$strip>>>;
@@ -1187,6 +1207,7 @@ type EvalChartTooltipExtra = z$1.infer<typeof evalChartTooltipExtraSchema>;
1187
1207
  */
1188
1208
  declare const evalChartConfigSchema: z$1.ZodObject<{
1189
1209
  heading: z$1.ZodOptional<z$1.ZodString>;
1210
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
1190
1211
  type: z$1.ZodEnum<{
1191
1212
  area: "area";
1192
1213
  line: "line";
@@ -1275,6 +1296,7 @@ type EvalChartConfig = z$1.infer<typeof evalChartConfigSchema>;
1275
1296
  */
1276
1297
  declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
1277
1298
  heading: z$1.ZodOptional<z$1.ZodString>;
1299
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
1278
1300
  type: z$1.ZodEnum<{
1279
1301
  area: "area";
1280
1302
  line: "line";
@@ -1549,6 +1571,82 @@ declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<t
1549
1571
  }>>]>;
1550
1572
  /** Removal config for built-in eval-level outputs and UI metadata. */
1551
1573
  type RemoveDefaultConfig = z$1.infer<typeof removeDefaultConfigSchema>;
1574
+ /** Single authored eval case with its stable identifier and input payload. */
1575
+ type EvalCase<TInput = unknown> = {
1576
+ id: string;
1577
+ input: TInput;
1578
+ tags?: string[];
1579
+ };
1580
+ /** Query helpers built from the flattened trace recorded for one eval case. */
1581
+ type EvalTraceTree = {
1582
+ spans: EvalTraceSpan[];
1583
+ rootSpans: EvalTraceSpan[];
1584
+ findSpan: (name: string) => EvalTraceSpan | undefined;
1585
+ findSpansByKind: (kind: string) => EvalTraceSpan[];
1586
+ flattenDfs: () => EvalTraceSpan[];
1587
+ checkpoints: Map<string, unknown>;
1588
+ };
1589
+ /** Context passed to `deriveFromTracing` after execution has completed. */
1590
+ type EvalDeriveContext<TInput = unknown> = {
1591
+ trace: EvalTraceTree;
1592
+ input: TInput;
1593
+ case: EvalCase<TInput>;
1594
+ };
1595
+ type MaybePromise<T> = T | Promise<T>;
1596
+ /** Function that derives one output value for a configured output key. */
1597
+ type EvalDeriveValueFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => MaybePromise<unknown>;
1598
+ /** Keyed `deriveFromTracing` config where each key derives one output value. */
1599
+ type EvalDeriveMap<TInput = unknown> = Record<string, EvalDeriveValueFn<TInput>>;
1600
+ /** Object-returning `deriveFromTracing` callback. */
1601
+ type EvalDeriveFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => Record<string, unknown> | Promise<Record<string, unknown>>;
1602
+ /** Trace-derived output config accepted globally and on eval definitions. */
1603
+ type EvalDeriveConfig<TInput = unknown> = EvalDeriveMap<TInput> | EvalDeriveFn<TInput>;
1604
+ /** Schema for keyed or object-returning trace-derived output config. */
1605
+ declare const evalDeriveConfigSchema: z$1.ZodType<EvalDeriveConfig>;
1606
+ /** UI overrides for a derived or scored column emitted by an eval. */
1607
+ type EvalColumnOverride = {
1608
+ /** Display label shown for the column in tables and detail views. */label?: string;
1609
+ /**
1610
+ * Presentation preset for the value.
1611
+ *
1612
+ * Use this to control how the UI renders the cell and infer table behavior,
1613
+ * for example `number`, `boolean`, `duration`, `markdown`, `json`, or
1614
+ * file/media previews.
1615
+ */
1616
+ format?: ColumnFormat;
1617
+ /**
1618
+ * Extra options for `format: 'number'`.
1619
+ *
1620
+ * Use this to add a prefix or suffix, control minimum and maximum decimal
1621
+ * places, or switch to compact notation such as `1.2K`.
1622
+ */
1623
+ numberFormat?: NumberDisplayOptions;
1624
+ /**
1625
+ * Hides the column from the runs table while keeping it available in detail
1626
+ * views and raw output data.
1627
+ */
1628
+ hideInTable?: boolean;
1629
+ /**
1630
+ * Hides the column from the runs table when none of the rendered rows have a
1631
+ * value. Missing values, `null`, and empty strings count as no value; `0` and
1632
+ * `false` remain visible.
1633
+ */
1634
+ hideIfNoValue?: boolean; /** Horizontal alignment used when rendering the column cells. */
1635
+ align?: 'left' | 'center' | 'right';
1636
+ /**
1637
+ * Maximum number of stars used when `format: 'stars'`.
1638
+ *
1639
+ * Values are still stored as normalized `0..1` numbers; the UI maps the
1640
+ * selected star count evenly across that range.
1641
+ */
1642
+ maxStars?: number;
1643
+ };
1644
+ /** Column override map keyed by output or score field name. */
1645
+ type EvalColumns = Record<string, EvalColumnOverride>;
1646
+ /** Schema for UI overrides on derived or scored columns. */
1647
+ declare const evalColumnOverrideSchema: z$1.ZodType<EvalColumnOverride>;
1648
+ /** Schema for column override maps keyed by output or score field name. */
1649
+ declare const evalColumnsSchema: z$1.ZodType<EvalColumns>;
1552
1650
  /** Render formats supported by an LLM-call metric in the UI. */
1553
1651
  declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
1554
1652
  string: "string";
@@ -1662,21 +1760,44 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
1662
1760
  /** User-defined API-call metric authored in `agent-evals.config.ts`. */
1663
1761
  type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
1664
1762
  /**
1665
- * Schema for one model/provider pricing entry used to derive LLM-call costs
1666
- * from token counts.
1763
+ * Schema for pricing rates used to derive LLM-call costs from token counts.
1764
+ */
1765
+ declare const llmCallPricingRateSchema: z$1.ZodObject<{
1766
+ inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1767
+ outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1768
+ cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1769
+ cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1770
+ cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1771
+ reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1772
+ }, z$1.core.$strip>;
1773
+ /** Token pricing rates authored in `agent-evals.config.ts`. */
1774
+ type LlmCallPricingRate = z$1.infer<typeof llmCallPricingRateSchema>;
1775
+ /**
1776
+ * Schema for one model's pricing config. The object key is the exact model
1777
+ * name. Use `providers` when a model has provider-specific rates in addition
1778
+ * to, or instead of, generic model rates.
1667
1779
  */
1668
1780
  declare const llmCallPricingSchema: z$1.ZodObject<{
1669
- model: z$1.ZodString;
1670
- provider: z$1.ZodOptional<z$1.ZodString>;
1671
1781
  inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1672
1782
  outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1673
1783
  cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1674
1784
  cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1675
1785
  cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1676
1786
  reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1787
+ provider: z$1.ZodOptional<z$1.ZodString>;
1788
+ providers: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
1789
+ inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1790
+ outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1791
+ cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1792
+ cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1793
+ cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1794
+ reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1795
+ }, z$1.core.$strip>>>;
1677
1796
  }, z$1.core.$strip>;
1678
- /** Model/provider pricing entry authored in `agent-evals.config.ts`. */
1797
+ /** Model pricing config authored in `agent-evals.config.ts`. */
1679
1798
  type LlmCallPricing = z$1.infer<typeof llmCallPricingSchema>;
1799
+ /** Model-keyed pricing registry authored in `agent-evals.config.ts`. */
1800
+ type LlmCallPricingRegistry = Record<string, LlmCallPricing>;
1680
1801
  /** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
1681
1802
  declare const llmCallsConfigSchema: z$1.ZodObject<{
1682
1803
  kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -1698,15 +1819,22 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
1698
1819
  toolCalls: z$1.ZodOptional<z$1.ZodString>;
1699
1820
  }, z$1.core.$strip>>;
1700
1821
  derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
1701
- pricing: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
1702
- model: z$1.ZodString;
1703
- provider: z$1.ZodOptional<z$1.ZodString>;
1822
+ pricing: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
1704
1823
  inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1705
1824
  outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1706
1825
  cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1707
1826
  cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1708
1827
  cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1709
1828
  reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1829
+ provider: z$1.ZodOptional<z$1.ZodString>;
1830
+ providers: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
1831
+ inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1832
+ outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1833
+ cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1834
+ cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1835
+ cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1836
+ reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1837
+ }, z$1.core.$strip>>>;
1710
1838
  }, z$1.core.$strip>>>;
1711
1839
  metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
1712
1840
  label: z$1.ZodString;
@@ -1855,7 +1983,7 @@ declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
1855
1983
  * - Missing `metrics[].format` defaults to `'string'`.
1856
1984
  * - Missing `metrics[].placements` defaults to `['body']`.
1857
1985
  * - Missing `pricing` defaults to an empty registry; built-in costs are only
1858
- * derived from configured pricing and token counts.
1986
+ * derived from configured model-keyed pricing and token counts.
1859
1987
  */
1860
1988
  declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
1861
1989
  /**
@@ -1904,12 +2032,35 @@ type AgentEvalsConfig = {
1904
2032
  * definition taking precedence for matching `key` or `path` entries.
1905
2033
  */
1906
2034
  traceDisplay?: TraceDisplayInputConfig;
2035
+ /**
2036
+ * Workspace-wide output columns applied to every eval.
2037
+ *
2038
+ * Eval-level `columns` with the same key take precedence. Built-in default
2039
+ * columns are still added first unless removed with `removeDefaultConfig`.
2040
+ */
2041
+ columns?: EvalColumns;
2042
+ /**
2043
+ * Workspace-wide trace-derived outputs applied to every eval case.
2044
+ *
2045
+ * Prefer the keyed map form for shared metrics:
2046
+ * `{ toolCalls: ({ trace }) => trace.findSpansByKind('tool').length }`.
2047
+ * The object-returning function form is also supported. Derived outputs
2048
+ * only fill keys that were not already recorded by eval execution.
2049
+ */
2050
+ deriveFromTracing?: EvalDeriveConfig;
2051
+ /**
2052
+ * Workspace-wide stats prepended to every eval's stats row.
2053
+ *
2054
+ * Eval-level stats render after these, and built-in default stats are
2055
+ * appended last unless removed with `removeDefaultConfig`.
2056
+ */
2057
+ stats?: EvalStatsConfig;
1907
2058
  /**
1908
2059
  * Configuration for the "LLM calls" tab in the case-run drawer.
1909
2060
  *
1910
2061
  * Determines which trace spans are treated as LLM calls (`kinds`), how
1911
2062
  * structured fields like `model` and `usage.inputTokens` are read from
1912
- * span attributes, which pricing table derives built-in costs, and which
2063
+ * span attributes, which pricing registry derives built-in costs, and which
1913
2064
  * custom user-defined metrics are surfaced on each call. All fields are
1914
2065
  * optional and fall back to the documented defaults; the LLM calls tab is
1915
2066
  * shown automatically when at least one matching span exists in a case run.
@@ -1924,10 +2075,13 @@ type AgentEvalsConfig = {
1924
2075
  * metrics: [
1925
2076
  * { label: 'Retries', path: 'retryCount', format: 'number' },
1926
2077
  * ],
1927
- * pricing: [
1928
- * { model: 'gpt-4o-mini', provider: 'openai',
1929
- * inputUsdPerMillion: 0.15, outputUsdPerMillion: 0.6 },
1930
- * ],
2078
+ * pricing: {
2079
+ * 'gpt-4o-mini': {
2080
+ * provider: 'openai',
2081
+ * inputUsdPerMillion: 0.15,
2082
+ * outputUsdPerMillion: 0.6,
2083
+ * },
2084
+ * },
1931
2085
  * }
1932
2086
  * ```
1933
2087
  */
@@ -2036,6 +2190,47 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
2036
2190
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
2037
2191
  }, z$1.core.$strip>>>;
2038
2192
  }, z$1.core.$strip>>;
2193
+ columns: z$1.ZodOptional<z$1.ZodType<EvalColumns, unknown, z$1.core.$ZodTypeInternals<EvalColumns, unknown>>>;
2194
+ deriveFromTracing: z$1.ZodOptional<z$1.ZodType<EvalDeriveConfig<unknown>, unknown, z$1.core.$ZodTypeInternals<EvalDeriveConfig<unknown>, unknown>>>;
2195
+ stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
2196
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2197
+ kind: z$1.ZodLiteral<"cases">;
2198
+ }, z$1.core.$strip>, z$1.ZodObject<{
2199
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2200
+ kind: z$1.ZodLiteral<"passRate">;
2201
+ accent: z$1.ZodOptional<z$1.ZodBoolean>;
2202
+ }, z$1.core.$strip>, z$1.ZodObject<{
2203
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2204
+ kind: z$1.ZodLiteral<"duration">;
2205
+ }, z$1.core.$strip>, z$1.ZodObject<{
2206
+ hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
2207
+ kind: z$1.ZodLiteral<"column">;
2208
+ key: z$1.ZodString;
2209
+ label: z$1.ZodOptional<z$1.ZodString>;
2210
+ aggregate: z$1.ZodEnum<{
2211
+ last: "last";
2212
+ sum: "sum";
2213
+ avg: "avg";
2214
+ min: "min";
2215
+ max: "max";
2216
+ }>;
2217
+ format: z$1.ZodOptional<z$1.ZodEnum<{
2218
+ number: "number";
2219
+ boolean: "boolean";
2220
+ file: "file";
2221
+ duration: "duration";
2222
+ json: "json";
2223
+ markdown: "markdown";
2224
+ image: "image";
2225
+ audio: "audio";
2226
+ video: "video";
2227
+ percent: "percent";
2228
+ passFail: "passFail";
2229
+ stars: "stars";
2230
+ }>>;
2231
+ numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
2232
+ accent: z$1.ZodOptional<z$1.ZodBoolean>;
2233
+ }, z$1.core.$strip>], "kind">>>;
2039
2234
  llmCalls: z$1.ZodOptional<z$1.ZodObject<{
2040
2235
  kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2041
2236
  attributes: z$1.ZodOptional<z$1.ZodObject<{
@@ -2056,15 +2251,22 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
2056
2251
  toolCalls: z$1.ZodOptional<z$1.ZodString>;
2057
2252
  }, z$1.core.$strip>>;
2058
2253
  derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
2059
- pricing: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
2060
- model: z$1.ZodString;
2061
- provider: z$1.ZodOptional<z$1.ZodString>;
2254
+ pricing: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
2062
2255
  inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2063
2256
  outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2064
2257
  cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2065
2258
  cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2066
2259
  cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2067
2260
  reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2261
+ provider: z$1.ZodOptional<z$1.ZodString>;
2262
+ providers: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
2263
+ inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2264
+ outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2265
+ cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2266
+ cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2267
+ cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2268
+ reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2269
+ }, z$1.core.$strip>>>;
2068
2270
  }, z$1.core.$strip>>>;
2069
2271
  metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
2070
2272
  label: z$1.ZodString;
@@ -2327,6 +2529,8 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
2327
2529
  hit: "hit";
2328
2530
  miss: "miss";
2329
2531
  }>;
2532
+ read: z$1.ZodOptional<z$1.ZodBoolean>;
2533
+ stored: z$1.ZodOptional<z$1.ZodBoolean>;
2330
2534
  storedAt: z$1.ZodOptional<z$1.ZodString>;
2331
2535
  age: z$1.ZodOptional<z$1.ZodNumber>;
2332
2536
  }, z$1.core.$strip>;
@@ -2344,7 +2548,6 @@ declare const cacheListItemSchema: z$1.ZodObject<{
2344
2548
  spanName: z$1.ZodOptional<z$1.ZodString>;
2345
2549
  spanKind: z$1.ZodOptional<z$1.ZodString>;
2346
2550
  storedAt: z$1.ZodString;
2347
- codeFingerprint: z$1.ZodString;
2348
2551
  sizeBytes: z$1.ZodNumber;
2349
2552
  }, z$1.core.$strip>;
2350
2553
  /** Summary row for a single cache entry. */
@@ -2469,7 +2672,6 @@ declare const cacheEntrySchema: z$1.ZodObject<{
2469
2672
  spanName: z$1.ZodOptional<z$1.ZodString>;
2470
2673
  spanKind: z$1.ZodOptional<z$1.ZodString>;
2471
2674
  storedAt: z$1.ZodString;
2472
- codeFingerprint: z$1.ZodString;
2473
2675
  recording: z$1.ZodObject<{
2474
2676
  returnValue: z$1.ZodUnknown;
2475
2677
  finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
@@ -2542,7 +2744,6 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
2542
2744
  }>;
2543
2745
  operationName: z$1.ZodString;
2544
2746
  storedAt: z$1.ZodString;
2545
- codeFingerprint: z$1.ZodString;
2546
2747
  rawKey: z$1.ZodUnknown;
2547
2748
  }, z$1.core.$strip>;
2548
2749
  /** Debug-only raw cache key entry. May contain sensitive prompt/input data. */
@@ -2560,7 +2761,6 @@ declare const cacheEntryWithDebugKeySchema: z$1.ZodObject<{
2560
2761
  spanName: z$1.ZodOptional<z$1.ZodString>;
2561
2762
  spanKind: z$1.ZodOptional<z$1.ZodString>;
2562
2763
  storedAt: z$1.ZodString;
2563
- codeFingerprint: z$1.ZodString;
2564
2764
  recording: z$1.ZodObject<{
2565
2765
  returnValue: z$1.ZodUnknown;
2566
2766
  finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
@@ -2629,7 +2829,6 @@ declare const cacheEntryWithDebugKeySchema: z$1.ZodObject<{
2629
2829
  }>;
2630
2830
  operationName: z$1.ZodString;
2631
2831
  storedAt: z$1.ZodString;
2632
- codeFingerprint: z$1.ZodString;
2633
2832
  rawKey: z$1.ZodUnknown;
2634
2833
  }, z$1.core.$strip>>;
2635
2834
  }, z$1.core.$strip>;
@@ -2651,7 +2850,6 @@ declare const cacheFileSchema: z$1.ZodObject<{
2651
2850
  spanName: z$1.ZodOptional<z$1.ZodString>;
2652
2851
  spanKind: z$1.ZodOptional<z$1.ZodString>;
2653
2852
  storedAt: z$1.ZodString;
2654
- codeFingerprint: z$1.ZodString;
2655
2853
  recording: z$1.ZodObject<{
2656
2854
  returnValue: z$1.ZodUnknown;
2657
2855
  finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
@@ -2728,7 +2926,6 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
2728
2926
  }>;
2729
2927
  operationName: z$1.ZodString;
2730
2928
  storedAt: z$1.ZodString;
2731
- codeFingerprint: z$1.ZodString;
2732
2929
  rawKey: z$1.ZodUnknown;
2733
2930
  }, z$1.core.$strip>>;
2734
2931
  }, z$1.core.$strip>;
@@ -2742,16 +2939,19 @@ type CacheDebugKeyFile = z$1.infer<typeof cacheDebugKeyFileSchema>;
2742
2939
  *
2743
2940
  * `action === 'hit'` rows reused an existing persisted cache entry.
2744
2941
  * `action === 'added'` rows came from a miss or refresh that wrote a persisted
2745
- * cache entry during the run. `origin === 'caseRoot'` rows came from
2746
- * `evalTracer.cache(...)` calls made directly from the case body (no
2747
- * surrounding `traceSpan`), which would otherwise be invisible.
2942
+ * cache entry during the run. `action === 'notStored'` rows executed a cached
2943
+ * operation but did not persist it because storage was disabled for that eval
2944
+ * scope. `origin === 'caseRoot'` rows came from `evalTracer.cache(...)` calls
2945
+ * made directly from the case body (no surrounding `traceSpan`), which would
2946
+ * otherwise be invisible.
2748
2947
  */
2749
2948
  type CacheActivityEntry = {
2750
2949
  id: string;
2751
2950
  source: 'span' | 'value';
2752
2951
  origin: 'span' | 'caseRoot';
2753
- action: 'hit' | 'added';
2952
+ action: 'hit' | 'added' | 'notStored';
2754
2953
  status: 'hit' | 'miss' | 'refresh';
2954
+ stored: boolean;
2755
2955
  name: string;
2756
2956
  namespace: string;
2757
2957
  key: string;
@@ -2840,12 +3040,6 @@ declare const updateManualScoreRequestSchema: z$1.ZodObject<{
2840
3040
  type UpdateManualScoreRequest = z$1.infer<typeof updateManualScoreRequestSchema>;
2841
3041
  //#endregion
2842
3042
  //#region ../sdk/src/types.d.ts
2843
- /** Single authored eval case with its stable identifier and input payload. */
2844
- type EvalCase<TInput> = {
2845
- id: string;
2846
- input: TInput;
2847
- tags?: string[];
2848
- };
2849
3043
  /** Runtime output values collected from output helpers and `deriveFromTracing`. */
2850
3044
  type EvalOutputs = Record<string, unknown>;
2851
3045
  /**
@@ -2861,48 +3055,22 @@ type EvalStartTime = Date | number | string;
2861
3055
  * `deriveFromTracing` finish, before computed scores run.
2862
3056
  */
2863
3057
  type EvalOutputsSchema<TOutputs extends EvalOutputs> = z$1.ZodType<TOutputs>;
2864
- /** UI overrides for a derived or scored column emitted by an eval. */
2865
- type EvalColumnOverride = {
2866
- /** Display label shown for the column in tables and detail views. */label?: string;
3058
+ /** Per-eval controls for SDK operation caching. */
3059
+ type EvalCacheConfig = {
2867
3060
  /**
2868
- * Presentation preset for the value.
3061
+ * Whether cached spans and value caches may read existing persisted entries.
2869
3062
  *
2870
- * Use this to control how the UI renders the cell and infer table behavior,
2871
- * for example `number`, `boolean`, `duration`, `markdown`, `json`, or
2872
- * file/media previews.
3063
+ * Defaults to `true`. Set to `false` when this eval should always execute
3064
+ * cached operations instead of replaying previous results.
2873
3065
  */
2874
- format?: ColumnFormat;
3066
+ read?: boolean;
2875
3067
  /**
2876
- * Extra options for `format: 'number'`.
3068
+ * Whether cached spans and value caches may persist entries after execution.
2877
3069
  *
2878
- * Use this to add a prefix or suffix, control minimum and maximum decimal
2879
- * places, or switch to compact notation such as `1.2K`.
3070
+ * Defaults to `true`. Set to `false` when this eval may reuse existing cache
3071
+ * entries but must not create or refresh stored cache files.
2880
3072
  */
2881
- numberFormat?: NumberDisplayOptions;
2882
- /**
2883
- * Hides the column from the runs table while keeping it available in detail
2884
- * views and raw output data.
2885
- */
2886
- hideInTable?: boolean; /** Horizontal alignment used when rendering the column cells. */
2887
- align?: 'left' | 'center' | 'right';
2888
- /**
2889
- * Maximum number of stars used when `format: 'stars'`.
2890
- *
2891
- * Values are still stored as normalized `0..1` numbers; the UI maps the
2892
- * selected star count evenly across that range.
2893
- */
2894
- maxStars?: number;
2895
- };
2896
- /** Column override map keyed by output or score field name. */
2897
- type EvalColumns = Record<string, EvalColumnOverride>;
2898
- /** Query helpers built from the flattened trace recorded for one eval case. */
2899
- type EvalTraceTree = {
2900
- spans: EvalTraceSpan[];
2901
- rootSpans: EvalTraceSpan[];
2902
- findSpan: (name: string) => EvalTraceSpan | undefined;
2903
- findSpansByKind: (kind: string) => EvalTraceSpan[];
2904
- flattenDfs: () => EvalTraceSpan[];
2905
- checkpoints: Map<string, unknown>;
3073
+ store?: boolean;
2906
3074
  };
2907
3075
  /** Type-safe output writer passed to an eval's `execute` function. */
2908
3076
  type EvalSetOutput<TOutputs extends EvalOutputs = EvalOutputs> = <TKey extends Extract<keyof TOutputs, string>>(
@@ -2930,12 +3098,6 @@ type EvalExecuteContext<TInput, TOutputs extends EvalOutputs = EvalOutputs> = {
2930
3098
  */
2931
3099
  setOutput: EvalSetOutput<TOutputs>;
2932
3100
  };
2933
- /** Context passed to `deriveFromTracing` after execution has completed. */
2934
- type EvalDeriveContext<TInput> = {
2935
- trace: EvalTraceTree;
2936
- input: TInput;
2937
- case: EvalCase<TInput>;
2938
- };
2939
3101
  /** Context passed to score functions after outputs have been collected. */
2940
3102
  type EvalScoreContext<TInput, TOutputs extends EvalOutputs = EvalOutputs> = {
2941
3103
  input: TInput;
@@ -2995,8 +3157,27 @@ type EvalDefinitionOutputSchemaConfig<TOutputs extends EvalOutputs> = [EvalOutpu
2995
3157
  outputsSchema: EvalOutputsSchema<TOutputs>;
2996
3158
  };
2997
3159
  type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = {
3160
+ /**
3161
+ * Stable eval identifier within the authored eval file.
3162
+ *
3163
+ * The runner combines this value with the workspace-relative file path to
3164
+ * form the eval key used for targeting, persisted runs, and UI navigation.
3165
+ */
2998
3166
  id: string;
3167
+ /**
3168
+ * Human-readable eval name shown in the CLI and web UI.
3169
+ *
3170
+ * When omitted, consumers fall back to `id`.
3171
+ */
2999
3172
  title?: string;
3173
+ /**
3174
+ * Per-eval cache controls. Both `read` and `store` default to `true`.
3175
+ *
3176
+ * `read: false` skips cache lookups for this eval. `store: false` prevents
3177
+ * new or refreshed entries from being written while still allowing reads
3178
+ * unless `read` is also disabled.
3179
+ */
3180
+ cache?: EvalCacheConfig;
3000
3181
  /**
3001
3182
  * Authored cases for this eval.
3002
3183
  *
@@ -3004,6 +3185,13 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
3004
3185
  * eval once using a synthetic case with empty object input.
3005
3186
  */
3006
3187
  cases?: EvalCase<TInput>[] | (() => Promise<EvalCase<TInput>[]>);
3188
+ /**
3189
+ * Output and score column display overrides for this eval.
3190
+ *
3191
+ * Use this to label, format, group, hide, or otherwise customize columns
3192
+ * produced by default config, output helpers, `deriveFromTracing`, scores,
3193
+ * or manual scores.
3194
+ */
3007
3195
  columns?: EvalColumns;
3008
3196
  /**
3009
3197
  * Per-eval trace attribute display rules for the UI.
@@ -3038,8 +3226,30 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
3038
3226
  * elapsed time from the configured `startTime`.
3039
3227
  */
3040
3228
  freezeTime?: boolean;
3229
+ /**
3230
+ * Run one eval case.
3231
+ *
3232
+ * The callback receives the authored case input and a typed `setOutput`
3233
+ * helper. It may record outputs, run assertions, start traced work, and
3234
+ * return either synchronously or asynchronously. Thrown errors fail the
3235
+ * active case and skip later computed scores for that case.
3236
+ */
3041
3237
  execute: (ctx: EvalExecuteContext<TInput, TOutputs>) => Promise<void> | void;
3042
- deriveFromTracing?: (ctx: EvalDeriveContext<TInput>) => Partial<TOutputs> | Promise<Partial<TOutputs>>;
3238
+ /**
3239
+ * Derive additional output fields from the case trace after `execute`.
3240
+ *
3241
+ * Prefer the keyed map form when each key has one derivation. The
3242
+ * object-returning callback form is also supported. Derived values only fill
3243
+ * keys not already recorded during execution.
3244
+ */
3245
+ deriveFromTracing?: EvalDeriveConfig<TInput>;
3246
+ /**
3247
+ * Computed score columns for each case.
3248
+ *
3249
+ * Each key becomes a persisted score column. A score can be a bare callback
3250
+ * or an object with UI metadata and an optional `passThreshold`; thresholds
3251
+ * fail a case only when the computed value is strictly below the threshold.
3252
+ */
3043
3253
  scores?: Record<string, EvalScoreDef<TInput, TOutputs>>;
3044
3254
  /**
3045
3255
  * Score columns whose values are entered in the web UI after a run.
@@ -3061,8 +3271,8 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
3061
3271
  * column across the latest run's cases — `key` must match one of the eval's
3062
3272
  * score or column keys, and only finite numeric values participate in the
3063
3273
  * reduction. When no case has a numeric value for the key the stat renders
3064
- * an em dash. `label`, `format`, and `numberFormat` default to the matching
3065
- * `ColumnDef`.
3274
+ * an em dash, or hides when `hideIfNoValue` is true. `label`, `format`, and
3275
+ * `numberFormat` default to the matching `ColumnDef`.
3066
3276
  */
3067
3277
  stats?: EvalStatsConfig;
3068
3278
  /**
@@ -3077,7 +3287,8 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
3077
3287
  * the run summary. Column metrics aggregate a score or numeric output column
3078
3288
  * across the run using an `aggregate` reducer (`avg`, `sum`, `min`, `max`,
3079
3289
  * `latest`, `passThresholdRate`). `passThresholdRate` requires a score column
3080
- * with `passThreshold`.
3290
+ * with `passThreshold`. Set `hideIfNoValue` to hide a chart until at least
3291
+ * one metric has a numeric value in the rendered history window.
3081
3292
  */
3082
3293
  charts?: EvalChartsConfig;
3083
3294
  /**
@@ -3176,7 +3387,6 @@ type CacheDebugKeyWrite = {
3176
3387
  rawKey: unknown;
3177
3388
  operationType: CacheOperationType;
3178
3389
  operationName: string;
3179
- codeFingerprint: string;
3180
3390
  };
3181
3391
  /**
3182
3392
  * Adapter used by the SDK to read and write cache entries.
@@ -3199,8 +3409,21 @@ type CacheAdapter = {
3199
3409
  type CacheScopeContext = {
3200
3410
  adapter: CacheAdapter;
3201
3411
  mode: CacheMode;
3202
- evalId: string; /** Hash of the eval source file, stored as cache metadata for inspection. */
3203
- codeFingerprint: string;
3412
+ evalId: string;
3413
+ /**
3414
+ * Whether cache lookups are allowed for this eval scope. Defaults to `true`.
3415
+ *
3416
+ * Run-level `bypass` and `refresh` modes still take precedence and skip
3417
+ * reads even when this is enabled.
3418
+ */
3419
+ read?: boolean;
3420
+ /**
3421
+ * Whether cache writes are allowed for this eval scope. Defaults to `true`.
3422
+ *
3423
+ * Run-level `bypass` still takes precedence and skips writes even when this
3424
+ * is enabled.
3425
+ */
3426
+ store?: boolean;
3204
3427
  };
3205
3428
  /** Active recording frame captured while a cached operation body executes. */
3206
3429
  type CacheRecordingFrame = {
@@ -3397,7 +3620,7 @@ declare function incrementEvalOutput(key: string, delta: number): void;
3397
3620
  declare function evalAssert(condition: unknown, message: string): asserts condition;
3398
3621
  //#endregion
3399
3622
  //#region ../sdk/src/cacheSerialization.d.ts
3400
- declare const serializedCacheValueMarker = "__agentEvalsCacheSerialization";
3623
+ declare const serializedCacheValueMarker = "__aecs";
3401
3624
  declare const jsonSafeCacheValueVersion = "json-safe-v1";
3402
3625
  type JsonSafeCacheValueType = 'ArrayBuffer' | 'BigInt' | 'Blob' | 'CompressedJson' | 'CompressedString' | 'Date' | 'Error' | 'File' | 'Float64Array' | 'Headers' | 'Map' | 'Number' | 'Object' | 'RegExp' | 'Set' | 'URL' | 'URLSearchParams' | 'Undefined';
3403
3626
  type JsonSafeSerializedCacheValue = {
@@ -3409,16 +3632,32 @@ type JsonSafeSerializedCacheValue = {
3409
3632
  };
3410
3633
  /** JSON-safe persisted representation for one rich cached value. */
3411
3634
  type SerializedCacheValue = JsonSafeSerializedCacheValue;
3635
+ /** Options controlling how rich cache values are persisted as JSON-safe data. */
3636
+ type CacheSerializationOptions = {
3637
+ /**
3638
+ * Preserve JavaScript `undefined` values with explicit tagged wrappers.
3639
+ *
3640
+ * Disabled by default so undefined object fields, array items, map entries,
3641
+ * and set items are omitted instead of being written to cache files.
3642
+ */
3643
+ preserveUndefined?: boolean;
3644
+ };
3412
3645
  /**
3413
3646
  * Serialize one cached value while keeping plain JSON as plain JSON.
3414
3647
  *
3415
- * Rich runtime values use small tagged wrappers.
3648
+ * Rich runtime values use small tagged wrappers. Undefined values are omitted
3649
+ * by default; pass `preserveUndefined: true` to round-trip them explicitly.
3416
3650
  */
3417
- declare function serializeCacheValue(value: unknown): Promise<unknown>;
3651
+ declare function serializeCacheValue(value: unknown, options?: CacheSerializationOptions | undefined): Promise<unknown>;
3418
3652
  /** Revive one cached value, while preserving legacy JSON-round-tripped data. */
3419
3653
  declare function deserializeCacheValue(value: unknown): unknown;
3420
- /** Serialize all rich values captured in a cache recording before persistence. */
3421
- declare function serializeCacheRecording(recording: CacheRecording): Promise<CacheRecording>;
3654
+ /**
3655
+ * Serialize all rich values captured in a cache recording before persistence.
3656
+ *
3657
+ * Undefined values are omitted by default; pass `preserveUndefined: true` to
3658
+ * retain the legacy explicit undefined wrappers in the recording payload.
3659
+ */
3660
+ declare function serializeCacheRecording(recording: CacheRecording, options?: CacheSerializationOptions | undefined): Promise<CacheRecording>;
3422
3661
  /** Revive all rich values captured in a cache recording after lookup. */
3423
3662
  declare function deserializeCacheRecording(recording: CacheRecording): CacheRecording;
3424
3663
  //#endregion
@@ -3637,6 +3876,15 @@ type CacheClearFilter = {
3637
3876
  key?: string;
3638
3877
  };
3639
3878
  //#endregion
3879
+ //#region ../runner/src/recalculateDerivedAttributes.d.ts
3880
+ type RecalculateDerivedAttributesResult = {
3881
+ updated: true;
3882
+ caseDetail: CaseDetail;
3883
+ } | {
3884
+ updated: false;
3885
+ reason: string;
3886
+ };
3887
+ //#endregion
3640
3888
  //#region ../runner/src/runner.d.ts
3641
3889
  /** Imperative runner interface used by the server and CLI. */
3642
3890
  type EvalRunner = {
@@ -3703,7 +3951,11 @@ type EvalRunner = {
3703
3951
  */
3704
3952
  recomputeStatusesForEval(evalKey: string): Promise<{
3705
3953
  updatedRuns: number;
3706
- }>;
3954
+ }>; /** Recalculate configured LLM/API derived attributes for one persisted case trace. */
3955
+ recalculateDerivedAttributesForCase(params: {
3956
+ runId: string;
3957
+ caseId: string;
3958
+ }): Promise<RecalculateDerivedAttributesResult>;
3707
3959
  /**
3708
3960
  * Delete terminal persisted runs that touch one eval from memory and disk.
3709
3961
  * Accepts the exact eval key, with a legacy fallback for unique eval ids.
@@ -3754,4 +4006,4 @@ declare function createRunner({
3754
4006
  */
3755
4007
  declare function runCli(argv: string[]): Promise<void>;
3756
4008
  //#endregion
3757
- export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallsConfigInput, type NumberDisplayOptions, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
4009
+ export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallsConfigInput, type NumberDisplayOptions, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };