@ls-stack/agent-eval 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -913,6 +913,20 @@ declare const caseDetailSchema: z$1.ZodObject<{
913
913
  stack: z$1.ZodOptional<z$1.ZodString>;
914
914
  }, z$1.core.$strip>>;
915
915
  trial: z$1.ZodNumber;
916
+ cacheRefs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
917
+ type: z$1.ZodLiteral<"value">;
918
+ name: z$1.ZodString;
919
+ namespace: z$1.ZodString;
920
+ key: z$1.ZodString;
921
+ status: z$1.ZodEnum<{
922
+ hit: "hit";
923
+ miss: "miss";
924
+ refresh: "refresh";
925
+ bypass: "bypass";
926
+ }>;
927
+ storedAt: z$1.ZodOptional<z$1.ZodString>;
928
+ age: z$1.ZodOptional<z$1.ZodNumber>;
929
+ }, z$1.core.$strip>>>;
916
930
  }, z$1.core.$strip>;
917
931
  /** Full case payload including inputs, trace, outputs, and failures. */
918
932
  type CaseDetail = z$1.infer<typeof caseDetailSchema>;
@@ -1363,60 +1377,16 @@ type EvalTitleLike = {
1363
1377
  */
1364
1378
  declare function getEvalTitle(evalLike: EvalTitleLike): string;
1365
1379
  //#endregion
1366
- //#region ../shared/src/schemas/sse.d.ts
1367
- declare const sseEventTypeSchema: z$1.ZodEnum<{
1368
- "discovery.updated": "discovery.updated";
1369
- "run.started": "run.started";
1370
- "run.summary": "run.summary";
1371
- "case.started": "case.started";
1372
- "case.updated": "case.updated";
1373
- "case.finished": "case.finished";
1374
- "trace.span": "trace.span";
1375
- "run.finished": "run.finished";
1376
- "run.cancelled": "run.cancelled";
1377
- "run.error": "run.error";
1378
- }>;
1379
- /** Server-sent event name emitted by the runner or backend. */
1380
- type SseEventType = z$1.infer<typeof sseEventTypeSchema>;
1381
- /** Schema for the SSE envelope used to stream run updates to clients. */
1382
- declare const sseEnvelopeSchema: z$1.ZodObject<{
1383
- type: z$1.ZodString;
1384
- runId: z$1.ZodOptional<z$1.ZodString>;
1385
- timestamp: z$1.ZodString;
1386
- payload: z$1.ZodUnknown;
1387
- }, z$1.core.$strip>;
1388
- /** Wire format for a streamed event emitted during eval execution. */
1389
- type SseEnvelope = z$1.infer<typeof sseEnvelopeSchema>;
1390
- //#endregion
1391
- //#region ../shared/src/schemas/api.d.ts
1392
- /** Schema for the API request that starts a new eval run. */
1393
- declare const createRunRequestSchema: z$1.ZodObject<{
1394
- target: z$1.ZodObject<{
1395
- mode: z$1.ZodEnum<{
1396
- all: "all";
1397
- evalIds: "evalIds";
1398
- caseIds: "caseIds";
1399
- }>;
1400
- evalIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
1401
- caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
1402
- }, z$1.core.$strip>;
1403
- trials: z$1.ZodNumber;
1404
- cache: z$1.ZodOptional<z$1.ZodObject<{
1405
- mode: z$1.ZodDefault<z$1.ZodEnum<{
1406
- use: "use";
1407
- bypass: "bypass";
1408
- refresh: "refresh";
1409
- }>>;
1410
- }, z$1.core.$strip>>;
1411
- }, z$1.core.$strip>;
1412
- /** Request payload accepted by the run creation endpoint. */
1413
- type CreateRunRequest = z$1.infer<typeof createRunRequestSchema>;
1414
- /** Schema for updating a UI-authored manual score on one persisted case. */
1415
- declare const updateManualScoreRequestSchema: z$1.ZodObject<{
1416
- value: z$1.ZodNullable<z$1.ZodNumber>;
1417
- }, z$1.core.$strip>;
1418
- /** Request payload accepted by the manual score update endpoint. */
1419
- type UpdateManualScoreRequest = z$1.infer<typeof updateManualScoreRequestSchema>;
1380
+ //#region ../shared/src/utils/getNestedAttribute.d.ts
1381
+ /**
1382
+ * Read a nested value from `value` by walking the dot-separated `path`.
1383
+ *
1384
+ * Returns `undefined` when any segment of the path is missing or when an
1385
+ * intermediate value is not a plain object. Used by trace-attribute display,
1386
+ * the LLM calls extractor, and any consumer that needs to look up nested
1387
+ * properties from a span's `attributes` record.
1388
+ */
1389
+ declare function getNestedAttribute(value: unknown, path: string): unknown;
1420
1390
  //#endregion
1421
1391
  //#region ../shared/src/schemas/config.d.ts
1422
1392
  /** Strategy used to collapse repeated trials into one stored case result. */
@@ -1426,6 +1396,144 @@ declare const trialSelectionModeSchema: z$1.ZodEnum<{
1426
1396
  }>;
1427
1397
  /** Strategy used to collapse repeated trials into one stored case result. */
1428
1398
  type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
1399
+ /** Render formats supported by an LLM-call metric in the UI. */
1400
+ declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
1401
+ string: "string";
1402
+ number: "number";
1403
+ boolean: "boolean";
1404
+ duration: "duration";
1405
+ json: "json";
1406
+ }>;
1407
+ /** Render format applied to an LLM-call metric value. */
1408
+ type LlmCallMetricFormat = z$1.infer<typeof llmCallMetricFormatSchema>;
1409
+ /** Where an LLM-call metric is rendered inside the LLM calls tab. */
1410
+ declare const llmCallMetricPlacementSchema: z$1.ZodEnum<{
1411
+ header: "header";
1412
+ body: "body";
1413
+ }>;
1414
+ /** Placement option for an LLM-call metric. */
1415
+ type LlmCallMetricPlacement = z$1.infer<typeof llmCallMetricPlacementSchema>;
1416
+ /**
1417
+ * Schema for a single user-defined metric attached to LLM call rows.
1418
+ *
1419
+ * Each metric reads `path` from the span's `attributes` and renders the value
1420
+ * with the configured `format` and `numberFormat`. `placements` controls
1421
+ * whether the metric appears as a chip on the collapsed row header, as a row
1422
+ * inside the expanded body, or both. Defaults to `['body']` when omitted.
1423
+ */
1424
+ declare const llmCallMetricSchema: z$1.ZodObject<{
1425
+ label: z$1.ZodString;
1426
+ tooltip: z$1.ZodOptional<z$1.ZodString>;
1427
+ path: z$1.ZodString;
1428
+ format: z$1.ZodOptional<z$1.ZodEnum<{
1429
+ string: "string";
1430
+ number: "number";
1431
+ boolean: "boolean";
1432
+ duration: "duration";
1433
+ json: "json";
1434
+ }>>;
1435
+ numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
1436
+ placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
1437
+ header: "header";
1438
+ body: "body";
1439
+ }>>>;
1440
+ }, z$1.core.$strip>;
1441
+ /** User-defined metric authored in `agent-evals.config.ts`. */
1442
+ type LlmCallMetric = z$1.infer<typeof llmCallMetricSchema>;
1443
+ /** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
1444
+ declare const llmCallsConfigSchema: z$1.ZodObject<{
1445
+ kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
1446
+ attributes: z$1.ZodOptional<z$1.ZodObject<{
1447
+ model: z$1.ZodOptional<z$1.ZodString>;
1448
+ provider: z$1.ZodOptional<z$1.ZodString>;
1449
+ inputTokens: z$1.ZodOptional<z$1.ZodString>;
1450
+ outputTokens: z$1.ZodOptional<z$1.ZodString>;
1451
+ cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
1452
+ cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
1453
+ reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
1454
+ totalTokens: z$1.ZodOptional<z$1.ZodString>;
1455
+ cost: z$1.ZodOptional<z$1.ZodString>;
1456
+ inputCost: z$1.ZodOptional<z$1.ZodString>;
1457
+ outputCost: z$1.ZodOptional<z$1.ZodString>;
1458
+ cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
1459
+ cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
1460
+ reasoningCost: z$1.ZodOptional<z$1.ZodString>;
1461
+ steps: z$1.ZodOptional<z$1.ZodString>;
1462
+ finishReason: z$1.ZodOptional<z$1.ZodString>;
1463
+ input: z$1.ZodOptional<z$1.ZodString>;
1464
+ output: z$1.ZodOptional<z$1.ZodString>;
1465
+ reasoning: z$1.ZodOptional<z$1.ZodString>;
1466
+ toolCalls: z$1.ZodOptional<z$1.ZodString>;
1467
+ }, z$1.core.$strip>>;
1468
+ metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
1469
+ label: z$1.ZodString;
1470
+ tooltip: z$1.ZodOptional<z$1.ZodString>;
1471
+ path: z$1.ZodString;
1472
+ format: z$1.ZodOptional<z$1.ZodEnum<{
1473
+ string: "string";
1474
+ number: "number";
1475
+ boolean: "boolean";
1476
+ duration: "duration";
1477
+ json: "json";
1478
+ }>>;
1479
+ numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
1480
+ placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
1481
+ header: "header";
1482
+ body: "body";
1483
+ }>>>;
1484
+ }, z$1.core.$strip>>>;
1485
+ }, z$1.core.$strip>;
1486
+ /** Authored LLM calls config accepted from `agent-evals.config.ts`. */
1487
+ type LlmCallsConfigInput = z$1.infer<typeof llmCallsConfigSchema>;
1488
+ /** Resolved LLM-calls config sent to the UI with all defaults applied. */
1489
+ type ResolvedLlmCallsConfig = {
1490
+ kinds: string[];
1491
+ attributes: {
1492
+ model: string;
1493
+ provider: string;
1494
+ inputTokens: string;
1495
+ outputTokens: string;
1496
+ cachedInputTokens: string;
1497
+ cacheCreationInputTokens: string;
1498
+ reasoningTokens: string;
1499
+ totalTokens: string;
1500
+ cost: string;
1501
+ inputCost: string;
1502
+ outputCost: string;
1503
+ cachedInputCost: string;
1504
+ cacheCreationInputCost: string;
1505
+ reasoningCost: string;
1506
+ steps: string;
1507
+ finishReason: string;
1508
+ input: string;
1509
+ output: string;
1510
+ reasoning: string;
1511
+ toolCalls: string;
1512
+ };
1513
+ metrics: ResolvedLlmCallMetric[];
1514
+ };
1515
+ /** Fully-resolved LLM-call metric used by the runner and UI. */
1516
+ type ResolvedLlmCallMetric = {
1517
+ label: string;
1518
+ tooltip?: string;
1519
+ path: string;
1520
+ format: LlmCallMetricFormat;
1521
+ numberFormat?: NumberDisplayOptions;
1522
+ placements: LlmCallMetricPlacement[];
1523
+ };
1524
+ /** Default LLM-calls config the UI uses before the workspace fetch resolves. */
1525
+ declare const DEFAULT_LLM_CALLS_CONFIG: ResolvedLlmCallsConfig;
1526
+ /**
1527
+ * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
1528
+ * by the UI to derive the LLM calls tab.
1529
+ *
1530
+ * - Missing or empty `kinds` falls back to `['llm']`.
1531
+ * - Missing `attributes.<field>` falls back to the corresponding default
1532
+ * attribute path.
1533
+ * - Missing `metrics[].format` defaults to `'string'`.
1534
+ * - Missing `metrics[].placements` defaults to `['body']`.
1535
+ */
1536
+ declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
1429
1537
  /** Top-level config authored in `agent-evals.config.ts`. */
1430
1538
  type AgentEvalsConfig = {
1431
1539
  /** Root directory used to resolve all relative paths. Defaults to `process.cwd()`. */workspaceRoot?: string; /** Glob patterns (relative to `workspaceRoot`) used to discover eval files. */
@@ -1455,6 +1563,32 @@ type AgentEvalsConfig = {
1455
1563
  * definition taking precedence for matching `key` or `path` entries.
1456
1564
  */
1457
1565
  traceDisplay?: TraceDisplayInputConfig;
1566
+ /**
1567
+ * Configuration for the "LLM calls" tab in the case-run drawer.
1568
+ *
1569
+ * Determines which trace spans are treated as LLM calls (`kinds`), how
1570
+ * structured fields like `model` and `usage.inputTokens` are read from
1571
+ * span attributes, and which custom user-defined metrics are surfaced on
1572
+ * each call. All fields are optional and fall back to the documented
1573
+ * defaults; the LLM calls tab is shown automatically when at least one
1574
+ * matching span exists in a case run.
1575
+ *
1576
+ * @example
1577
+ * ```ts
1578
+ * llmCalls: {
1579
+ * kinds: ['llm', 'ai-sdk.generateText'],
1580
+ * attributes: {
1581
+ * cachedInputTokens: 'usage.cache_read_input_tokens',
1582
+ * },
1583
+ * metrics: [
1584
+ * { label: 'Tokens/sec', path: 'tokensPerSecond', format: 'number',
1585
+ * numberFormat: { decimalPlaces: 1 }, placements: ['header', 'body'] },
1586
+ * { label: 'Retries', path: 'retryCount', format: 'number' },
1587
+ * ],
1588
+ * }
1589
+ * ```
1590
+ */
1591
+ llmCalls?: LlmCallsConfigInput;
1458
1592
  /**
1459
1593
  * Optional controls for the operation cache. When omitted, the cache is
1460
1594
  * enabled and stored under `<workspaceRoot>/.agent-evals/cache`.
@@ -1463,9 +1597,15 @@ type AgentEvalsConfig = {
1463
1597
  /** Disable the cache entirely; spans with `cache` options execute as if uncached. */enabled?: boolean; /** Override the directory used to persist cache entries. */
1464
1598
  dir?: string;
1465
1599
  /**
1466
- * Maximum entries retained in each per-eval cache file. Defaults to `100`;
1467
- * non-positive or non-finite values fall back to the default.
1600
+ * Default maximum entries retained for each cache namespace. Defaults to
1601
+ * `100`; non-positive or non-finite values fall back to the default.
1602
+ */
1603
+ maxEntriesPerNamespace?: number;
1604
+ /**
1605
+ * Exact namespace-specific retention caps. Values override
1606
+ * `maxEntriesPerNamespace` for matching namespaces.
1468
1607
  */
1608
+ maxEntriesByNamespace?: Record<string, number>; /** Legacy alias for `maxEntriesPerNamespace`, retained so older config files keep working. */
1469
1609
  maxEntriesPerEval?: number;
1470
1610
  };
1471
1611
  };
@@ -1509,13 +1649,118 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
1509
1649
  transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
1510
1650
  }, z$1.core.$strip>>>;
1511
1651
  }, z$1.core.$strip>>;
1652
+ llmCalls: z$1.ZodOptional<z$1.ZodObject<{
1653
+ kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
1654
+ attributes: z$1.ZodOptional<z$1.ZodObject<{
1655
+ model: z$1.ZodOptional<z$1.ZodString>;
1656
+ provider: z$1.ZodOptional<z$1.ZodString>;
1657
+ inputTokens: z$1.ZodOptional<z$1.ZodString>;
1658
+ outputTokens: z$1.ZodOptional<z$1.ZodString>;
1659
+ cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
1660
+ cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
1661
+ reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
1662
+ totalTokens: z$1.ZodOptional<z$1.ZodString>;
1663
+ cost: z$1.ZodOptional<z$1.ZodString>;
1664
+ inputCost: z$1.ZodOptional<z$1.ZodString>;
1665
+ outputCost: z$1.ZodOptional<z$1.ZodString>;
1666
+ cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
1667
+ cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
1668
+ reasoningCost: z$1.ZodOptional<z$1.ZodString>;
1669
+ steps: z$1.ZodOptional<z$1.ZodString>;
1670
+ finishReason: z$1.ZodOptional<z$1.ZodString>;
1671
+ input: z$1.ZodOptional<z$1.ZodString>;
1672
+ output: z$1.ZodOptional<z$1.ZodString>;
1673
+ reasoning: z$1.ZodOptional<z$1.ZodString>;
1674
+ toolCalls: z$1.ZodOptional<z$1.ZodString>;
1675
+ }, z$1.core.$strip>>;
1676
+ metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
1677
+ label: z$1.ZodString;
1678
+ tooltip: z$1.ZodOptional<z$1.ZodString>;
1679
+ path: z$1.ZodString;
1680
+ format: z$1.ZodOptional<z$1.ZodEnum<{
1681
+ string: "string";
1682
+ number: "number";
1683
+ boolean: "boolean";
1684
+ duration: "duration";
1685
+ json: "json";
1686
+ }>>;
1687
+ numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
1688
+ placements: z$1.ZodOptional<z$1.ZodArray<z$1.ZodEnum<{
1689
+ header: "header";
1690
+ body: "body";
1691
+ }>>>;
1692
+ }, z$1.core.$strip>>>;
1693
+ }, z$1.core.$strip>>;
1512
1694
  cache: z$1.ZodOptional<z$1.ZodObject<{
1513
1695
  enabled: z$1.ZodOptional<z$1.ZodBoolean>;
1514
1696
  dir: z$1.ZodOptional<z$1.ZodString>;
1697
+ maxEntriesPerNamespace: z$1.ZodPipe<z$1.ZodTransform<number | undefined, unknown>, z$1.ZodOptional<z$1.ZodNumber>>;
1698
+ maxEntriesByNamespace: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodNumber>>;
1515
1699
  maxEntriesPerEval: z$1.ZodPipe<z$1.ZodTransform<number | undefined, unknown>, z$1.ZodOptional<z$1.ZodNumber>>;
1516
1700
  }, z$1.core.$strip>>;
1517
1701
  }, z$1.core.$strip>;
1518
1702
  //#endregion
1703
+ //#region ../shared/src/utils/extractLlmCalls.d.ts
1704
+ /** Resolved value for one user-defined metric on an LLM call row. */
1705
+ type LlmCallMetricValue = {
1706
+ label: string;
1707
+ tooltip: string | undefined;
1708
+ rawValue: unknown;
1709
+ format: LlmCallMetricFormat;
1710
+ numberFormat: NumberDisplayOptions | undefined;
1711
+ placements: LlmCallMetricPlacement[];
1712
+ };
1713
+ /** Single entry rendered as one expandable row in the LLM calls tab. */
1714
+ type LlmCallEntry = {
1715
+ id: string;
1716
+ name: string;
1717
+ kind: string;
1718
+ status: EvalTraceSpan['status'];
1719
+ model: string | null;
1720
+ provider: string | null;
1721
+ inputTokens: number | null;
1722
+ outputTokens: number | null;
1723
+ cachedInputTokens: number | null;
1724
+ cacheCreationInputTokens: number | null;
1725
+ reasoningTokens: number | null;
1726
+ totalTokens: number | null;
1727
+ costUsd: number | null;
1728
+ inputCostUsd: number | null;
1729
+ outputCostUsd: number | null;
1730
+ cachedInputCostUsd: number | null;
1731
+ cacheCreationInputCostUsd: number | null;
1732
+ reasoningCostUsd: number | null; /** Number of inference rounds. Derived from the array length when `stepDetails` is set. */
1733
+ stepCount: number | null; /** Per-step breakdown when the configured `steps` attribute resolves to an array. */
1734
+ stepDetails: unknown[] | null;
1735
+ finishReason: string | null;
1736
+ latencyMs: number | null;
1737
+ input: unknown;
1738
+ output: unknown;
1739
+ reasoning: unknown;
1740
+ toolCalls: unknown;
1741
+ metrics: LlmCallMetricValue[];
1742
+ warnings: EvalTraceSpanWarning[];
1743
+ error: EvalTraceSpanError | null;
1744
+ };
1745
+ /**
1746
+ * Filter `spans` down to LLM calls and project each one to the structured
1747
+ * shape consumed by the LLM calls tab.
1748
+ *
1749
+ * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
1750
+ * (`model`, token counts, cost, etc.) are read via `getNestedAttribute` from
1751
+ * the configured paths, with safe coercion to `string | null` / `number |
1752
+ * null`. `totalTokens` falls back to a sum of input + output + cached when no
1753
+ * explicit total attribute is present. The `steps` attribute path may resolve
1754
+ * to either a number (rendered as the inference-round count) or an array of
1755
+ * per-step detail objects (rendered as a Steps section in the body, with
1756
+ * `stepCount` derived from the array length). `latencyMs` is `null` while the
1757
+ * span is still running. User-defined `metrics` whose path resolves to
1758
+ * `undefined` are dropped, but `null`, `0`, and `false` are preserved as
1759
+ * legitimate values worth displaying. Original span order is preserved so the
1760
+ * LLM calls tab matches the ordering in the Trace tab.
1761
+ */
1762
+ declare function extractLlmCalls(spans: EvalTraceSpan[], config: ResolvedLlmCallsConfig): LlmCallEntry[];
1763
+ //#endregion
1519
1764
  //#region ../shared/src/schemas/cache.d.ts
1520
1765
  /**
1521
1766
  * Mode that controls how the cache is consulted for a given run.
@@ -1535,6 +1780,7 @@ type CacheMode = z$1.infer<typeof cacheModeSchema>;
1535
1780
  declare const spanCacheOptionsSchema: z$1.ZodObject<{
1536
1781
  key: z$1.ZodUnknown;
1537
1782
  namespace: z$1.ZodOptional<z$1.ZodString>;
1783
+ serializeFileBytes: z$1.ZodOptional<z$1.ZodBoolean>;
1538
1784
  }, z$1.core.$strip>;
1539
1785
  /** Options accepted by an `evalTracer.span` call to opt the span into caching. */
1540
1786
  type SpanCacheOptions = z$1.infer<typeof spanCacheOptionsSchema>;
@@ -1545,6 +1791,38 @@ declare const cacheOperationTypeSchema: z$1.ZodEnum<{
1545
1791
  }>;
1546
1792
  /** Category of operation stored in the eval cache. */
1547
1793
  type CacheOperationType = z$1.infer<typeof cacheOperationTypeSchema>;
1794
+ /** Schema for the status of a cache lookup recorded on a span or case scope. */
1795
+ declare const cacheStatusSchema: z$1.ZodEnum<{
1796
+ bypass: "bypass";
1797
+ refresh: "refresh";
1798
+ hit: "hit";
1799
+ miss: "miss";
1800
+ }>;
1801
+ /** Status of a cache lookup recorded on a span or case scope. */
1802
+ type CacheStatus = z$1.infer<typeof cacheStatusSchema>;
1803
+ /**
1804
+ * Reference to a value-cache lookup performed via `evalTracer.cache(...)`.
1805
+ *
1806
+ * Refs are appended to the active span's `cache.refs` attribute when the call
1807
+ * happens inside a `traceSpan(...)` body, or to the case scope's
1808
+ * `caseCacheRefs` bucket when the call is made directly from the case body.
1809
+ */
1810
+ declare const traceCacheRefSchema: z$1.ZodObject<{
1811
+ type: z$1.ZodLiteral<"value">;
1812
+ name: z$1.ZodString;
1813
+ namespace: z$1.ZodString;
1814
+ key: z$1.ZodString;
1815
+ status: z$1.ZodEnum<{
1816
+ bypass: "bypass";
1817
+ refresh: "refresh";
1818
+ hit: "hit";
1819
+ miss: "miss";
1820
+ }>;
1821
+ storedAt: z$1.ZodOptional<z$1.ZodString>;
1822
+ age: z$1.ZodOptional<z$1.ZodNumber>;
1823
+ }, z$1.core.$strip>;
1824
+ /** Reference to a value-cache lookup performed via `evalTracer.cache(...)`. */
1825
+ type TraceCacheRef = z$1.infer<typeof traceCacheRefSchema>;
1548
1826
  /** Summary of a single persisted cache entry, used by list/delete endpoints. */
1549
1827
  declare const cacheListItemSchema: z$1.ZodObject<{
1550
1828
  key: z$1.ZodString;
@@ -1824,6 +2102,93 @@ declare const cacheFileSchema: z$1.ZodObject<{
1824
2102
  /** Persisted per-owner cache file contents. */
1825
2103
  type CacheFile = z$1.infer<typeof cacheFileSchema>;
1826
2104
  //#endregion
2105
+ //#region ../shared/src/utils/extractCacheHits.d.ts
2106
+ /**
2107
+ * Single cache-hit entry rendered as one row in the case drawer's
2108
+ * "Cache hits" tab.
2109
+ *
2110
+ * `origin === 'span'` rows came from a span's `cache.status` attribute or from
2111
+ * a `cache.refs` ref attached to a span body. `origin === 'caseRoot'` rows
2112
+ * came from `evalTracer.cache(...)` calls made directly from the case body
2113
+ * (no surrounding `traceSpan`), which would otherwise be invisible.
2114
+ */
2115
+ type CacheHitEntry = {
2116
+ id: string;
2117
+ source: 'span' | 'value';
2118
+ origin: 'span' | 'caseRoot';
2119
+ name: string;
2120
+ namespace: string;
2121
+ key: string;
2122
+ storedAt: string | undefined;
2123
+ age: number | undefined;
2124
+ spanId: string | undefined;
2125
+ };
2126
+ /**
2127
+ * Collect every `status === 'hit'` cache event recorded for a case run.
2128
+ *
2129
+ * Walks `spans` for span-level cache hits (`attributes['cache.status'] ===
2130
+ * 'hit'`) and per-span value-cache refs (`attributes['cache.refs']`), then
2131
+ * appends spanless value-cache refs persisted on the case scope. Non-hit
2132
+ * statuses (`miss`/`refresh`/`bypass`) are skipped — they remain visible
2133
+ * inline in the Trace tab.
2134
+ */
2135
+ declare function extractCacheHits(spans: EvalTraceSpan[], caseCacheRefs: TraceCacheRef[]): CacheHitEntry[];
2136
+ //#endregion
2137
+ //#region ../shared/src/schemas/sse.d.ts
2138
+ declare const sseEventTypeSchema: z$1.ZodEnum<{
2139
+ "discovery.updated": "discovery.updated";
2140
+ "run.started": "run.started";
2141
+ "run.summary": "run.summary";
2142
+ "case.started": "case.started";
2143
+ "case.updated": "case.updated";
2144
+ "case.finished": "case.finished";
2145
+ "trace.span": "trace.span";
2146
+ "run.finished": "run.finished";
2147
+ "run.cancelled": "run.cancelled";
2148
+ "run.error": "run.error";
2149
+ }>;
2150
+ /** Server-sent event name emitted by the runner or backend. */
2151
+ type SseEventType = z$1.infer<typeof sseEventTypeSchema>;
2152
+ /** Schema for the SSE envelope used to stream run updates to clients. */
2153
+ declare const sseEnvelopeSchema: z$1.ZodObject<{
2154
+ type: z$1.ZodString;
2155
+ runId: z$1.ZodOptional<z$1.ZodString>;
2156
+ timestamp: z$1.ZodString;
2157
+ payload: z$1.ZodUnknown;
2158
+ }, z$1.core.$strip>;
2159
+ /** Wire format for a streamed event emitted during eval execution. */
2160
+ type SseEnvelope = z$1.infer<typeof sseEnvelopeSchema>;
2161
+ //#endregion
2162
+ //#region ../shared/src/schemas/api.d.ts
2163
+ /** Schema for the API request that starts a new eval run. */
2164
+ declare const createRunRequestSchema: z$1.ZodObject<{
2165
+ target: z$1.ZodObject<{
2166
+ mode: z$1.ZodEnum<{
2167
+ all: "all";
2168
+ evalIds: "evalIds";
2169
+ caseIds: "caseIds";
2170
+ }>;
2171
+ evalIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2172
+ caseIds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
2173
+ }, z$1.core.$strip>;
2174
+ trials: z$1.ZodNumber;
2175
+ cache: z$1.ZodOptional<z$1.ZodObject<{
2176
+ mode: z$1.ZodDefault<z$1.ZodEnum<{
2177
+ use: "use";
2178
+ bypass: "bypass";
2179
+ refresh: "refresh";
2180
+ }>>;
2181
+ }, z$1.core.$strip>>;
2182
+ }, z$1.core.$strip>;
2183
+ /** Request payload accepted by the run creation endpoint. */
2184
+ type CreateRunRequest = z$1.infer<typeof createRunRequestSchema>;
2185
+ /** Schema for updating a UI-authored manual score on one persisted case. */
2186
+ declare const updateManualScoreRequestSchema: z$1.ZodObject<{
2187
+ value: z$1.ZodNullable<z$1.ZodNumber>;
2188
+ }, z$1.core.$strip>;
2189
+ /** Request payload accepted by the manual score update endpoint. */
2190
+ type UpdateManualScoreRequest = z$1.infer<typeof updateManualScoreRequestSchema>;
2191
+ //#endregion
1827
2192
  //#region ../sdk/src/types.d.ts
1828
2193
  /** Single authored eval case with its stable identifier and input payload. */
1829
2194
  type EvalCase<TInput> = {
@@ -1831,7 +2196,7 @@ type EvalCase<TInput> = {
1831
2196
  input: TInput;
1832
2197
  tags?: string[];
1833
2198
  };
1834
- /** Runtime output values collected from `setEvalOutput` and `deriveFromTracing`. */
2199
+ /** Runtime output values collected from output helpers and `deriveFromTracing`. */
1835
2200
  type EvalOutputs = Record<string, unknown>;
1836
2201
  /**
1837
2202
  * Schema used to validate and type an eval's collected runtime outputs.
@@ -1884,9 +2249,31 @@ type EvalTraceTree = {
1884
2249
  flattenDfs: () => EvalTraceSpan[];
1885
2250
  checkpoints: Map<string, unknown>;
1886
2251
  };
2252
+ /** Type-safe output writer passed to an eval's `execute` function. */
2253
+ type EvalSetOutput<TOutputs extends EvalOutputs = EvalOutputs> = <TKey extends Extract<keyof TOutputs, string>>(
2254
+ /**
2255
+ * Output field to record. For narrowed output maps, this must be one of the
2256
+ * known output keys.
2257
+ */
2258
+
2259
+ key: TKey,
2260
+ /**
2261
+ * Value for the output field. For narrowed output maps, this must match the
2262
+ * field's declared output type.
2263
+ */
2264
+
2265
+ value: TOutputs[TKey]) => void;
1887
2266
  /** Context passed to an eval's `execute` function for a single case run. */
1888
- type EvalExecuteContext<TInput> = {
1889
- input: TInput;
2267
+ type EvalExecuteContext<TInput, TOutputs extends EvalOutputs = EvalOutputs> = {
2268
+ /** Authored input for the active eval case. */input: TInput;
2269
+ /**
2270
+ * Record or replace an output value for the current case scope.
2271
+ *
2272
+ * When the eval has a narrowed outputs generic, keys and values are typed
2273
+ * from that output map. The recorded values are still validated by
2274
+ * `outputsSchema` before computed scores run.
2275
+ */
2276
+ setOutput: EvalSetOutput<TOutputs>;
1890
2277
  };
1891
2278
  /** Context passed to `deriveFromTracing` after execution has completed. */
1892
2279
  type EvalDeriveContext<TInput> = {
@@ -1928,8 +2315,31 @@ type EvalManualScoreDef = EvalColumnOverride & {
1928
2315
  */
1929
2316
  passThreshold?: number;
1930
2317
  };
1931
- /** Complete authored eval definition consumed by `defineEval`. */
1932
- type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = {
2318
+ type EvalDefinitionOutputSchemaConfig<TOutputs extends EvalOutputs> = [EvalOutputs] extends [TOutputs] ? {
2319
+ /**
2320
+ * Optional schema for runtime outputs collected through output helpers
2321
+ * and `deriveFromTracing`.
2322
+ *
2323
+ * The runner validates configured output fields before scoring. For
2324
+ * Zod object schemas, only declared keys are passed to the schema;
2325
+ * parsed fields are merged back into the raw output map, so schema
2326
+ * defaults and transforms apply to configured fields while
2327
+ * unconfigured outputs are kept unchanged. Validation failures mark
2328
+ * the case as failed and skip computed scores.
2329
+ */
2330
+ outputsSchema?: EvalOutputsSchema<TOutputs>;
2331
+ } : {
2332
+ /**
2333
+ * Required schema for typed runtime outputs collected through output
2334
+ * helpers and `deriveFromTracing`.
2335
+ *
2336
+ * When `EvalDefinition` or `defineEval` receives an explicit narrowed
2337
+ * outputs generic, this schema is required so scorer inputs are backed
2338
+ * by runtime validation before computed scores run.
2339
+ */
2340
+ outputsSchema: EvalOutputsSchema<TOutputs>;
2341
+ };
2342
+ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = {
1933
2343
  id: string;
1934
2344
  title?: string;
1935
2345
  /**
@@ -1939,17 +2349,6 @@ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs
1939
2349
  * eval once using a synthetic case with empty object input.
1940
2350
  */
1941
2351
  cases?: EvalCase<TInput>[] | (() => Promise<EvalCase<TInput>[]>);
1942
- /**
1943
- * Optional schema for runtime outputs collected through `setEvalOutput` and
1944
- * `deriveFromTracing`.
1945
- *
1946
- * The runner validates configured output fields before scoring. For Zod
1947
- * object schemas, only declared keys are passed to the schema; parsed fields
1948
- * are merged back into the raw output map, so schema defaults and transforms
1949
- * apply to configured fields while unconfigured outputs are kept unchanged.
1950
- * Validation failures mark the case as failed and skip computed scores.
1951
- */
1952
- outputsSchema?: EvalOutputsSchema<TOutputs>;
1953
2352
  columns?: EvalColumns;
1954
2353
  /**
1955
2354
  * Per-eval trace attribute display rules for the UI.
@@ -1959,7 +2358,7 @@ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs
1959
2358
  * `key` is provided.
1960
2359
  */
1961
2360
  traceDisplay?: TraceDisplayInputConfig;
1962
- execute: (ctx: EvalExecuteContext<TInput>) => Promise<void> | void;
2361
+ execute: (ctx: EvalExecuteContext<TInput, TOutputs>) => Promise<void> | void;
1963
2362
  deriveFromTracing?: (ctx: EvalDeriveContext<TInput>) => Partial<TOutputs> | Promise<Partial<TOutputs>>;
1964
2363
  scores?: Record<string, EvalScoreDef<TInput, TOutputs>>;
1965
2364
  /**
@@ -1994,13 +2393,21 @@ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs
1994
2393
  *
1995
2394
  * Each chart declares its `type` (`area | line | bar`) and one or more
1996
2395
  * `metrics`. Built-in metrics (`passRate`, `durationMs`) aggregate
1997
- * the run summary. Column metrics aggregate a score or numeric `setEvalOutput`
1998
- * column across the run using an `aggregate` reducer (`avg`, `sum`, `min`,
1999
- * `max`, `latest`, `passThresholdRate`). `passThresholdRate` requires a
2000
- * score column with `passThreshold`.
2396
+ * the run summary. Column metrics aggregate a score or numeric output column
2397
+ * across the run using an `aggregate` reducer (`avg`, `sum`, `min`, `max`,
2398
+ * `latest`, `passThresholdRate`). `passThresholdRate` requires a score column
2399
+ * with `passThreshold`.
2001
2400
  */
2002
2401
  charts?: EvalChartsConfig;
2003
2402
  };
2403
+ /**
2404
+ * Complete authored eval definition consumed by `defineEval`.
2405
+ *
2406
+ * `outputsSchema` is optional for the default loose output map. When the
2407
+ * `TOutputs` generic is narrowed, `outputsSchema` is required so the runtime
2408
+ * validates collected outputs before exposing them as typed scorer inputs.
2409
+ */
2410
+ type EvalDefinition<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = EvalDefinitionBase<TInput, TOutputs> & EvalDefinitionOutputSchemaConfig<TOutputs>;
2004
2411
  //#endregion
2005
2412
  //#region ../sdk/src/defineEval.d.ts
2006
2413
  /**
@@ -2058,7 +2465,9 @@ type CacheRecordingFrame = {
2058
2465
  };
2059
2466
  /** Mutable per-case runtime state stored in async local storage. */
2060
2467
  type EvalCaseScope = {
2061
- caseId: string; /** Authored input for the current case, when provided by the runner. */
2468
+ caseId: string; /** Stable prefix used by `nextEvalId()` for this eval case scope. */
2469
+ idPrefix: string | undefined; /** Monotonic per-scope counter used by `nextEvalId()`. */
2470
+ nextEvalIdCounter: number; /** Authored input for the current case, when provided by the runner. */
2062
2471
  input?: unknown;
2063
2472
  outputs: Record<string, unknown>; /** Structured assertion failures recorded for the current case. */
2064
2473
  assertionFailures: AssertionFailure[];
@@ -2077,6 +2486,12 @@ type EvalCaseScope = {
2077
2486
  */
2078
2487
  replayingDepth: number; /** Runner-provided cache adapter + mode; absent when caching is disabled. */
2079
2488
  cacheContext: CacheScopeContext | undefined;
2489
+ /**
2490
+ * Value-cache refs recorded by `evalTracer.cache(...)` calls made with no
2491
+ * active span. Span-bound refs are appended to the owning span's
2492
+ * `cache.refs` attribute instead.
2493
+ */
2494
+ caseCacheRefs: TraceCacheRef[];
2080
2495
  };
2081
2496
  /** Error thrown when an eval assertion fails during case execution. */
2082
2497
  declare class EvalAssertionError extends Error {
@@ -2109,7 +2524,8 @@ declare function getEvalCaseInput(path: string): unknown;
2109
2524
  declare function setScopeCacheContext(scope: EvalCaseScope, context: CacheScopeContext): void;
2110
2525
  /** Optional inputs accepted when starting a new eval case scope. */
2111
2526
  type RunInEvalScopeOptions = {
2112
- /** Authored input for the active eval case. */input?: unknown; /** Cache adapter + mode attached to the scope before `fn` runs. */
2527
+ /** Authored input for the active eval case. */input?: unknown; /** Stable prefix used when generating scoped IDs with `nextEvalId()`. */
2528
+ idPrefix?: string; /** Cache adapter + mode attached to the scope before `fn` runs. */
2113
2529
  cacheContext?: CacheScopeContext;
2114
2530
  };
2115
2531
  /**
@@ -2121,6 +2537,15 @@ declare function runInEvalScope<T>(caseId: string, fn: () => Promise<T> | T, opt
2121
2537
  scope: EvalCaseScope;
2122
2538
  error: Error | undefined;
2123
2539
  }>;
2540
+ /**
2541
+ * Return the next deterministic ID for the active eval case execution.
2542
+ *
2543
+ * The runner derives the ID prefix from the eval file, eval id, and case id,
2544
+ * then this helper appends a per-scope sequence number. Calls outside an
2545
+ * active eval case scope throw so accidental product-code usage is caught
2546
+ * immediately.
2547
+ */
2548
+ declare function nextEvalId(): string;
2124
2549
  /**
2125
2550
  * Record or replace an output value for the current case scope.
2126
2551
  *
@@ -2171,43 +2596,46 @@ type CaptureEvalSpanErrorOptions = {
2171
2596
  level?: CaptureEvalSpanErrorLevel;
2172
2597
  };
2173
2598
  //#endregion
2174
- //#region ../sdk/src/cacheRecording.d.ts
2175
- /** Cache reference appended to the active span by `evalTracer.cache(...)`. */
2176
- type TraceCacheRef = {
2177
- type: 'value';
2178
- name: string;
2179
- namespace: string;
2180
- key: string;
2181
- status: 'hit' | 'miss' | 'refresh' | 'bypass';
2182
- storedAt?: string;
2183
- age?: number;
2184
- };
2185
- //#endregion
2186
2599
  //#region ../sdk/src/valueCache.d.ts
2187
2600
  /** Info accepted by `evalTracer.cache(info, fn)` for spanless value caching. */
2188
2601
  type TraceCacheInfo = {
2189
2602
  /** Display name used for cache listings and the default namespace. */name: string; /** Arbitrary JSON-safe value used to derive the cache key. */
2190
2603
  key: unknown; /** Override the default namespace (`${evalId}__${name}`). */
2191
2604
  namespace?: string;
2605
+ /**
2606
+ * Include native `Blob`/`File` bytes in the cache key. By default only stable
2607
+ * metadata (`type`, `size`, plus `name`/`lastModified` for `File`) is used.
2608
+ */
2609
+ serializeFileBytes?: boolean;
2192
2610
  };
2193
2611
  //#endregion
2194
2612
  //#region ../sdk/src/cacheKey.d.ts
2613
+ /** Components folded into a deterministic cache key hash. */
2195
2614
  type CacheKeyHashInput = {
2196
- namespace: string;
2197
- codeFingerprint: string;
2615
+ /** Cache namespace, usually derived from the eval id and operation name. */namespace: string; /** Eval source fingerprint used to invalidate cache entries on code edits. */
2616
+ codeFingerprint: string; /** User-authored cache key value. */
2198
2617
  key: unknown;
2199
2618
  };
2619
+ /** Optional controls for cache key hashing. */
2620
+ type CacheKeyHashOptions = {
2621
+ /**
2622
+ * When true, native `Blob` and `File` values are read asynchronously and
2623
+ * hashed by bytes plus stable metadata. Defaults to metadata-only hashing.
2624
+ */
2625
+ serializeFileBytes?: boolean;
2626
+ };
2200
2627
  /**
2201
2628
  * Hash the components of a cache key into a deterministic hex digest.
2202
2629
  *
2203
- * Native `Blob` and `File` values are read asynchronously and hashed by
2204
- * content. Use `hashCacheKeySync` only when the key contains no async values.
2630
+ * Native `Blob` and `File` values use stable metadata by default. Pass
2631
+ * `serializeFileBytes: true` to read them asynchronously and include their byte
2632
+ * hash in the key.
2205
2633
  */
2206
- declare function hashCacheKey(input: CacheKeyHashInput): Promise<string>;
2634
+ declare function hashCacheKey(input: CacheKeyHashInput, options?: CacheKeyHashOptions): Promise<string>;
2207
2635
  /**
2208
2636
  * Synchronously hash cache key components. This supports JSON-like data and
2209
2637
  * in-memory binary values such as `Buffer`, `ArrayBuffer`, and typed arrays,
2210
- * but cannot content-hash native `Blob` or `File` values.
2638
+ * plus stable metadata for native `Blob` and `File` values.
2211
2639
  */
2212
2640
  declare function hashCacheKeySync(input: CacheKeyHashInput): string;
2213
2641
  //#endregion
@@ -2296,8 +2724,8 @@ type TraceSpanInfoUncached = TraceSpanInfoBase & {
2296
2724
  /**
2297
2725
  * Info accepted by `evalTracer.span(info, fn)` when opting in to caching.
2298
2726
  *
2299
- * Cached spans return `Promise<unknown>` because the replayed value comes from
2300
- * a JSON round-trip on cache hit. Narrow the value yourself when you need a
2727
+ * Cached spans return `Promise<unknown>` because the replayed value is revived
2728
+ * from persisted cache data on hit. Narrow the value yourself when you need a
2301
2729
  * typed return.
2302
2730
  */
2303
2731
  type TraceSpanInfoCached = TraceSpanInfoBase & {
@@ -2388,9 +2816,23 @@ type EvalRunner = {
2388
2816
  subscribe(runId: string, listener: (event: SseEnvelope) => void): () => void; /** Subscribe to discovery updates triggered by file changes or manual refresh. */
2389
2817
  subscribeDiscovery(listener: (event: SseEnvelope) => void): () => void; /** Stop background filesystem watchers owned by this runner instance. */
2390
2818
  close(): Promise<void>; /** Resolve the workspace root backing this runner instance. */
2391
- getWorkspaceRoot(): string; /** Resolve a persisted artifact path when artifact storage is supported. */
2819
+ getWorkspaceRoot(): string;
2820
+ /**
2821
+ * Resolved LLM-calls config used by the UI to derive the LLM calls tab.
2822
+ *
2823
+ * Returns the workspace's `llmCalls` config block from
2824
+ * `agent-evals.config.ts` with all defaults applied.
2825
+ */
2826
+ getLlmCallsConfig(): ResolvedLlmCallsConfig; /** Resolve a persisted artifact path when artifact storage is supported. */
2392
2827
  getArtifactPath(artifactId: string): string | undefined; /** Return summaries for every persisted cache entry in the workspace. */
2393
2828
  listCache(): Promise<CacheListItem[]>;
2829
+ /**
2830
+ * Return the full persisted cache entry for `namespace` + `key`, including
2831
+ * its recording. Returns `null` when no entry matches. Used by the case
2832
+ * drawer's Cache hits tab to lazily fetch the cached return value when a
2833
+ * row is expanded.
2834
+ */
2835
+ getCacheEntry(namespace: string, key: string): Promise<CacheEntry | null>;
2394
2836
  /**
2395
2837
  * Remove cache entries matching `filter`, or all entries when no filter is
2396
2838
  * supplied.
@@ -2445,4 +2887,4 @@ declare function createRunner({
2445
2887
  */
2446
2888
  declare function runCli(argv: string[]): Promise<void>;
2447
2889
  //#endregion
2448
- export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type NumberDisplayOptions, type RepoFileRef, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, 
cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, mergeEvalOutput, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
2890
+ export { type AgentEvalsConfig, type AssertionFailure, type CacheAdapter, type CacheEntry, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type 
TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheEntrySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalScope, runManifestSchema, runSummarySchema, scoreTraceSchema, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, 
traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };