@ls-stack/agent-eval 0.27.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-ByMLOds2.mjs → app-mBbAN-Gt.mjs} +15 -3
- package/dist/apps/web/dist/assets/index-8VE7b6RK.css +1 -0
- package/dist/apps/web/dist/assets/index-Czer_MdN.js +118 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-DRkwWgTj.mjs → cli-BQwRbqsL.mjs} +75 -4
- package/dist/index.d.mts +342 -90
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-DB0dwGrd.mjs → runOrchestration-ClWYWPen.mjs} +446 -203
- package/dist/{runner-39KGoaM1.mjs → runner-BQn_xf36.mjs} +1 -1
- package/dist/{runner-DRINcaN_.mjs → runner-DbVB66h9.mjs} +2 -2
- package/dist/src-CuirVcPY.mjs +3 -0
- package/package.json +6 -4
- package/skills/agent-eval/SKILL.md +52 -20
- package/dist/apps/web/dist/assets/index-DOXT0Y9V.css +0 -1
- package/dist/apps/web/dist/assets/index-DR2haqvV.js +0 -118
- package/dist/bin.d.mts +0 -1
- package/dist/runChild.d.mts +0 -1
- package/dist/src-BwKm3sKU.mjs +0 -3
package/dist/index.d.mts
CHANGED
|
@@ -99,6 +99,7 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
99
99
|
passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
|
|
100
100
|
maxStars: z$1.ZodOptional<z$1.ZodNumber>;
|
|
101
101
|
hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
102
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
102
103
|
align: z$1.ZodOptional<z$1.ZodEnum<{
|
|
103
104
|
left: "left";
|
|
104
105
|
center: "center";
|
|
@@ -381,13 +382,17 @@ type EvalStatAggregate = z$1.infer<typeof evalStatAggregateSchema>;
|
|
|
381
382
|
* `column` aggregates a score or numeric output column across the latest run.
|
|
382
383
|
*/
|
|
383
384
|
declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
385
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
384
386
|
kind: z$1.ZodLiteral<"cases">;
|
|
385
387
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
388
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
386
389
|
kind: z$1.ZodLiteral<"passRate">;
|
|
387
390
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
388
391
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
392
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
389
393
|
kind: z$1.ZodLiteral<"duration">;
|
|
390
394
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
395
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
391
396
|
kind: z$1.ZodLiteral<"column">;
|
|
392
397
|
key: z$1.ZodString;
|
|
393
398
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -419,13 +424,17 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
419
424
|
type EvalStatItem = z$1.infer<typeof evalStatItemSchema>;
|
|
420
425
|
/** Ordered list of stats rendered in the EvalCard stats row. */
|
|
421
426
|
declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
427
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
422
428
|
kind: z$1.ZodLiteral<"cases">;
|
|
423
429
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
430
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
424
431
|
kind: z$1.ZodLiteral<"passRate">;
|
|
425
432
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
426
433
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
434
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
427
435
|
kind: z$1.ZodLiteral<"duration">;
|
|
428
436
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
437
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
429
438
|
kind: z$1.ZodLiteral<"column">;
|
|
430
439
|
key: z$1.ZodString;
|
|
431
440
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -499,6 +508,7 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
499
508
|
passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
|
|
500
509
|
maxStars: z$1.ZodOptional<z$1.ZodNumber>;
|
|
501
510
|
hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
511
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
502
512
|
align: z$1.ZodOptional<z$1.ZodEnum<{
|
|
503
513
|
left: "left";
|
|
504
514
|
center: "center";
|
|
@@ -515,13 +525,17 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
515
525
|
unscored: "unscored";
|
|
516
526
|
}>>;
|
|
517
527
|
stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
528
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
518
529
|
kind: z$1.ZodLiteral<"cases">;
|
|
519
530
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
531
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
520
532
|
kind: z$1.ZodLiteral<"passRate">;
|
|
521
533
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
522
534
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
535
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
523
536
|
kind: z$1.ZodLiteral<"duration">;
|
|
524
537
|
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
538
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
525
539
|
kind: z$1.ZodLiteral<"column">;
|
|
526
540
|
key: z$1.ZodString;
|
|
527
541
|
label: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -551,6 +565,7 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
551
565
|
}, z$1.core.$strip>], "kind">>>;
|
|
552
566
|
charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
553
567
|
heading: z$1.ZodOptional<z$1.ZodString>;
|
|
568
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
554
569
|
type: z$1.ZodEnum<{
|
|
555
570
|
area: "area";
|
|
556
571
|
line: "line";
|
|
@@ -666,6 +681,7 @@ declare const caseRowSchema: z$1.ZodObject<{
|
|
|
666
681
|
type CaseRow = z$1.infer<typeof caseRowSchema>;
|
|
667
682
|
/** Structured assertion failure metadata captured for one case run. */
|
|
668
683
|
declare const assertionFailureSchema: z$1.ZodObject<{
|
|
684
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
669
685
|
message: z$1.ZodString;
|
|
670
686
|
stack: z$1.ZodOptional<z$1.ZodString>;
|
|
671
687
|
}, z$1.core.$strip>;
|
|
@@ -961,10 +977,12 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
961
977
|
fileName: z$1.ZodOptional<z$1.ZodString>;
|
|
962
978
|
}, z$1.core.$strip>]>]>>;
|
|
963
979
|
assertionFailures: z$1.ZodArray<z$1.ZodUnion<readonly [z$1.ZodObject<{
|
|
980
|
+
name: z$1.ZodOptional<z$1.ZodString>;
|
|
964
981
|
message: z$1.ZodString;
|
|
965
982
|
stack: z$1.ZodOptional<z$1.ZodString>;
|
|
966
983
|
}, z$1.core.$strip>, z$1.ZodPipe<z$1.ZodString, z$1.ZodTransform<{
|
|
967
984
|
message: string;
|
|
985
|
+
name?: string | undefined;
|
|
968
986
|
stack?: string | undefined;
|
|
969
987
|
}, string>>]>>;
|
|
970
988
|
logs: z$1.ZodDefault<z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -1008,6 +1026,8 @@ declare const caseDetailSchema: z$1.ZodObject<{
|
|
|
1008
1026
|
refresh: "refresh";
|
|
1009
1027
|
bypass: "bypass";
|
|
1010
1028
|
}>;
|
|
1029
|
+
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1030
|
+
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1011
1031
|
storedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
1012
1032
|
age: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1013
1033
|
}, z$1.core.$strip>>>;
|
|
@@ -1187,6 +1207,7 @@ type EvalChartTooltipExtra = z$1.infer<typeof evalChartTooltipExtraSchema>;
|
|
|
1187
1207
|
*/
|
|
1188
1208
|
declare const evalChartConfigSchema: z$1.ZodObject<{
|
|
1189
1209
|
heading: z$1.ZodOptional<z$1.ZodString>;
|
|
1210
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1190
1211
|
type: z$1.ZodEnum<{
|
|
1191
1212
|
area: "area";
|
|
1192
1213
|
line: "line";
|
|
@@ -1275,6 +1296,7 @@ type EvalChartConfig = z$1.infer<typeof evalChartConfigSchema>;
|
|
|
1275
1296
|
*/
|
|
1276
1297
|
declare const evalChartsConfigSchema: z$1.ZodArray<z$1.ZodObject<{
|
|
1277
1298
|
heading: z$1.ZodOptional<z$1.ZodString>;
|
|
1299
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
1278
1300
|
type: z$1.ZodEnum<{
|
|
1279
1301
|
area: "area";
|
|
1280
1302
|
line: "line";
|
|
@@ -1549,6 +1571,82 @@ declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<t
|
|
|
1549
1571
|
}>>]>;
|
|
1550
1572
|
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
1551
1573
|
type RemoveDefaultConfig = z$1.infer<typeof removeDefaultConfigSchema>;
|
|
1574
|
+
/** Single authored eval case with its stable identifier and input payload. */
|
|
1575
|
+
type EvalCase<TInput = unknown> = {
|
|
1576
|
+
id: string;
|
|
1577
|
+
input: TInput;
|
|
1578
|
+
tags?: string[];
|
|
1579
|
+
};
|
|
1580
|
+
/** Query helpers built from the flattened trace recorded for one eval case. */
|
|
1581
|
+
type EvalTraceTree = {
|
|
1582
|
+
spans: EvalTraceSpan[];
|
|
1583
|
+
rootSpans: EvalTraceSpan[];
|
|
1584
|
+
findSpan: (name: string) => EvalTraceSpan | undefined;
|
|
1585
|
+
findSpansByKind: (kind: string) => EvalTraceSpan[];
|
|
1586
|
+
flattenDfs: () => EvalTraceSpan[];
|
|
1587
|
+
checkpoints: Map<string, unknown>;
|
|
1588
|
+
};
|
|
1589
|
+
/** Context passed to `deriveFromTracing` after execution has completed. */
|
|
1590
|
+
type EvalDeriveContext<TInput = unknown> = {
|
|
1591
|
+
trace: EvalTraceTree;
|
|
1592
|
+
input: TInput;
|
|
1593
|
+
case: EvalCase<TInput>;
|
|
1594
|
+
};
|
|
1595
|
+
type MaybePromise<T> = T | Promise<T>;
|
|
1596
|
+
/** Function that derives one output value for a configured output key. */
|
|
1597
|
+
type EvalDeriveValueFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => MaybePromise<unknown>;
|
|
1598
|
+
/** Keyed `deriveFromTracing` config where each key derives one output value. */
|
|
1599
|
+
type EvalDeriveMap<TInput = unknown> = Record<string, EvalDeriveValueFn<TInput>>;
|
|
1600
|
+
/** Object-returning `deriveFromTracing` callback. */
|
|
1601
|
+
type EvalDeriveFn<TInput = unknown> = (ctx: EvalDeriveContext<TInput>) => Record<string, unknown> | Promise<Record<string, unknown>>;
|
|
1602
|
+
/** Trace-derived output config accepted globally and on eval definitions. */
|
|
1603
|
+
type EvalDeriveConfig<TInput = unknown> = EvalDeriveMap<TInput> | EvalDeriveFn<TInput>;
|
|
1604
|
+
/** Schema for keyed or object-returning trace-derived output config. */
|
|
1605
|
+
declare const evalDeriveConfigSchema: z$1.ZodType<EvalDeriveConfig>;
|
|
1606
|
+
/** UI overrides for a derived or scored column emitted by an eval. */
|
|
1607
|
+
type EvalColumnOverride = {
|
|
1608
|
+
/** Display label shown for the column in tables and detail views. */label?: string;
|
|
1609
|
+
/**
|
|
1610
|
+
* Presentation preset for the value.
|
|
1611
|
+
*
|
|
1612
|
+
* Use this to control how the UI renders the cell and infer table behavior,
|
|
1613
|
+
* for example `number`, `boolean`, `duration`, `markdown`, `json`, or
|
|
1614
|
+
* file/media previews.
|
|
1615
|
+
*/
|
|
1616
|
+
format?: ColumnFormat;
|
|
1617
|
+
/**
|
|
1618
|
+
* Extra options for `format: 'number'`.
|
|
1619
|
+
*
|
|
1620
|
+
* Use this to add a prefix or suffix, control minimum and maximum decimal
|
|
1621
|
+
* places, or switch to compact notation such as `1.2K`.
|
|
1622
|
+
*/
|
|
1623
|
+
numberFormat?: NumberDisplayOptions;
|
|
1624
|
+
/**
|
|
1625
|
+
* Hides the column from the runs table while keeping it available in detail
|
|
1626
|
+
* views and raw output data.
|
|
1627
|
+
*/
|
|
1628
|
+
hideInTable?: boolean;
|
|
1629
|
+
/**
|
|
1630
|
+
* Hides the column from the runs table when none of the rendered rows have a
|
|
1631
|
+
* value. Missing values, `null`, and empty strings count as no value; `0` and
|
|
1632
|
+
* `false` remain visible.
|
|
1633
|
+
*/
|
|
1634
|
+
hideIfNoValue?: boolean; /** Horizontal alignment used when rendering the column cells. */
|
|
1635
|
+
align?: 'left' | 'center' | 'right';
|
|
1636
|
+
/**
|
|
1637
|
+
* Maximum number of stars used when `format: 'stars'`.
|
|
1638
|
+
*
|
|
1639
|
+
* Values are still stored as normalized `0..1` numbers; the UI maps the
|
|
1640
|
+
* selected star count evenly across that range.
|
|
1641
|
+
*/
|
|
1642
|
+
maxStars?: number;
|
|
1643
|
+
};
|
|
1644
|
+
/** Column override map keyed by output or score field name. */
|
|
1645
|
+
type EvalColumns = Record<string, EvalColumnOverride>;
|
|
1646
|
+
/** Schema for UI overrides on derived or scored columns. */
|
|
1647
|
+
declare const evalColumnOverrideSchema: z$1.ZodType<EvalColumnOverride>;
|
|
1648
|
+
/** Schema for column override maps keyed by output or score field name. */
|
|
1649
|
+
declare const evalColumnsSchema: z$1.ZodType<EvalColumns>;
|
|
1552
1650
|
/** Render formats supported by an LLM-call metric in the UI. */
|
|
1553
1651
|
declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
|
|
1554
1652
|
string: "string";
|
|
@@ -1662,21 +1760,44 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
|
|
|
1662
1760
|
/** User-defined API-call metric authored in `agent-evals.config.ts`. */
|
|
1663
1761
|
type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
|
|
1664
1762
|
/**
|
|
1665
|
-
* Schema for
|
|
1666
|
-
|
|
1763
|
+
* Schema for pricing rates used to derive LLM-call costs from token counts.
|
|
1764
|
+
*/
|
|
1765
|
+
declare const llmCallPricingRateSchema: z$1.ZodObject<{
|
|
1766
|
+
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1767
|
+
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1768
|
+
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1769
|
+
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1770
|
+
cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1771
|
+
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1772
|
+
}, z$1.core.$strip>;
|
|
1773
|
+
/** Token pricing rates authored in `agent-evals.config.ts`. */
|
|
1774
|
+
type LlmCallPricingRate = z$1.infer<typeof llmCallPricingRateSchema>;
|
|
1775
|
+
/**
|
|
1776
|
+
* Schema for one model's pricing config. The object key is the exact model
|
|
1777
|
+
* name. Use `providers` when a model has provider-specific rates in addition
|
|
1778
|
+
* to, or instead of, generic model rates.
|
|
1667
1779
|
*/
|
|
1668
1780
|
declare const llmCallPricingSchema: z$1.ZodObject<{
|
|
1669
|
-
model: z$1.ZodString;
|
|
1670
|
-
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
1671
1781
|
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1672
1782
|
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1673
1783
|
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1674
1784
|
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1675
1785
|
cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1676
1786
|
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1787
|
+
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
1788
|
+
providers: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
|
|
1789
|
+
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1790
|
+
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1791
|
+
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1792
|
+
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1793
|
+
cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1794
|
+
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1795
|
+
}, z$1.core.$strip>>>;
|
|
1677
1796
|
}, z$1.core.$strip>;
|
|
1678
|
-
/** Model
|
|
1797
|
+
/** Model pricing config authored in `agent-evals.config.ts`. */
|
|
1679
1798
|
type LlmCallPricing = z$1.infer<typeof llmCallPricingSchema>;
|
|
1799
|
+
/** Model-keyed pricing registry authored in `agent-evals.config.ts`. */
|
|
1800
|
+
type LlmCallPricingRegistry = Record<string, LlmCallPricing>;
|
|
1680
1801
|
/** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
|
|
1681
1802
|
declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
1682
1803
|
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -1698,15 +1819,22 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
1698
1819
|
toolCalls: z$1.ZodOptional<z$1.ZodString>;
|
|
1699
1820
|
}, z$1.core.$strip>>;
|
|
1700
1821
|
derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
|
|
1701
|
-
pricing: z$1.ZodOptional<z$1.
|
|
1702
|
-
model: z$1.ZodString;
|
|
1703
|
-
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
1822
|
+
pricing: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
|
|
1704
1823
|
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1705
1824
|
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1706
1825
|
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1707
1826
|
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1708
1827
|
cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1709
1828
|
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1829
|
+
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
1830
|
+
providers: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
|
|
1831
|
+
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1832
|
+
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1833
|
+
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1834
|
+
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1835
|
+
cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1836
|
+
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1837
|
+
}, z$1.core.$strip>>>;
|
|
1710
1838
|
}, z$1.core.$strip>>>;
|
|
1711
1839
|
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
1712
1840
|
label: z$1.ZodString;
|
|
@@ -1855,7 +1983,7 @@ declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
|
|
|
1855
1983
|
* - Missing `metrics[].format` defaults to `'string'`.
|
|
1856
1984
|
* - Missing `metrics[].placements` defaults to `['body']`.
|
|
1857
1985
|
* - Missing `pricing` defaults to an empty registry; built-in costs are only
|
|
1858
|
-
* derived from configured pricing and token counts.
|
|
1986
|
+
* derived from configured model-keyed pricing and token counts.
|
|
1859
1987
|
*/
|
|
1860
1988
|
declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
|
|
1861
1989
|
/**
|
|
@@ -1904,12 +2032,35 @@ type AgentEvalsConfig = {
|
|
|
1904
2032
|
* definition taking precedence for matching `key` or `path` entries.
|
|
1905
2033
|
*/
|
|
1906
2034
|
traceDisplay?: TraceDisplayInputConfig;
|
|
2035
|
+
/**
|
|
2036
|
+
* Workspace-wide output columns applied to every eval.
|
|
2037
|
+
*
|
|
2038
|
+
* Eval-level `columns` with the same key take precedence. Built-in default
|
|
2039
|
+
* columns are still added first unless removed with `removeDefaultConfig`.
|
|
2040
|
+
*/
|
|
2041
|
+
columns?: EvalColumns;
|
|
2042
|
+
/**
|
|
2043
|
+
* Workspace-wide trace-derived outputs applied to every eval case.
|
|
2044
|
+
*
|
|
2045
|
+
* Prefer the keyed map form for shared metrics:
|
|
2046
|
+
* `{ toolCalls: ({ trace }) => trace.findSpansByKind('tool').length }`.
|
|
2047
|
+
* The object-returning function form is also supported. Derived outputs
|
|
2048
|
+
* only fill keys that were not already recorded by eval execution.
|
|
2049
|
+
*/
|
|
2050
|
+
deriveFromTracing?: EvalDeriveConfig;
|
|
2051
|
+
/**
|
|
2052
|
+
* Workspace-wide stats prepended to every eval's stats row.
|
|
2053
|
+
*
|
|
2054
|
+
* Eval-level stats render after these, and built-in default stats are
|
|
2055
|
+
* appended last unless removed with `removeDefaultConfig`.
|
|
2056
|
+
*/
|
|
2057
|
+
stats?: EvalStatsConfig;
|
|
1907
2058
|
/**
|
|
1908
2059
|
* Configuration for the "LLM calls" tab in the case-run drawer.
|
|
1909
2060
|
*
|
|
1910
2061
|
* Determines which trace spans are treated as LLM calls (`kinds`), how
|
|
1911
2062
|
* structured fields like `model` and `usage.inputTokens` are read from
|
|
1912
|
-
* span attributes, which pricing
|
|
2063
|
+
* span attributes, which pricing registry derives built-in costs, and which
|
|
1913
2064
|
* custom user-defined metrics are surfaced on each call. All fields are
|
|
1914
2065
|
* optional and fall back to the documented defaults; the LLM calls tab is
|
|
1915
2066
|
* shown automatically when at least one matching span exists in a case run.
|
|
@@ -1924,10 +2075,13 @@ type AgentEvalsConfig = {
|
|
|
1924
2075
|
* metrics: [
|
|
1925
2076
|
* { label: 'Retries', path: 'retryCount', format: 'number' },
|
|
1926
2077
|
* ],
|
|
1927
|
-
* pricing:
|
|
1928
|
-
*
|
|
1929
|
-
*
|
|
1930
|
-
*
|
|
2078
|
+
* pricing: {
|
|
2079
|
+
* 'gpt-4o-mini': {
|
|
2080
|
+
* provider: 'openai',
|
|
2081
|
+
* inputUsdPerMillion: 0.15,
|
|
2082
|
+
* outputUsdPerMillion: 0.6,
|
|
2083
|
+
* },
|
|
2084
|
+
* },
|
|
1931
2085
|
* }
|
|
1932
2086
|
* ```
|
|
1933
2087
|
*/
|
|
@@ -2036,6 +2190,47 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
2036
2190
|
transform: z$1.ZodOptional<z$1.ZodCustom<TraceAttributeTransform, TraceAttributeTransform>>;
|
|
2037
2191
|
}, z$1.core.$strip>>>;
|
|
2038
2192
|
}, z$1.core.$strip>>;
|
|
2193
|
+
columns: z$1.ZodOptional<z$1.ZodType<EvalColumns, unknown, z$1.core.$ZodTypeInternals<EvalColumns, unknown>>>;
|
|
2194
|
+
deriveFromTracing: z$1.ZodOptional<z$1.ZodType<EvalDeriveConfig<unknown>, unknown, z$1.core.$ZodTypeInternals<EvalDeriveConfig<unknown>, unknown>>>;
|
|
2195
|
+
stats: z$1.ZodOptional<z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
2196
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2197
|
+
kind: z$1.ZodLiteral<"cases">;
|
|
2198
|
+
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2199
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2200
|
+
kind: z$1.ZodLiteral<"passRate">;
|
|
2201
|
+
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2202
|
+
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2203
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2204
|
+
kind: z$1.ZodLiteral<"duration">;
|
|
2205
|
+
}, z$1.core.$strip>, z$1.ZodObject<{
|
|
2206
|
+
hideIfNoValue: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2207
|
+
kind: z$1.ZodLiteral<"column">;
|
|
2208
|
+
key: z$1.ZodString;
|
|
2209
|
+
label: z$1.ZodOptional<z$1.ZodString>;
|
|
2210
|
+
aggregate: z$1.ZodEnum<{
|
|
2211
|
+
last: "last";
|
|
2212
|
+
sum: "sum";
|
|
2213
|
+
avg: "avg";
|
|
2214
|
+
min: "min";
|
|
2215
|
+
max: "max";
|
|
2216
|
+
}>;
|
|
2217
|
+
format: z$1.ZodOptional<z$1.ZodEnum<{
|
|
2218
|
+
number: "number";
|
|
2219
|
+
boolean: "boolean";
|
|
2220
|
+
file: "file";
|
|
2221
|
+
duration: "duration";
|
|
2222
|
+
json: "json";
|
|
2223
|
+
markdown: "markdown";
|
|
2224
|
+
image: "image";
|
|
2225
|
+
audio: "audio";
|
|
2226
|
+
video: "video";
|
|
2227
|
+
percent: "percent";
|
|
2228
|
+
passFail: "passFail";
|
|
2229
|
+
stars: "stars";
|
|
2230
|
+
}>>;
|
|
2231
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
2232
|
+
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2233
|
+
}, z$1.core.$strip>], "kind">>>;
|
|
2039
2234
|
llmCalls: z$1.ZodOptional<z$1.ZodObject<{
|
|
2040
2235
|
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
2041
2236
|
attributes: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2056,15 +2251,22 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
2056
2251
|
toolCalls: z$1.ZodOptional<z$1.ZodString>;
|
|
2057
2252
|
}, z$1.core.$strip>>;
|
|
2058
2253
|
derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
|
|
2059
|
-
pricing: z$1.ZodOptional<z$1.
|
|
2060
|
-
model: z$1.ZodString;
|
|
2061
|
-
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
2254
|
+
pricing: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
|
|
2062
2255
|
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2063
2256
|
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2064
2257
|
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2065
2258
|
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2066
2259
|
cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2067
2260
|
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2261
|
+
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
2262
|
+
providers: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodObject<{
|
|
2263
|
+
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2264
|
+
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2265
|
+
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2266
|
+
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2267
|
+
cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2268
|
+
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2269
|
+
}, z$1.core.$strip>>>;
|
|
2068
2270
|
}, z$1.core.$strip>>>;
|
|
2069
2271
|
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
2070
2272
|
label: z$1.ZodString;
|
|
@@ -2327,6 +2529,8 @@ declare const traceCacheRefSchema: z$1.ZodObject<{
|
|
|
2327
2529
|
hit: "hit";
|
|
2328
2530
|
miss: "miss";
|
|
2329
2531
|
}>;
|
|
2532
|
+
read: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2533
|
+
stored: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
2330
2534
|
storedAt: z$1.ZodOptional<z$1.ZodString>;
|
|
2331
2535
|
age: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2332
2536
|
}, z$1.core.$strip>;
|
|
@@ -2344,7 +2548,6 @@ declare const cacheListItemSchema: z$1.ZodObject<{
|
|
|
2344
2548
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
2345
2549
|
spanKind: z$1.ZodOptional<z$1.ZodString>;
|
|
2346
2550
|
storedAt: z$1.ZodString;
|
|
2347
|
-
codeFingerprint: z$1.ZodString;
|
|
2348
2551
|
sizeBytes: z$1.ZodNumber;
|
|
2349
2552
|
}, z$1.core.$strip>;
|
|
2350
2553
|
/** Summary row for a single cache entry. */
|
|
@@ -2469,7 +2672,6 @@ declare const cacheEntrySchema: z$1.ZodObject<{
|
|
|
2469
2672
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
2470
2673
|
spanKind: z$1.ZodOptional<z$1.ZodString>;
|
|
2471
2674
|
storedAt: z$1.ZodString;
|
|
2472
|
-
codeFingerprint: z$1.ZodString;
|
|
2473
2675
|
recording: z$1.ZodObject<{
|
|
2474
2676
|
returnValue: z$1.ZodUnknown;
|
|
2475
2677
|
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
@@ -2542,7 +2744,6 @@ declare const cacheDebugKeyEntrySchema: z$1.ZodObject<{
|
|
|
2542
2744
|
}>;
|
|
2543
2745
|
operationName: z$1.ZodString;
|
|
2544
2746
|
storedAt: z$1.ZodString;
|
|
2545
|
-
codeFingerprint: z$1.ZodString;
|
|
2546
2747
|
rawKey: z$1.ZodUnknown;
|
|
2547
2748
|
}, z$1.core.$strip>;
|
|
2548
2749
|
/** Debug-only raw cache key entry. May contain sensitive prompt/input data. */
|
|
@@ -2560,7 +2761,6 @@ declare const cacheEntryWithDebugKeySchema: z$1.ZodObject<{
|
|
|
2560
2761
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
2561
2762
|
spanKind: z$1.ZodOptional<z$1.ZodString>;
|
|
2562
2763
|
storedAt: z$1.ZodString;
|
|
2563
|
-
codeFingerprint: z$1.ZodString;
|
|
2564
2764
|
recording: z$1.ZodObject<{
|
|
2565
2765
|
returnValue: z$1.ZodUnknown;
|
|
2566
2766
|
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
@@ -2629,7 +2829,6 @@ declare const cacheEntryWithDebugKeySchema: z$1.ZodObject<{
|
|
|
2629
2829
|
}>;
|
|
2630
2830
|
operationName: z$1.ZodString;
|
|
2631
2831
|
storedAt: z$1.ZodString;
|
|
2632
|
-
codeFingerprint: z$1.ZodString;
|
|
2633
2832
|
rawKey: z$1.ZodUnknown;
|
|
2634
2833
|
}, z$1.core.$strip>>;
|
|
2635
2834
|
}, z$1.core.$strip>;
|
|
@@ -2651,7 +2850,6 @@ declare const cacheFileSchema: z$1.ZodObject<{
|
|
|
2651
2850
|
spanName: z$1.ZodOptional<z$1.ZodString>;
|
|
2652
2851
|
spanKind: z$1.ZodOptional<z$1.ZodString>;
|
|
2653
2852
|
storedAt: z$1.ZodString;
|
|
2654
|
-
codeFingerprint: z$1.ZodString;
|
|
2655
2853
|
recording: z$1.ZodObject<{
|
|
2656
2854
|
returnValue: z$1.ZodUnknown;
|
|
2657
2855
|
finalAttributes: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnknown>;
|
|
@@ -2728,7 +2926,6 @@ declare const cacheDebugKeyFileSchema: z$1.ZodObject<{
|
|
|
2728
2926
|
}>;
|
|
2729
2927
|
operationName: z$1.ZodString;
|
|
2730
2928
|
storedAt: z$1.ZodString;
|
|
2731
|
-
codeFingerprint: z$1.ZodString;
|
|
2732
2929
|
rawKey: z$1.ZodUnknown;
|
|
2733
2930
|
}, z$1.core.$strip>>;
|
|
2734
2931
|
}, z$1.core.$strip>;
|
|
@@ -2742,16 +2939,19 @@ type CacheDebugKeyFile = z$1.infer<typeof cacheDebugKeyFileSchema>;
|
|
|
2742
2939
|
*
|
|
2743
2940
|
* `action === 'hit'` rows reused an existing persisted cache entry.
|
|
2744
2941
|
* `action === 'added'` rows came from a miss or refresh that wrote a persisted
|
|
2745
|
-
* cache entry during the run. `
|
|
2746
|
-
*
|
|
2747
|
-
*
|
|
2942
|
+
* cache entry during the run. `action === 'notStored'` rows executed a cached
|
|
2943
|
+
* operation but did not persist it because storage was disabled for that eval
|
|
2944
|
+
* scope. `origin === 'caseRoot'` rows came from `evalTracer.cache(...)` calls
|
|
2945
|
+
* made directly from the case body (no surrounding `traceSpan`), which would
|
|
2946
|
+
* otherwise be invisible.
|
|
2748
2947
|
*/
|
|
2749
2948
|
type CacheActivityEntry = {
|
|
2750
2949
|
id: string;
|
|
2751
2950
|
source: 'span' | 'value';
|
|
2752
2951
|
origin: 'span' | 'caseRoot';
|
|
2753
|
-
action: 'hit' | 'added';
|
|
2952
|
+
action: 'hit' | 'added' | 'notStored';
|
|
2754
2953
|
status: 'hit' | 'miss' | 'refresh';
|
|
2954
|
+
stored: boolean;
|
|
2755
2955
|
name: string;
|
|
2756
2956
|
namespace: string;
|
|
2757
2957
|
key: string;
|
|
@@ -2840,12 +3040,6 @@ declare const updateManualScoreRequestSchema: z$1.ZodObject<{
|
|
|
2840
3040
|
type UpdateManualScoreRequest = z$1.infer<typeof updateManualScoreRequestSchema>;
|
|
2841
3041
|
//#endregion
|
|
2842
3042
|
//#region ../sdk/src/types.d.ts
|
|
2843
|
-
/** Single authored eval case with its stable identifier and input payload. */
|
|
2844
|
-
type EvalCase<TInput> = {
|
|
2845
|
-
id: string;
|
|
2846
|
-
input: TInput;
|
|
2847
|
-
tags?: string[];
|
|
2848
|
-
};
|
|
2849
3043
|
/** Runtime output values collected from output helpers and `deriveFromTracing`. */
|
|
2850
3044
|
type EvalOutputs = Record<string, unknown>;
|
|
2851
3045
|
/**
|
|
@@ -2861,48 +3055,22 @@ type EvalStartTime = Date | number | string;
|
|
|
2861
3055
|
* `deriveFromTracing` finish, before computed scores run.
|
|
2862
3056
|
*/
|
|
2863
3057
|
type EvalOutputsSchema<TOutputs extends EvalOutputs> = z$1.ZodType<TOutputs>;
|
|
2864
|
-
/**
|
|
2865
|
-
type
|
|
2866
|
-
/** Display label shown for the column in tables and detail views. */label?: string;
|
|
3058
|
+
/** Per-eval controls for SDK operation caching. */
|
|
3059
|
+
type EvalCacheConfig = {
|
|
2867
3060
|
/**
|
|
2868
|
-
*
|
|
3061
|
+
* Whether cached spans and value caches may read existing persisted entries.
|
|
2869
3062
|
*
|
|
2870
|
-
*
|
|
2871
|
-
*
|
|
2872
|
-
* file/media previews.
|
|
3063
|
+
* Defaults to `true`. Set to `false` when this eval should always execute
|
|
3064
|
+
* cached operations instead of replaying previous results.
|
|
2873
3065
|
*/
|
|
2874
|
-
|
|
3066
|
+
read?: boolean;
|
|
2875
3067
|
/**
|
|
2876
|
-
*
|
|
3068
|
+
* Whether cached spans and value caches may persist entries after execution.
|
|
2877
3069
|
*
|
|
2878
|
-
*
|
|
2879
|
-
*
|
|
3070
|
+
* Defaults to `true`. Set to `false` when this eval may reuse existing cache
|
|
3071
|
+
* entries but must not create or refresh stored cache files.
|
|
2880
3072
|
*/
|
|
2881
|
-
|
|
2882
|
-
/**
|
|
2883
|
-
* Hides the column from the runs table while keeping it available in detail
|
|
2884
|
-
* views and raw output data.
|
|
2885
|
-
*/
|
|
2886
|
-
hideInTable?: boolean; /** Horizontal alignment used when rendering the column cells. */
|
|
2887
|
-
align?: 'left' | 'center' | 'right';
|
|
2888
|
-
/**
|
|
2889
|
-
* Maximum number of stars used when `format: 'stars'`.
|
|
2890
|
-
*
|
|
2891
|
-
* Values are still stored as normalized `0..1` numbers; the UI maps the
|
|
2892
|
-
* selected star count evenly across that range.
|
|
2893
|
-
*/
|
|
2894
|
-
maxStars?: number;
|
|
2895
|
-
};
|
|
2896
|
-
/** Column override map keyed by output or score field name. */
|
|
2897
|
-
type EvalColumns = Record<string, EvalColumnOverride>;
|
|
2898
|
-
/** Query helpers built from the flattened trace recorded for one eval case. */
|
|
2899
|
-
type EvalTraceTree = {
|
|
2900
|
-
spans: EvalTraceSpan[];
|
|
2901
|
-
rootSpans: EvalTraceSpan[];
|
|
2902
|
-
findSpan: (name: string) => EvalTraceSpan | undefined;
|
|
2903
|
-
findSpansByKind: (kind: string) => EvalTraceSpan[];
|
|
2904
|
-
flattenDfs: () => EvalTraceSpan[];
|
|
2905
|
-
checkpoints: Map<string, unknown>;
|
|
3073
|
+
store?: boolean;
|
|
2906
3074
|
};
|
|
2907
3075
|
/** Type-safe output writer passed to an eval's `execute` function. */
|
|
2908
3076
|
type EvalSetOutput<TOutputs extends EvalOutputs = EvalOutputs> = <TKey extends Extract<keyof TOutputs, string>>(
|
|
@@ -2930,12 +3098,6 @@ type EvalExecuteContext<TInput, TOutputs extends EvalOutputs = EvalOutputs> = {
|
|
|
2930
3098
|
*/
|
|
2931
3099
|
setOutput: EvalSetOutput<TOutputs>;
|
|
2932
3100
|
};
|
|
2933
|
-
/** Context passed to `deriveFromTracing` after execution has completed. */
|
|
2934
|
-
type EvalDeriveContext<TInput> = {
|
|
2935
|
-
trace: EvalTraceTree;
|
|
2936
|
-
input: TInput;
|
|
2937
|
-
case: EvalCase<TInput>;
|
|
2938
|
-
};
|
|
2939
3101
|
/** Context passed to score functions after outputs have been collected. */
|
|
2940
3102
|
type EvalScoreContext<TInput, TOutputs extends EvalOutputs = EvalOutputs> = {
|
|
2941
3103
|
input: TInput;
|
|
@@ -2995,8 +3157,27 @@ type EvalDefinitionOutputSchemaConfig<TOutputs extends EvalOutputs> = [EvalOutpu
|
|
|
2995
3157
|
outputsSchema: EvalOutputsSchema<TOutputs>;
|
|
2996
3158
|
};
|
|
2997
3159
|
type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOutputs> = {
|
|
3160
|
+
/**
|
|
3161
|
+
* Stable eval identifier within the authored eval file.
|
|
3162
|
+
*
|
|
3163
|
+
* The runner combines this value with the workspace-relative file path to
|
|
3164
|
+
* form the eval key used for targeting, persisted runs, and UI navigation.
|
|
3165
|
+
*/
|
|
2998
3166
|
id: string;
|
|
3167
|
+
/**
|
|
3168
|
+
* Human-readable eval name shown in the CLI and web UI.
|
|
3169
|
+
*
|
|
3170
|
+
* When omitted, consumers fall back to `id`.
|
|
3171
|
+
*/
|
|
2999
3172
|
title?: string;
|
|
3173
|
+
/**
|
|
3174
|
+
* Per-eval cache controls. Both `read` and `store` default to `true`.
|
|
3175
|
+
*
|
|
3176
|
+
* `read: false` skips cache lookups for this eval. `store: false` prevents
|
|
3177
|
+
* new or refreshed entries from being written while still allowing reads
|
|
3178
|
+
* unless `read` is also disabled.
|
|
3179
|
+
*/
|
|
3180
|
+
cache?: EvalCacheConfig;
|
|
3000
3181
|
/**
|
|
3001
3182
|
* Authored cases for this eval.
|
|
3002
3183
|
*
|
|
@@ -3004,6 +3185,13 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
|
|
|
3004
3185
|
* eval once using a synthetic case with empty object input.
|
|
3005
3186
|
*/
|
|
3006
3187
|
cases?: EvalCase<TInput>[] | (() => Promise<EvalCase<TInput>[]>);
|
|
3188
|
+
/**
|
|
3189
|
+
* Output and score column display overrides for this eval.
|
|
3190
|
+
*
|
|
3191
|
+
* Use this to label, format, group, hide, or otherwise customize columns
|
|
3192
|
+
* produced by default config, output helpers, `deriveFromTracing`, scores,
|
|
3193
|
+
* or manual scores.
|
|
3194
|
+
*/
|
|
3007
3195
|
columns?: EvalColumns;
|
|
3008
3196
|
/**
|
|
3009
3197
|
* Per-eval trace attribute display rules for the UI.
|
|
@@ -3038,8 +3226,30 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
|
|
|
3038
3226
|
* elapsed time from the configured `startTime`.
|
|
3039
3227
|
*/
|
|
3040
3228
|
freezeTime?: boolean;
|
|
3229
|
+
/**
|
|
3230
|
+
* Run one eval case.
|
|
3231
|
+
*
|
|
3232
|
+
* The callback receives the authored case input and a typed `setOutput`
|
|
3233
|
+
* helper. It may record outputs, run assertions, start traced work, and
|
|
3234
|
+
* return either synchronously or asynchronously. Thrown errors fail the
|
|
3235
|
+
* active case and skip later computed scores for that case.
|
|
3236
|
+
*/
|
|
3041
3237
|
execute: (ctx: EvalExecuteContext<TInput, TOutputs>) => Promise<void> | void;
|
|
3042
|
-
|
|
3238
|
+
/**
|
|
3239
|
+
* Derive additional output fields from the case trace after `execute`.
|
|
3240
|
+
*
|
|
3241
|
+
* Prefer the keyed map form when each key has one derivation. The
|
|
3242
|
+
* object-returning callback form is also supported. Derived values only fill
|
|
3243
|
+
* keys not already recorded during execution.
|
|
3244
|
+
*/
|
|
3245
|
+
deriveFromTracing?: EvalDeriveConfig<TInput>;
|
|
3246
|
+
/**
|
|
3247
|
+
* Computed score columns for each case.
|
|
3248
|
+
*
|
|
3249
|
+
* Each key becomes a persisted score column. A score can be a bare callback
|
|
3250
|
+
* or an object with UI metadata and an optional `passThreshold`; thresholds
|
|
3251
|
+
* fail a case only when the computed value is strictly below the threshold.
|
|
3252
|
+
*/
|
|
3043
3253
|
scores?: Record<string, EvalScoreDef<TInput, TOutputs>>;
|
|
3044
3254
|
/**
|
|
3045
3255
|
* Score columns whose values are entered in the web UI after a run.
|
|
@@ -3061,8 +3271,8 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
|
|
|
3061
3271
|
* column across the latest run's cases — `key` must match one of the eval's
|
|
3062
3272
|
* score or column keys, and only finite numeric values participate in the
|
|
3063
3273
|
* reduction. When no case has a numeric value for the key the stat renders
|
|
3064
|
-
* an em dash. `label`, `format`, and `numberFormat` default to the matching
|
|
3065
|
-
* `ColumnDef`.
|
|
3274
|
+
* an em dash, or hides when `hideIfNoValue` is true. `label`, `format`, and
|
|
3275
|
+
* `numberFormat` default to the matching `ColumnDef`.
|
|
3066
3276
|
*/
|
|
3067
3277
|
stats?: EvalStatsConfig;
|
|
3068
3278
|
/**
|
|
@@ -3077,7 +3287,8 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
|
|
|
3077
3287
|
* the run summary. Column metrics aggregate a score or numeric output column
|
|
3078
3288
|
* across the run using an `aggregate` reducer (`avg`, `sum`, `min`, `max`,
|
|
3079
3289
|
* `latest`, `passThresholdRate`). `passThresholdRate` requires a score column
|
|
3080
|
-
* with `passThreshold`.
|
|
3290
|
+
* with `passThreshold`. Set `hideIfNoValue` to hide a chart until at least
|
|
3291
|
+
* one metric has a numeric value in the rendered history window.
|
|
3081
3292
|
*/
|
|
3082
3293
|
charts?: EvalChartsConfig;
|
|
3083
3294
|
/**
|
|
@@ -3176,7 +3387,6 @@ type CacheDebugKeyWrite = {
|
|
|
3176
3387
|
rawKey: unknown;
|
|
3177
3388
|
operationType: CacheOperationType;
|
|
3178
3389
|
operationName: string;
|
|
3179
|
-
codeFingerprint: string;
|
|
3180
3390
|
};
|
|
3181
3391
|
/**
|
|
3182
3392
|
* Adapter used by the SDK to read and write cache entries.
|
|
@@ -3199,8 +3409,21 @@ type CacheAdapter = {
|
|
|
3199
3409
|
type CacheScopeContext = {
|
|
3200
3410
|
adapter: CacheAdapter;
|
|
3201
3411
|
mode: CacheMode;
|
|
3202
|
-
evalId: string;
|
|
3203
|
-
|
|
3412
|
+
evalId: string;
|
|
3413
|
+
/**
|
|
3414
|
+
* Whether cache lookups are allowed for this eval scope. Defaults to `true`.
|
|
3415
|
+
*
|
|
3416
|
+
* Run-level `bypass` and `refresh` modes still take precedence and skip
|
|
3417
|
+
* reads even when this is enabled.
|
|
3418
|
+
*/
|
|
3419
|
+
read?: boolean;
|
|
3420
|
+
/**
|
|
3421
|
+
* Whether cache writes are allowed for this eval scope. Defaults to `true`.
|
|
3422
|
+
*
|
|
3423
|
+
* Run-level `bypass` still takes precedence and skips writes even when this
|
|
3424
|
+
* is enabled.
|
|
3425
|
+
*/
|
|
3426
|
+
store?: boolean;
|
|
3204
3427
|
};
|
|
3205
3428
|
/** Active recording frame captured while a cached operation body executes. */
|
|
3206
3429
|
type CacheRecordingFrame = {
|
|
@@ -3397,7 +3620,7 @@ declare function incrementEvalOutput(key: string, delta: number): void;
|
|
|
3397
3620
|
declare function evalAssert(condition: unknown, message: string): asserts condition;
|
|
3398
3621
|
//#endregion
|
|
3399
3622
|
//#region ../sdk/src/cacheSerialization.d.ts
|
|
3400
|
-
declare const serializedCacheValueMarker = "
|
|
3623
|
+
declare const serializedCacheValueMarker = "__aecs";
|
|
3401
3624
|
declare const jsonSafeCacheValueVersion = "json-safe-v1";
|
|
3402
3625
|
type JsonSafeCacheValueType = 'ArrayBuffer' | 'BigInt' | 'Blob' | 'CompressedJson' | 'CompressedString' | 'Date' | 'Error' | 'File' | 'Float64Array' | 'Headers' | 'Map' | 'Number' | 'Object' | 'RegExp' | 'Set' | 'URL' | 'URLSearchParams' | 'Undefined';
|
|
3403
3626
|
type JsonSafeSerializedCacheValue = {
|
|
@@ -3409,16 +3632,32 @@ type JsonSafeSerializedCacheValue = {
|
|
|
3409
3632
|
};
|
|
3410
3633
|
/** JSON-safe persisted representation for one rich cached value. */
|
|
3411
3634
|
type SerializedCacheValue = JsonSafeSerializedCacheValue;
|
|
3635
|
+
/** Options controlling how rich cache values are persisted as JSON-safe data. */
|
|
3636
|
+
type CacheSerializationOptions = {
|
|
3637
|
+
/**
|
|
3638
|
+
* Preserve JavaScript `undefined` values with explicit tagged wrappers.
|
|
3639
|
+
*
|
|
3640
|
+
* Disabled by default so undefined object fields, array items, map entries,
|
|
3641
|
+
* and set items are omitted instead of being written to cache files.
|
|
3642
|
+
*/
|
|
3643
|
+
preserveUndefined?: boolean;
|
|
3644
|
+
};
|
|
3412
3645
|
/**
|
|
3413
3646
|
* Serialize one cached value while keeping plain JSON as plain JSON.
|
|
3414
3647
|
*
|
|
3415
|
-
* Rich runtime values use small tagged wrappers.
|
|
3648
|
+
* Rich runtime values use small tagged wrappers. Undefined values are omitted
|
|
3649
|
+
* by default; pass `preserveUndefined: true` to round-trip them explicitly.
|
|
3416
3650
|
*/
|
|
3417
|
-
declare function serializeCacheValue(value: unknown): Promise<unknown>;
|
|
3651
|
+
declare function serializeCacheValue(value: unknown, options?: CacheSerializationOptions | undefined): Promise<unknown>;
|
|
3418
3652
|
/** Revive one cached value, while preserving legacy JSON-round-tripped data. */
|
|
3419
3653
|
declare function deserializeCacheValue(value: unknown): unknown;
|
|
3420
|
-
/**
|
|
3421
|
-
|
|
3654
|
+
/**
|
|
3655
|
+
* Serialize all rich values captured in a cache recording before persistence.
|
|
3656
|
+
*
|
|
3657
|
+
* Undefined values are omitted by default; pass `preserveUndefined: true` to
|
|
3658
|
+
* retain the legacy explicit undefined wrappers in the recording payload.
|
|
3659
|
+
*/
|
|
3660
|
+
declare function serializeCacheRecording(recording: CacheRecording, options?: CacheSerializationOptions | undefined): Promise<CacheRecording>;
|
|
3422
3661
|
/** Revive all rich values captured in a cache recording after lookup. */
|
|
3423
3662
|
declare function deserializeCacheRecording(recording: CacheRecording): CacheRecording;
|
|
3424
3663
|
//#endregion
|
|
@@ -3637,6 +3876,15 @@ type CacheClearFilter = {
|
|
|
3637
3876
|
key?: string;
|
|
3638
3877
|
};
|
|
3639
3878
|
//#endregion
|
|
3879
|
+
//#region ../runner/src/recalculateDerivedAttributes.d.ts
|
|
3880
|
+
type RecalculateDerivedAttributesResult = {
|
|
3881
|
+
updated: true;
|
|
3882
|
+
caseDetail: CaseDetail;
|
|
3883
|
+
} | {
|
|
3884
|
+
updated: false;
|
|
3885
|
+
reason: string;
|
|
3886
|
+
};
|
|
3887
|
+
//#endregion
|
|
3640
3888
|
//#region ../runner/src/runner.d.ts
|
|
3641
3889
|
/** Imperative runner interface used by the server and CLI. */
|
|
3642
3890
|
type EvalRunner = {
|
|
@@ -3703,7 +3951,11 @@ type EvalRunner = {
|
|
|
3703
3951
|
*/
|
|
3704
3952
|
recomputeStatusesForEval(evalKey: string): Promise<{
|
|
3705
3953
|
updatedRuns: number;
|
|
3706
|
-
}>;
|
|
3954
|
+
}>; /** Recalculate configured LLM/API derived attributes for one persisted case trace. */
|
|
3955
|
+
recalculateDerivedAttributesForCase(params: {
|
|
3956
|
+
runId: string;
|
|
3957
|
+
caseId: string;
|
|
3958
|
+
}): Promise<RecalculateDerivedAttributesResult>;
|
|
3707
3959
|
/**
|
|
3708
3960
|
* Delete terminal persisted runs that touch one eval from memory and disk.
|
|
3709
3961
|
* Accepts the exact eval key, with a legacy fallback for unique eval ids.
|
|
@@ -3754,4 +4006,4 @@ declare function createRunner({
|
|
|
3754
4006
|
*/
|
|
3755
4007
|
declare function runCli(argv: string[]): Promise<void>;
|
|
3756
4008
|
//#endregion
|
|
3757
|
-
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type 
LlmCallPricing, type LlmCallsConfigInput, type NumberDisplayOptions, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, 
evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
4009
|
+
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheSerializationOptions, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, type DiscoveryIssue, EvalAssertionError, type EvalCacheConfig, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveConfig, type EvalDeriveContext, type EvalDeriveFn, type EvalDeriveMap, type EvalDeriveValueFn, type EvalDisplayStatus, type EvalExecuteContext, type EvalExpectation, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type 
JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallPricingRate, type LlmCallPricingRegistry, type LlmCallsConfigInput, type NumberDisplayOptions, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, 
deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|