@ls-stack/agent-eval 0.19.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-hAlVvT-Q.mjs → app-CmwmcUgG.mjs} +3 -3
- package/dist/apps/web/dist/assets/index-EXO08yya.js +118 -0
- package/dist/apps/web/dist/assets/index-r0dVFK0B.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-3zANEAhG.mjs → cli-DumvanQI.mjs} +10 -80
- package/dist/index.d.mts +131 -14
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-BBg_VUH5.mjs → runOrchestration-zYAcAPtS.mjs} +567 -24
- package/dist/{runner-DxlahWDo.mjs → runner-BcwyX9CO.mjs} +1 -1
- package/dist/{runner-RmZPRz-h.mjs → runner-Dy_PECaf.mjs} +2 -2
- package/dist/src-BoAJb4wC.mjs +3 -0
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +35 -14
- package/dist/apps/web/dist/assets/index-C761goIh.css +0 -1
- package/dist/apps/web/dist/assets/index-DS552a3u.js +0 -118
- package/dist/src-BC4OrajN.mjs +0 -3
package/dist/index.d.mts
CHANGED
|
@@ -39,8 +39,9 @@ type NumberDisplayOptions = {
|
|
|
39
39
|
/** Number notation used when rendering the value. */notation?: 'standard' | 'compact'; /** Compact style used when `notation: 'compact'` is enabled. */
|
|
40
40
|
compactDisplay?: 'short' | 'long'; /** String prepended to the rendered number, such as `$`. */
|
|
41
41
|
prefix?: string; /** String appended to the rendered number, such as ` ms`. */
|
|
42
|
-
suffix?: string; /**
|
|
43
|
-
|
|
42
|
+
suffix?: string; /** Minimum number of decimal places to render. */
|
|
43
|
+
minDecimalPlaces?: number; /** Maximum number of decimal places to render. */
|
|
44
|
+
maxDecimalPlaces?: number;
|
|
44
45
|
};
|
|
45
46
|
/** Schema for numeric presentation options used by number-formatted values. */
|
|
46
47
|
declare const numberDisplayOptionsSchema: z$1.ZodType<NumberDisplayOptions>;
|
|
@@ -98,7 +99,6 @@ declare const columnDefSchema: z$1.ZodObject<{
|
|
|
98
99
|
passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
|
|
99
100
|
maxStars: z$1.ZodOptional<z$1.ZodNumber>;
|
|
100
101
|
hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
101
|
-
sortable: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
102
102
|
align: z$1.ZodOptional<z$1.ZodEnum<{
|
|
103
103
|
left: "left";
|
|
104
104
|
center: "center";
|
|
@@ -412,6 +412,7 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
|
|
|
412
412
|
passFail: "passFail";
|
|
413
413
|
stars: "stars";
|
|
414
414
|
}>>;
|
|
415
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
415
416
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
416
417
|
}, z$1.core.$strip>], "kind">;
|
|
417
418
|
/** Single stat rendered in the EvalCard stats row. */
|
|
@@ -449,6 +450,7 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
|
|
|
449
450
|
passFail: "passFail";
|
|
450
451
|
stars: "stars";
|
|
451
452
|
}>>;
|
|
453
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
452
454
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
453
455
|
}, z$1.core.$strip>], "kind">>;
|
|
454
456
|
/** Ordered list of stats rendered in the EvalCard stats row. */
|
|
@@ -496,7 +498,6 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
496
498
|
passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
|
|
497
499
|
maxStars: z$1.ZodOptional<z$1.ZodNumber>;
|
|
498
500
|
hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
499
|
-
sortable: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
500
501
|
align: z$1.ZodOptional<z$1.ZodEnum<{
|
|
501
502
|
left: "left";
|
|
502
503
|
center: "center";
|
|
@@ -544,6 +545,7 @@ declare const evalSummarySchema: z$1.ZodObject<{
|
|
|
544
545
|
passFail: "passFail";
|
|
545
546
|
stars: "stars";
|
|
546
547
|
}>>;
|
|
548
|
+
numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
|
|
547
549
|
accent: z$1.ZodOptional<z$1.ZodBoolean>;
|
|
548
550
|
}, z$1.core.$strip>], "kind">>>;
|
|
549
551
|
charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -1473,6 +1475,36 @@ declare const trialSelectionModeSchema: z$1.ZodEnum<{
|
|
|
1473
1475
|
}>;
|
|
1474
1476
|
/** Strategy used to collapse repeated trials into one stored case result. */
|
|
1475
1477
|
type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
|
|
1478
|
+
/** Built-in eval-level output/column keys. */
|
|
1479
|
+
declare const defaultConfigKeySchema: z$1.ZodEnum<{
|
|
1480
|
+
apiCalls: "apiCalls";
|
|
1481
|
+
costUsd: "costUsd";
|
|
1482
|
+
llmTurns: "llmTurns";
|
|
1483
|
+
inputTokens: "inputTokens";
|
|
1484
|
+
outputTokens: "outputTokens";
|
|
1485
|
+
totalTokens: "totalTokens";
|
|
1486
|
+
cachedInputTokens: "cachedInputTokens";
|
|
1487
|
+
cacheCreationInputTokens: "cacheCreationInputTokens";
|
|
1488
|
+
reasoningTokens: "reasoningTokens";
|
|
1489
|
+
llmLatencyMs: "llmLatencyMs";
|
|
1490
|
+
}>;
|
|
1491
|
+
/** Built-in eval-level output/column key. */
|
|
1492
|
+
type DefaultConfigKey = z$1.infer<typeof defaultConfigKeySchema>;
|
|
1493
|
+
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
1494
|
+
declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
|
|
1495
|
+
apiCalls: "apiCalls";
|
|
1496
|
+
costUsd: "costUsd";
|
|
1497
|
+
llmTurns: "llmTurns";
|
|
1498
|
+
inputTokens: "inputTokens";
|
|
1499
|
+
outputTokens: "outputTokens";
|
|
1500
|
+
totalTokens: "totalTokens";
|
|
1501
|
+
cachedInputTokens: "cachedInputTokens";
|
|
1502
|
+
cacheCreationInputTokens: "cacheCreationInputTokens";
|
|
1503
|
+
reasoningTokens: "reasoningTokens";
|
|
1504
|
+
llmLatencyMs: "llmLatencyMs";
|
|
1505
|
+
}>>]>;
|
|
1506
|
+
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
1507
|
+
type RemoveDefaultConfig = z$1.infer<typeof removeDefaultConfigSchema>;
|
|
1476
1508
|
/** Render formats supported by an LLM-call metric in the UI. */
|
|
1477
1509
|
declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
|
|
1478
1510
|
string: "string";
|
|
@@ -1561,6 +1593,21 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
|
|
|
1561
1593
|
}, z$1.core.$strip>;
|
|
1562
1594
|
/** User-defined API-call metric authored in `agent-evals.config.ts`. */
|
|
1563
1595
|
type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
|
|
1596
|
+
/**
|
|
1597
|
+
* Schema for one model/provider pricing entry used to derive LLM-call costs
|
|
1598
|
+
* from token counts when a span does not already record explicit USD costs.
|
|
1599
|
+
*/
|
|
1600
|
+
declare const llmCallPricingSchema: z$1.ZodObject<{
|
|
1601
|
+
model: z$1.ZodString;
|
|
1602
|
+
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
1603
|
+
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1604
|
+
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1605
|
+
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1606
|
+
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1607
|
+
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1608
|
+
}, z$1.core.$strip>;
|
|
1609
|
+
/** Model/provider pricing entry authored in `agent-evals.config.ts`. */
|
|
1610
|
+
type LlmCallPricing = z$1.infer<typeof llmCallPricingSchema>;
|
|
1564
1611
|
/** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
|
|
1565
1612
|
declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
1566
1613
|
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -1573,6 +1620,7 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
1573
1620
|
cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1574
1621
|
reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1575
1622
|
totalTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1623
|
+
tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
|
|
1576
1624
|
cost: z$1.ZodOptional<z$1.ZodString>;
|
|
1577
1625
|
inputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1578
1626
|
outputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -1586,6 +1634,15 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
1586
1634
|
reasoning: z$1.ZodOptional<z$1.ZodString>;
|
|
1587
1635
|
toolCalls: z$1.ZodOptional<z$1.ZodString>;
|
|
1588
1636
|
}, z$1.core.$strip>>;
|
|
1637
|
+
pricing: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
1638
|
+
model: z$1.ZodString;
|
|
1639
|
+
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
1640
|
+
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1641
|
+
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1642
|
+
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1643
|
+
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1644
|
+
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1645
|
+
}, z$1.core.$strip>>>;
|
|
1589
1646
|
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
1590
1647
|
label: z$1.ZodString;
|
|
1591
1648
|
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -1659,6 +1716,7 @@ type ResolvedLlmCallsConfig = {
|
|
|
1659
1716
|
cacheCreationInputTokens: string;
|
|
1660
1717
|
reasoningTokens: string;
|
|
1661
1718
|
totalTokens: string;
|
|
1719
|
+
tokensPerSecond: string;
|
|
1662
1720
|
cost: string;
|
|
1663
1721
|
inputCost: string;
|
|
1664
1722
|
outputCost: string;
|
|
@@ -1673,6 +1731,7 @@ type ResolvedLlmCallsConfig = {
|
|
|
1673
1731
|
toolCalls: string;
|
|
1674
1732
|
};
|
|
1675
1733
|
metrics: ResolvedLlmCallMetric[];
|
|
1734
|
+
pricing: ResolvedLlmCallPricing[];
|
|
1676
1735
|
};
|
|
1677
1736
|
/** Resolved API-calls config sent to the UI with all defaults applied. */
|
|
1678
1737
|
type ResolvedApiCallsConfig = {
|
|
@@ -1709,6 +1768,16 @@ type ResolvedApiCallMetric = {
|
|
|
1709
1768
|
numberFormat?: NumberDisplayOptions;
|
|
1710
1769
|
placements: ApiCallMetricPlacement[];
|
|
1711
1770
|
};
|
|
1771
|
+
/** Fully-resolved pricing entry used by the LLM calls extractor. */
|
|
1772
|
+
type ResolvedLlmCallPricing = {
|
|
1773
|
+
model: string;
|
|
1774
|
+
provider?: string;
|
|
1775
|
+
inputUsdPerMillion?: number;
|
|
1776
|
+
outputUsdPerMillion?: number;
|
|
1777
|
+
cachedInputUsdPerMillion?: number;
|
|
1778
|
+
cacheCreationInputUsdPerMillion?: number;
|
|
1779
|
+
reasoningUsdPerMillion?: number;
|
|
1780
|
+
};
|
|
1712
1781
|
/** Default LLM-calls config the UI uses before the workspace fetch resolves. */
|
|
1713
1782
|
declare const DEFAULT_LLM_CALLS_CONFIG: ResolvedLlmCallsConfig;
|
|
1714
1783
|
/** Default API-calls config the UI uses before the workspace fetch resolves. */
|
|
@@ -1722,6 +1791,8 @@ declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
|
|
|
1722
1791
|
* attribute path.
|
|
1723
1792
|
* - Missing `metrics[].format` defaults to `'string'`.
|
|
1724
1793
|
* - Missing `metrics[].placements` defaults to `['body']`.
|
|
1794
|
+
* - Missing `pricing` defaults to an empty registry; explicit span costs still
|
|
1795
|
+
* take precedence over derived costs.
|
|
1725
1796
|
*/
|
|
1726
1797
|
declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
|
|
1727
1798
|
/**
|
|
@@ -1788,14 +1859,25 @@ type AgentEvalsConfig = {
|
|
|
1788
1859
|
* cachedInputTokens: 'usage.cache_read_input_tokens',
|
|
1789
1860
|
* },
|
|
1790
1861
|
* metrics: [
|
|
1791
|
-
* { label: 'Tokens/sec', path: 'tokensPerSecond', format: 'number',
|
|
1792
|
-
* numberFormat: { decimalPlaces: 1 }, placements: ['header', 'body'] },
|
|
1793
1862
|
* { label: 'Retries', path: 'retryCount', format: 'number' },
|
|
1794
1863
|
* ],
|
|
1864
|
+
* pricing: [
|
|
1865
|
+
* { model: 'gpt-4o-mini', provider: 'openai',
|
|
1866
|
+
* inputUsdPerMillion: 0.15, outputUsdPerMillion: 0.6 },
|
|
1867
|
+
* ],
|
|
1795
1868
|
* }
|
|
1796
1869
|
* ```
|
|
1797
1870
|
*/
|
|
1798
1871
|
llmCalls?: LlmCallsConfigInput;
|
|
1872
|
+
/**
|
|
1873
|
+
* Remove built-in eval-level outputs, columns, stats, and charts.
|
|
1874
|
+
*
|
|
1875
|
+
* Defaults are derived from trace spans using the resolved `llmCalls` and
|
|
1876
|
+
* `apiCalls` extraction configs. Set to `true` to remove all defaults, or
|
|
1877
|
+
* pass specific keys such as `['costUsd', 'apiCalls']` to remove only those
|
|
1878
|
+
* defaults globally. Per-eval removal is additive.
|
|
1879
|
+
*/
|
|
1880
|
+
removeDefaultConfig?: RemoveDefaultConfig;
|
|
1799
1881
|
/**
|
|
1800
1882
|
* Configuration for the "API calls" tab in the case-run drawer.
|
|
1801
1883
|
*
|
|
@@ -1902,6 +1984,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
1902
1984
|
cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1903
1985
|
reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1904
1986
|
totalTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1987
|
+
tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
|
|
1905
1988
|
cost: z$1.ZodOptional<z$1.ZodString>;
|
|
1906
1989
|
inputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1907
1990
|
outputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -1915,6 +1998,15 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
1915
1998
|
reasoning: z$1.ZodOptional<z$1.ZodString>;
|
|
1916
1999
|
toolCalls: z$1.ZodOptional<z$1.ZodString>;
|
|
1917
2000
|
}, z$1.core.$strip>>;
|
|
2001
|
+
pricing: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
2002
|
+
model: z$1.ZodString;
|
|
2003
|
+
provider: z$1.ZodOptional<z$1.ZodString>;
|
|
2004
|
+
inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2005
|
+
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2006
|
+
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2007
|
+
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2008
|
+
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2009
|
+
}, z$1.core.$strip>>>;
|
|
1918
2010
|
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
1919
2011
|
label: z$1.ZodString;
|
|
1920
2012
|
tooltip: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -1933,6 +2025,18 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
1933
2025
|
}>>>;
|
|
1934
2026
|
}, z$1.core.$strip>>>;
|
|
1935
2027
|
}, z$1.core.$strip>>;
|
|
2028
|
+
removeDefaultConfig: z$1.ZodOptional<z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
|
|
2029
|
+
apiCalls: "apiCalls";
|
|
2030
|
+
costUsd: "costUsd";
|
|
2031
|
+
llmTurns: "llmTurns";
|
|
2032
|
+
inputTokens: "inputTokens";
|
|
2033
|
+
outputTokens: "outputTokens";
|
|
2034
|
+
totalTokens: "totalTokens";
|
|
2035
|
+
cachedInputTokens: "cachedInputTokens";
|
|
2036
|
+
cacheCreationInputTokens: "cacheCreationInputTokens";
|
|
2037
|
+
reasoningTokens: "reasoningTokens";
|
|
2038
|
+
llmLatencyMs: "llmLatencyMs";
|
|
2039
|
+
}>>]>>;
|
|
1936
2040
|
apiCalls: z$1.ZodOptional<z$1.ZodObject<{
|
|
1937
2041
|
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
1938
2042
|
attributes: z$1.ZodOptional<z$1.ZodObject<{
|
|
@@ -2001,6 +2105,7 @@ type LlmCallEntry = {
|
|
|
2001
2105
|
cacheCreationInputTokens: number | null;
|
|
2002
2106
|
reasoningTokens: number | null;
|
|
2003
2107
|
totalTokens: number | null;
|
|
2108
|
+
tokensPerSecond: number | null;
|
|
2004
2109
|
costUsd: number | null;
|
|
2005
2110
|
inputCostUsd: number | null;
|
|
2006
2111
|
outputCostUsd: number | null;
|
|
@@ -2024,9 +2129,11 @@ type LlmCallEntry = {
|
|
|
2024
2129
|
* shape consumed by the LLM calls tab.
|
|
2025
2130
|
*
|
|
2026
2131
|
* Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
|
|
2027
|
-
* (`model`, token counts, cost, etc.) are read via
|
|
2028
|
-
* the configured paths, with safe coercion to
|
|
2029
|
-
*
|
|
2132
|
+
* (`model`, token counts, explicit cost, etc.) are read via
|
|
2133
|
+
* `getNestedAttribute` from the configured paths, with safe coercion to
|
|
2134
|
+
* `string | null` / `number | null`. When explicit USD costs are absent,
|
|
2135
|
+
* configured model pricing derives per-token-type costs from token counts.
|
|
2136
|
+
* `totalTokens` falls back to a sum of input + output + cached when no
|
|
2030
2137
|
* explicit total attribute is present. The `steps` attribute path may resolve
|
|
2031
2138
|
* to either a number (rendered as the inference-round count) or an array of
|
|
2032
2139
|
* per-step detail objects (rendered as a Steps section in the body, with
|
|
@@ -2679,7 +2786,7 @@ type EvalColumnOverride = {
|
|
|
2679
2786
|
/**
|
|
2680
2787
|
* Extra options for `format: 'number'`.
|
|
2681
2788
|
*
|
|
2682
|
-
* Use this to add a prefix or suffix,
|
|
2789
|
+
* Use this to add a prefix or suffix, control minimum and maximum decimal
|
|
2683
2790
|
* places, or switch to compact notation such as `1.2K`.
|
|
2684
2791
|
*/
|
|
2685
2792
|
numberFormat?: NumberDisplayOptions;
|
|
@@ -2687,8 +2794,7 @@ type EvalColumnOverride = {
|
|
|
2687
2794
|
* Hides the column from the runs table while keeping it available in detail
|
|
2688
2795
|
* views and raw output data.
|
|
2689
2796
|
*/
|
|
2690
|
-
hideInTable?: boolean; /**
|
|
2691
|
-
sortable?: boolean; /** Horizontal alignment used when rendering the column cells. */
|
|
2797
|
+
hideInTable?: boolean; /** Horizontal alignment used when rendering the column cells. */
|
|
2692
2798
|
align?: 'left' | 'center' | 'right';
|
|
2693
2799
|
/**
|
|
2694
2800
|
* Maximum number of stars used when `format: 'stars'`.
|
|
@@ -2849,7 +2955,8 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
|
|
|
2849
2955
|
* column across the latest run's cases — `key` must match one of the eval's
|
|
2850
2956
|
* score or column keys, and only finite numeric values participate in the
|
|
2851
2957
|
* reduction. When no case has a numeric value for the key the stat renders
|
|
2852
|
-
* an em dash. `label` and `
|
|
2958
|
+
* an em dash. `label`, `format`, and `numberFormat` default to the matching
|
|
2959
|
+
* `ColumnDef`.
|
|
2853
2960
|
*/
|
|
2854
2961
|
stats?: EvalStatsConfig;
|
|
2855
2962
|
/**
|
|
@@ -2867,6 +2974,16 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
|
|
|
2867
2974
|
* with `passThreshold`.
|
|
2868
2975
|
*/
|
|
2869
2976
|
charts?: EvalChartsConfig;
|
|
2977
|
+
/**
|
|
2978
|
+
* Remove built-in eval-level outputs, columns, stats, and charts.
|
|
2979
|
+
*
|
|
2980
|
+
* By default the runner derives usage fields from trace spans using the
|
|
2981
|
+
* workspace `llmCalls` and `apiCalls` configs. Set to `true` to remove all
|
|
2982
|
+
* defaults for this eval, or pass specific keys such as
|
|
2983
|
+
* `['costUsd', 'apiCalls']` to remove only those defaults. Per-eval removals
|
|
2984
|
+
* are combined with global removals.
|
|
2985
|
+
*/
|
|
2986
|
+
removeDefaultConfig?: true | DefaultConfigKey[];
|
|
2870
2987
|
};
|
|
2871
2988
|
/**
|
|
2872
2989
|
* Complete authored eval definition consumed by `defineEval`.
|
|
@@ -3460,4 +3577,4 @@ declare function createRunner({
|
|
|
3460
3577
|
*/
|
|
3461
3578
|
declare function runCli(argv: string[]): Promise<void>;
|
|
3462
3579
|
//#endregion
|
|
3463
|
-
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
3580
|
+
export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallsConfigInput, type NumberDisplayOptions, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as
|
|
2
|
-
import { n as createRunner, t as runCli } from "./cli-
|
|
3
|
-
import "./src-
|
|
4
|
-
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
1
|
+
import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-zYAcAPtS.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-DumvanQI.mjs";
|
|
3
|
+
import "./src-BoAJb4wC.mjs";
|
|
4
|
+
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-zYAcAPtS.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|