@ls-stack/agent-eval 0.21.0 → 0.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-CmwmcUgG.mjs → app-moDHbg1O.mjs} +3 -3
- package/dist/apps/web/dist/assets/index-AUDD3rNB.js +118 -0
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/{cli-DumvanQI.mjs → cli-C0EtHhEO.mjs} +3 -3
- package/dist/index.d.mts +46 -53
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-zYAcAPtS.mjs → runOrchestration-D1edUDhp.mjs} +109 -78
- package/dist/{runner-Dy_PECaf.mjs → runner-C9nP2VKL.mjs} +2 -2
- package/dist/{runner-BcwyX9CO.mjs → runner-CyRhIzci.mjs} +1 -1
- package/dist/src-D-HuV8I-.mjs +3 -0
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +23 -17
- package/dist/apps/web/dist/assets/index-EXO08yya.js +0 -118
- package/dist/src-BoAJb4wC.mjs +0 -3
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
|
|
26
26
|
rel="stylesheet"
|
|
27
27
|
/>
|
|
28
|
-
<script type="module" crossorigin src="/assets/index-EXO08yya.js"></script>
|
|
28
|
+
<script type="module" crossorigin src="/assets/index-AUDD3rNB.js"></script>
|
|
29
29
|
<link rel="stylesheet" crossorigin href="/assets/index-r0dVFK0B.css">
|
|
30
30
|
</head>
|
|
31
31
|
<body>
|
package/dist/bin.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { C as validateCharts, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, N as getEvalTitle, P as getEvalDisplayStatus, S as normalizeScoreDef, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as loadConfig, c as persistCaseDetail, d as recomputePersistedCaseStatus, et as resolveApiCallsConfig, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, tt as resolveLlmCallsConfig, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, w as createFsCacheStore, x as buildDeclaredColumnDefs, y as resolveEvalDefaultConfig, z as runSummarySchema } from "./runOrchestration-zYAcAPtS.mjs";
|
|
1
|
+
import { C as validateCharts, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, N as getEvalTitle, P as getEvalDisplayStatus, S as normalizeScoreDef, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as loadConfig, c as persistCaseDetail, d as recomputePersistedCaseStatus, et as resolveApiCallsConfig, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, tt as resolveLlmCallsConfig, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, w as createFsCacheStore, x as buildDeclaredColumnDefs, y as resolveEvalDefaultConfig, z as runSummarySchema } from "./runOrchestration-D1edUDhp.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
|
|
4
4
|
import { dirname, join, relative, resolve } from "node:path";
|
|
@@ -959,8 +959,8 @@ async function commandApp(args) {
|
|
|
959
959
|
const { serve } = await import("@hono/node-server");
|
|
960
960
|
const bundledWebDist = resolve(currentDir, "apps/web/dist");
|
|
961
961
|
if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
|
|
962
|
-
const appModule = await import("./app-CmwmcUgG.mjs");
|
|
963
|
-
const runnerModule = await import("./runner-BcwyX9CO.mjs");
|
|
962
|
+
const appModule = await import("./app-moDHbg1O.mjs");
|
|
963
|
+
const runnerModule = await import("./runner-CyRhIzci.mjs");
|
|
964
964
|
if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
|
|
965
965
|
if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
|
|
966
966
|
await runnerModule.initRunner();
|
package/dist/index.d.mts
CHANGED
|
@@ -645,7 +645,7 @@ declare const caseRowSchema: z$1.ZodObject<{
|
|
|
645
645
|
cancelled: "cancelled";
|
|
646
646
|
pending: "pending";
|
|
647
647
|
}>;
|
|
648
|
-
|
|
648
|
+
durationMs: z$1.ZodNullable<z$1.ZodNumber>;
|
|
649
649
|
costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
|
|
650
650
|
columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
|
|
651
651
|
source: z$1.ZodLiteral<"repo">;
|
|
@@ -1486,7 +1486,7 @@ declare const defaultConfigKeySchema: z$1.ZodEnum<{
|
|
|
1486
1486
|
cachedInputTokens: "cachedInputTokens";
|
|
1487
1487
|
cacheCreationInputTokens: "cacheCreationInputTokens";
|
|
1488
1488
|
reasoningTokens: "reasoningTokens";
|
|
1489
|
-
|
|
1489
|
+
llmDurationMs: "llmDurationMs";
|
|
1490
1490
|
}>;
|
|
1491
1491
|
/** Built-in eval-level output/column key. */
|
|
1492
1492
|
type DefaultConfigKey = z$1.infer<typeof defaultConfigKeySchema>;
|
|
@@ -1501,7 +1501,7 @@ declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<t
|
|
|
1501
1501
|
cachedInputTokens: "cachedInputTokens";
|
|
1502
1502
|
cacheCreationInputTokens: "cacheCreationInputTokens";
|
|
1503
1503
|
reasoningTokens: "reasoningTokens";
|
|
1504
|
-
|
|
1504
|
+
llmDurationMs: "llmDurationMs";
|
|
1505
1505
|
}>>]>;
|
|
1506
1506
|
/** Removal config for built-in eval-level outputs and UI metadata. */
|
|
1507
1507
|
type RemoveDefaultConfig = z$1.infer<typeof removeDefaultConfigSchema>;
|
|
@@ -1595,7 +1595,7 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
|
|
|
1595
1595
|
type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
|
|
1596
1596
|
/**
|
|
1597
1597
|
* Schema for one model/provider pricing entry used to derive LLM-call costs
|
|
1598
|
-
* from token counts
|
|
1598
|
+
* from token counts.
|
|
1599
1599
|
*/
|
|
1600
1600
|
declare const llmCallPricingSchema: z$1.ZodObject<{
|
|
1601
1601
|
model: z$1.ZodString;
|
|
@@ -1604,6 +1604,7 @@ declare const llmCallPricingSchema: z$1.ZodObject<{
|
|
|
1604
1604
|
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1605
1605
|
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1606
1606
|
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1607
|
+
cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1607
1608
|
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1608
1609
|
}, z$1.core.$strip>;
|
|
1609
1610
|
/** Model/provider pricing entry authored in `agent-evals.config.ts`. */
|
|
@@ -1618,15 +1619,9 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
1618
1619
|
outputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1619
1620
|
cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1620
1621
|
cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1622
|
+
cacheCreationInput1hTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1621
1623
|
reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1622
|
-
|
|
1623
|
-
tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
|
|
1624
|
-
cost: z$1.ZodOptional<z$1.ZodString>;
|
|
1625
|
-
inputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1626
|
-
outputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1627
|
-
cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1628
|
-
cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1629
|
-
reasoningCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1624
|
+
latencyMs: z$1.ZodOptional<z$1.ZodString>;
|
|
1630
1625
|
steps: z$1.ZodOptional<z$1.ZodString>;
|
|
1631
1626
|
finishReason: z$1.ZodOptional<z$1.ZodString>;
|
|
1632
1627
|
input: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -1641,6 +1636,7 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
|
|
|
1641
1636
|
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1642
1637
|
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1643
1638
|
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1639
|
+
cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1644
1640
|
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1645
1641
|
}, z$1.core.$strip>>>;
|
|
1646
1642
|
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -1714,15 +1710,9 @@ type ResolvedLlmCallsConfig = {
|
|
|
1714
1710
|
outputTokens: string;
|
|
1715
1711
|
cachedInputTokens: string;
|
|
1716
1712
|
cacheCreationInputTokens: string;
|
|
1713
|
+
cacheCreationInput1hTokens: string;
|
|
1717
1714
|
reasoningTokens: string;
|
|
1718
|
-
|
|
1719
|
-
tokensPerSecond: string;
|
|
1720
|
-
cost: string;
|
|
1721
|
-
inputCost: string;
|
|
1722
|
-
outputCost: string;
|
|
1723
|
-
cachedInputCost: string;
|
|
1724
|
-
cacheCreationInputCost: string;
|
|
1725
|
-
reasoningCost: string;
|
|
1715
|
+
latencyMs: string;
|
|
1726
1716
|
steps: string;
|
|
1727
1717
|
finishReason: string;
|
|
1728
1718
|
input: string;
|
|
@@ -1776,6 +1766,7 @@ type ResolvedLlmCallPricing = {
|
|
|
1776
1766
|
outputUsdPerMillion?: number;
|
|
1777
1767
|
cachedInputUsdPerMillion?: number;
|
|
1778
1768
|
cacheCreationInputUsdPerMillion?: number;
|
|
1769
|
+
cacheCreationInput1hUsdPerMillion?: number;
|
|
1779
1770
|
reasoningUsdPerMillion?: number;
|
|
1780
1771
|
};
|
|
1781
1772
|
/** Default LLM-calls config the UI uses before the workspace fetch resolves. */
|
|
@@ -1791,8 +1782,8 @@ declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
|
|
|
1791
1782
|
* attribute path.
|
|
1792
1783
|
* - Missing `metrics[].format` defaults to `'string'`.
|
|
1793
1784
|
* - Missing `metrics[].placements` defaults to `['body']`.
|
|
1794
|
-
* - Missing `pricing` defaults to an empty registry;
|
|
1795
|
-
*
|
|
1785
|
+
* - Missing `pricing` defaults to an empty registry; built-in costs are only
|
|
1786
|
+
* derived from configured pricing and token counts.
|
|
1796
1787
|
*/
|
|
1797
1788
|
declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
|
|
1798
1789
|
/**
|
|
@@ -1846,10 +1837,10 @@ type AgentEvalsConfig = {
|
|
|
1846
1837
|
*
|
|
1847
1838
|
* Determines which trace spans are treated as LLM calls (`kinds`), how
|
|
1848
1839
|
* structured fields like `model` and `usage.inputTokens` are read from
|
|
1849
|
-
* span attributes,
|
|
1850
|
-
*
|
|
1851
|
-
* defaults; the LLM calls tab is
|
|
1852
|
-
* matching span exists in a case run.
|
|
1840
|
+
* span attributes, which pricing table derives built-in costs, and which
|
|
1841
|
+
* custom user-defined metrics are surfaced on each call. All fields are
|
|
1842
|
+
* optional and fall back to the documented defaults; the LLM calls tab is
|
|
1843
|
+
* shown automatically when at least one matching span exists in a case run.
|
|
1853
1844
|
*
|
|
1854
1845
|
* @example
|
|
1855
1846
|
* ```ts
|
|
@@ -1982,15 +1973,9 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
1982
1973
|
outputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1983
1974
|
cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1984
1975
|
cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1976
|
+
cacheCreationInput1hTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1985
1977
|
reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
|
|
1986
|
-
|
|
1987
|
-
tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
|
|
1988
|
-
cost: z$1.ZodOptional<z$1.ZodString>;
|
|
1989
|
-
inputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1990
|
-
outputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1991
|
-
cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1992
|
-
cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1993
|
-
reasoningCost: z$1.ZodOptional<z$1.ZodString>;
|
|
1978
|
+
latencyMs: z$1.ZodOptional<z$1.ZodString>;
|
|
1994
1979
|
steps: z$1.ZodOptional<z$1.ZodString>;
|
|
1995
1980
|
finishReason: z$1.ZodOptional<z$1.ZodString>;
|
|
1996
1981
|
input: z$1.ZodOptional<z$1.ZodString>;
|
|
@@ -2005,6 +1990,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
2005
1990
|
outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2006
1991
|
cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2007
1992
|
cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
1993
|
+
cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2008
1994
|
reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
|
|
2009
1995
|
}, z$1.core.$strip>>>;
|
|
2010
1996
|
metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
|
|
@@ -2035,7 +2021,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
|
|
|
2035
2021
|
cachedInputTokens: "cachedInputTokens";
|
|
2036
2022
|
cacheCreationInputTokens: "cacheCreationInputTokens";
|
|
2037
2023
|
reasoningTokens: "reasoningTokens";
|
|
2038
|
-
|
|
2024
|
+
llmDurationMs: "llmDurationMs";
|
|
2039
2025
|
}>>]>>;
|
|
2040
2026
|
apiCalls: z$1.ZodOptional<z$1.ZodObject<{
|
|
2041
2027
|
kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
|
|
@@ -2104,7 +2090,8 @@ type LlmCallEntry = {
|
|
|
2104
2090
|
cachedInputTokens: number | null;
|
|
2105
2091
|
cacheCreationInputTokens: number | null;
|
|
2106
2092
|
reasoningTokens: number | null;
|
|
2107
|
-
totalTokens: number | null;
|
|
2093
|
+
totalTokens: number | null; /** Time to first token for the LLM call in milliseconds, when reported by the span. */
|
|
2094
|
+
latencyMs: number | null;
|
|
2108
2095
|
tokensPerSecond: number | null;
|
|
2109
2096
|
costUsd: number | null;
|
|
2110
2097
|
inputCostUsd: number | null;
|
|
@@ -2114,8 +2101,8 @@ type LlmCallEntry = {
|
|
|
2114
2101
|
reasoningCostUsd: number | null; /** Number of inference rounds. Derived from the array length when `stepDetails` is set. */
|
|
2115
2102
|
stepCount: number | null; /** Per-step breakdown when the configured `steps` attribute resolves to an array. */
|
|
2116
2103
|
stepDetails: unknown[] | null;
|
|
2117
|
-
finishReason: string | null;
|
|
2118
|
-
|
|
2104
|
+
finishReason: string | null; /** Elapsed LLM call span duration in milliseconds. */
|
|
2105
|
+
durationMs: number | null;
|
|
2119
2106
|
input: unknown;
|
|
2120
2107
|
output: unknown;
|
|
2121
2108
|
reasoning: unknown;
|
|
@@ -2129,16 +2116,22 @@ type LlmCallEntry = {
|
|
|
2129
2116
|
* shape consumed by the LLM calls tab.
|
|
2130
2117
|
*
|
|
2131
2118
|
* Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
|
|
2132
|
-
* (`model`, token counts,
|
|
2119
|
+
* (`model`, token counts, latency, etc.) are read via
|
|
2133
2120
|
* `getNestedAttribute` from the configured paths, with safe coercion to
|
|
2134
|
-
* `string | null` / `number | null`.
|
|
2135
|
-
*
|
|
2136
|
-
* `
|
|
2137
|
-
*
|
|
2138
|
-
*
|
|
2139
|
-
*
|
|
2140
|
-
*
|
|
2141
|
-
*
|
|
2121
|
+
* `string | null` / `number | null`. `latencyMs` is an explicit
|
|
2122
|
+
* time-to-first-token attribute; full span elapsed time is reported separately
|
|
2123
|
+
* as `durationMs`. Built-in USD costs are derived only from configured model
|
|
2124
|
+
* pricing and token counts. `totalTokens` is always derived from input +
|
|
2125
|
+
* output tokens. Cached input and cache creation tokens are reported
|
|
2126
|
+
* separately because they are subsets of input/output usage. The main cache
|
|
2127
|
+
* creation token field is treated as the total write count; optional one-hour
|
|
2128
|
+
* cache creation tokens only split that total for cost calculation. Base input
|
|
2129
|
+
* cost uses input minus cache read/write tokens so cached tokens are not
|
|
2130
|
+
* charged twice. Cache read/write costs still contribute to the total USD cost
|
|
2131
|
+
* at their configured rates. The `steps` attribute path may resolve to an array
|
|
2132
|
+
* of per-step detail objects, with `stepCount` derived from the array length.
|
|
2133
|
+
* `durationMs` and `tokensPerSecond` are `null` while the span is still
|
|
2134
|
+
* running. User-defined `metrics` whose path resolves to
|
|
2142
2135
|
* `undefined` are dropped, but `null`, `0`, and `false` are preserved as
|
|
2143
2136
|
* legitimate values worth displaying. Original span order is preserved so the
|
|
2144
2137
|
* LLM calls tab matches the ordering in the Trace tab.
|
|
@@ -2163,8 +2156,8 @@ type ApiCallEntry = {
|
|
|
2163
2156
|
status: EvalTraceSpan['status'];
|
|
2164
2157
|
method: string | null;
|
|
2165
2158
|
url: string | null;
|
|
2166
|
-
statusCode: number | null;
|
|
2167
|
-
|
|
2159
|
+
statusCode: number | null; /** Elapsed API call duration in milliseconds. */
|
|
2160
|
+
durationMs: number | null;
|
|
2168
2161
|
request: unknown;
|
|
2169
2162
|
response: unknown;
|
|
2170
2163
|
requestBody: unknown;
|
|
@@ -2181,10 +2174,10 @@ type ApiCallEntry = {
|
|
|
2181
2174
|
*
|
|
2182
2175
|
* Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
|
|
2183
2176
|
* (`method`, `url`, `statusCode`, etc.) are read via `getNestedAttribute` from
|
|
2184
|
-
* the configured paths. `durationMs` takes precedence
|
|
2185
|
-
* fallback to the span start/end timestamps. User-defined `metrics`
|
|
2186
|
-
* resolves to `undefined` are dropped, but `null`, `0`, and `false`
|
|
2187
|
-
* preserved as legitimate values worth displaying. Original span order is
|
|
2177
|
+
* the configured paths. An explicit `durationMs` attribute takes precedence,
|
|
2178
|
+
* with a fallback to the span start/end timestamps. User-defined `metrics`
|
|
2179
|
+
* whose path resolves to `undefined` are dropped, but `null`, `0`, and `false`
|
|
2180
|
+
* are preserved as legitimate values worth displaying. Original span order is
|
|
2188
2181
|
* preserved so the API calls tab matches the ordering in the Trace tab.
|
|
2189
2182
|
*/
|
|
2190
2183
|
declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCallsConfig): ApiCallEntry[];
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as 
hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-
|
|
2
|
-
import { n as createRunner, t as runCli } from "./cli-DumvanQI.mjs";
|
|
3
|
-
import "./src-BoAJb4wC.mjs";
|
|
1
|
+
import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as 
hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-D1edUDhp.mjs";
|
|
2
|
+
import { n as createRunner, t as runCli } from "./cli-C0EtHhEO.mjs";
|
|
3
|
+
import "./src-D-HuV8I-.mjs";
|
|
4
4
|
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, 
runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-zYAcAPtS.mjs";
|
|
1
|
+
import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-D1edUDhp.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|