@ls-stack/agent-eval 0.21.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-EXO08yya.js"></script>
28
+ <script type="module" crossorigin src="/assets/index-AUDD3rNB.js"></script>
29
29
  <link rel="stylesheet" crossorigin href="/assets/index-r0dVFK0B.css">
30
30
  </head>
31
31
  <body>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-DumvanQI.mjs";
2
+ import { t as runCli } from "./cli-C0EtHhEO.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { C as validateCharts, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, N as getEvalTitle, P as getEvalDisplayStatus, S as normalizeScoreDef, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as loadConfig, c as persistCaseDetail, d as recomputePersistedCaseStatus, et as resolveApiCallsConfig, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, tt as resolveLlmCallsConfig, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, w as createFsCacheStore, x as buildDeclaredColumnDefs, y as resolveEvalDefaultConfig, z as runSummarySchema } from "./runOrchestration-zYAcAPtS.mjs";
1
+ import { C as validateCharts, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, N as getEvalTitle, P as getEvalDisplayStatus, S as normalizeScoreDef, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as loadConfig, c as persistCaseDetail, d as recomputePersistedCaseStatus, et as resolveApiCallsConfig, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, tt as resolveLlmCallsConfig, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, w as createFsCacheStore, x as buildDeclaredColumnDefs, y as resolveEvalDefaultConfig, z as runSummarySchema } from "./runOrchestration-D1edUDhp.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join, relative, resolve } from "node:path";
@@ -959,8 +959,8 @@ async function commandApp(args) {
959
959
  const { serve } = await import("@hono/node-server");
960
960
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
961
961
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
962
- const appModule = await import("./app-CmwmcUgG.mjs");
963
- const runnerModule = await import("./runner-BcwyX9CO.mjs");
962
+ const appModule = await import("./app-moDHbg1O.mjs");
963
+ const runnerModule = await import("./runner-CyRhIzci.mjs");
964
964
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
965
965
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
966
966
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -645,7 +645,7 @@ declare const caseRowSchema: z$1.ZodObject<{
645
645
  cancelled: "cancelled";
646
646
  pending: "pending";
647
647
  }>;
648
- latencyMs: z$1.ZodNullable<z$1.ZodNumber>;
648
+ durationMs: z$1.ZodNullable<z$1.ZodNumber>;
649
649
  costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
650
650
  columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
651
651
  source: z$1.ZodLiteral<"repo">;
@@ -1486,7 +1486,7 @@ declare const defaultConfigKeySchema: z$1.ZodEnum<{
1486
1486
  cachedInputTokens: "cachedInputTokens";
1487
1487
  cacheCreationInputTokens: "cacheCreationInputTokens";
1488
1488
  reasoningTokens: "reasoningTokens";
1489
- llmLatencyMs: "llmLatencyMs";
1489
+ llmDurationMs: "llmDurationMs";
1490
1490
  }>;
1491
1491
  /** Built-in eval-level output/column key. */
1492
1492
  type DefaultConfigKey = z$1.infer<typeof defaultConfigKeySchema>;
@@ -1501,7 +1501,7 @@ declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<t
1501
1501
  cachedInputTokens: "cachedInputTokens";
1502
1502
  cacheCreationInputTokens: "cacheCreationInputTokens";
1503
1503
  reasoningTokens: "reasoningTokens";
1504
- llmLatencyMs: "llmLatencyMs";
1504
+ llmDurationMs: "llmDurationMs";
1505
1505
  }>>]>;
1506
1506
  /** Removal config for built-in eval-level outputs and UI metadata. */
1507
1507
  type RemoveDefaultConfig = z$1.infer<typeof removeDefaultConfigSchema>;
@@ -1595,7 +1595,7 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
1595
1595
  type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
1596
1596
  /**
1597
1597
  * Schema for one model/provider pricing entry used to derive LLM-call costs
1598
- * from token counts when a span does not already record explicit USD costs.
1598
+ * from token counts.
1599
1599
  */
1600
1600
  declare const llmCallPricingSchema: z$1.ZodObject<{
1601
1601
  model: z$1.ZodString;
@@ -1604,6 +1604,7 @@ declare const llmCallPricingSchema: z$1.ZodObject<{
1604
1604
  outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1605
1605
  cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1606
1606
  cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1607
+ cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1607
1608
  reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1608
1609
  }, z$1.core.$strip>;
1609
1610
  /** Model/provider pricing entry authored in `agent-evals.config.ts`. */
@@ -1618,15 +1619,9 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
1618
1619
  outputTokens: z$1.ZodOptional<z$1.ZodString>;
1619
1620
  cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
1620
1621
  cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
1622
+ cacheCreationInput1hTokens: z$1.ZodOptional<z$1.ZodString>;
1621
1623
  reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
1622
- totalTokens: z$1.ZodOptional<z$1.ZodString>;
1623
- tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
1624
- cost: z$1.ZodOptional<z$1.ZodString>;
1625
- inputCost: z$1.ZodOptional<z$1.ZodString>;
1626
- outputCost: z$1.ZodOptional<z$1.ZodString>;
1627
- cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
1628
- cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
1629
- reasoningCost: z$1.ZodOptional<z$1.ZodString>;
1624
+ latencyMs: z$1.ZodOptional<z$1.ZodString>;
1630
1625
  steps: z$1.ZodOptional<z$1.ZodString>;
1631
1626
  finishReason: z$1.ZodOptional<z$1.ZodString>;
1632
1627
  input: z$1.ZodOptional<z$1.ZodString>;
@@ -1641,6 +1636,7 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
1641
1636
  outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1642
1637
  cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1643
1638
  cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1639
+ cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1644
1640
  reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1645
1641
  }, z$1.core.$strip>>>;
1646
1642
  metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -1714,15 +1710,9 @@ type ResolvedLlmCallsConfig = {
1714
1710
  outputTokens: string;
1715
1711
  cachedInputTokens: string;
1716
1712
  cacheCreationInputTokens: string;
1713
+ cacheCreationInput1hTokens: string;
1717
1714
  reasoningTokens: string;
1718
- totalTokens: string;
1719
- tokensPerSecond: string;
1720
- cost: string;
1721
- inputCost: string;
1722
- outputCost: string;
1723
- cachedInputCost: string;
1724
- cacheCreationInputCost: string;
1725
- reasoningCost: string;
1715
+ latencyMs: string;
1726
1716
  steps: string;
1727
1717
  finishReason: string;
1728
1718
  input: string;
@@ -1776,6 +1766,7 @@ type ResolvedLlmCallPricing = {
1776
1766
  outputUsdPerMillion?: number;
1777
1767
  cachedInputUsdPerMillion?: number;
1778
1768
  cacheCreationInputUsdPerMillion?: number;
1769
+ cacheCreationInput1hUsdPerMillion?: number;
1779
1770
  reasoningUsdPerMillion?: number;
1780
1771
  };
1781
1772
  /** Default LLM-calls config the UI uses before the workspace fetch resolves. */
@@ -1791,8 +1782,8 @@ declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
1791
1782
  * attribute path.
1792
1783
  * - Missing `metrics[].format` defaults to `'string'`.
1793
1784
  * - Missing `metrics[].placements` defaults to `['body']`.
1794
- * - Missing `pricing` defaults to an empty registry; explicit span costs still
1795
- * take precedence over derived costs.
1785
+ * - Missing `pricing` defaults to an empty registry; built-in costs are only
1786
+ * derived from configured pricing and token counts.
1796
1787
  */
1797
1788
  declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
1798
1789
  /**
@@ -1846,10 +1837,10 @@ type AgentEvalsConfig = {
1846
1837
  *
1847
1838
  * Determines which trace spans are treated as LLM calls (`kinds`), how
1848
1839
  * structured fields like `model` and `usage.inputTokens` are read from
1849
- * span attributes, and which custom user-defined metrics are surfaced on
1850
- * each call. All fields are optional and fall back to the documented
1851
- * defaults; the LLM calls tab is shown automatically when at least one
1852
- * matching span exists in a case run.
1840
+ * span attributes, which pricing table derives built-in costs, and which
1841
+ * custom user-defined metrics are surfaced on each call. All fields are
1842
+ * optional and fall back to the documented defaults; the LLM calls tab is
1843
+ * shown automatically when at least one matching span exists in a case run.
1853
1844
  *
1854
1845
  * @example
1855
1846
  * ```ts
@@ -1982,15 +1973,9 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
1982
1973
  outputTokens: z$1.ZodOptional<z$1.ZodString>;
1983
1974
  cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
1984
1975
  cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
1976
+ cacheCreationInput1hTokens: z$1.ZodOptional<z$1.ZodString>;
1985
1977
  reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
1986
- totalTokens: z$1.ZodOptional<z$1.ZodString>;
1987
- tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
1988
- cost: z$1.ZodOptional<z$1.ZodString>;
1989
- inputCost: z$1.ZodOptional<z$1.ZodString>;
1990
- outputCost: z$1.ZodOptional<z$1.ZodString>;
1991
- cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
1992
- cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
1993
- reasoningCost: z$1.ZodOptional<z$1.ZodString>;
1978
+ latencyMs: z$1.ZodOptional<z$1.ZodString>;
1994
1979
  steps: z$1.ZodOptional<z$1.ZodString>;
1995
1980
  finishReason: z$1.ZodOptional<z$1.ZodString>;
1996
1981
  input: z$1.ZodOptional<z$1.ZodString>;
@@ -2005,6 +1990,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
2005
1990
  outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2006
1991
  cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2007
1992
  cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1993
+ cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2008
1994
  reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2009
1995
  }, z$1.core.$strip>>>;
2010
1996
  metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -2035,7 +2021,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
2035
2021
  cachedInputTokens: "cachedInputTokens";
2036
2022
  cacheCreationInputTokens: "cacheCreationInputTokens";
2037
2023
  reasoningTokens: "reasoningTokens";
2038
- llmLatencyMs: "llmLatencyMs";
2024
+ llmDurationMs: "llmDurationMs";
2039
2025
  }>>]>>;
2040
2026
  apiCalls: z$1.ZodOptional<z$1.ZodObject<{
2041
2027
  kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -2104,7 +2090,8 @@ type LlmCallEntry = {
2104
2090
  cachedInputTokens: number | null;
2105
2091
  cacheCreationInputTokens: number | null;
2106
2092
  reasoningTokens: number | null;
2107
- totalTokens: number | null;
2093
+ totalTokens: number | null; /** Time to first token for the LLM call in milliseconds, when reported by the span. */
2094
+ latencyMs: number | null;
2108
2095
  tokensPerSecond: number | null;
2109
2096
  costUsd: number | null;
2110
2097
  inputCostUsd: number | null;
@@ -2114,8 +2101,8 @@ type LlmCallEntry = {
2114
2101
  reasoningCostUsd: number | null; /** Number of inference rounds. Derived from the array length when `stepDetails` is set. */
2115
2102
  stepCount: number | null; /** Per-step breakdown when the configured `steps` attribute resolves to an array. */
2116
2103
  stepDetails: unknown[] | null;
2117
- finishReason: string | null;
2118
- latencyMs: number | null;
2104
+ finishReason: string | null; /** Elapsed LLM call span duration in milliseconds. */
2105
+ durationMs: number | null;
2119
2106
  input: unknown;
2120
2107
  output: unknown;
2121
2108
  reasoning: unknown;
@@ -2129,16 +2116,22 @@ type LlmCallEntry = {
2129
2116
  * shape consumed by the LLM calls tab.
2130
2117
  *
2131
2118
  * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
2132
- * (`model`, token counts, explicit cost, etc.) are read via
2119
+ * (`model`, token counts, latency, etc.) are read via
2133
2120
  * `getNestedAttribute` from the configured paths, with safe coercion to
2134
- * `string | null` / `number | null`. When explicit USD costs are absent,
2135
- * configured model pricing derives per-token-type costs from token counts.
2136
- * `totalTokens` falls back to a sum of input + output + cached when no
2137
- * explicit total attribute is present. The `steps` attribute path may resolve
2138
- * to either a number (rendered as the inference-round count) or an array of
2139
- * per-step detail objects (rendered as a Steps section in the body, with
2140
- * `stepCount` derived from the array length). `latencyMs` is `null` while the
2141
- * span is still running. User-defined `metrics` whose path resolves to
2121
+ * `string | null` / `number | null`. `latencyMs` is an explicit
2122
+ * time-to-first-token attribute; full span elapsed time is reported separately
2123
+ * as `durationMs`. Built-in USD costs are derived only from configured model
2124
+ * pricing and token counts. `totalTokens` is always derived from input +
2125
+ * output tokens. Cached input and cache creation tokens are reported
2126
+ * separately because they are subsets of input/output usage. The main cache
2127
+ * creation token field is treated as the total write count; optional one-hour
2128
+ * cache creation tokens only split that total for cost calculation. Base input
2129
+ * cost uses input minus cache read/write tokens so cached tokens are not
2130
+ * charged twice. Cache read/write costs still contribute to the total USD cost
2131
+ * at their configured rates. The `steps` attribute path may resolve to an array
2132
+ * of per-step detail objects, with `stepCount` derived from the array length.
2133
+ * `durationMs` and `tokensPerSecond` are `null` while the span is still
2134
+ * running. User-defined `metrics` whose path resolves to
2142
2135
  * `undefined` are dropped, but `null`, `0`, and `false` are preserved as
2143
2136
  * legitimate values worth displaying. Original span order is preserved so the
2144
2137
  * LLM calls tab matches the ordering in the Trace tab.
@@ -2163,8 +2156,8 @@ type ApiCallEntry = {
2163
2156
  status: EvalTraceSpan['status'];
2164
2157
  method: string | null;
2165
2158
  url: string | null;
2166
- statusCode: number | null;
2167
- latencyMs: number | null;
2159
+ statusCode: number | null; /** Elapsed API call duration in milliseconds. */
2160
+ durationMs: number | null;
2168
2161
  request: unknown;
2169
2162
  response: unknown;
2170
2163
  requestBody: unknown;
@@ -2181,10 +2174,10 @@ type ApiCallEntry = {
2181
2174
  *
2182
2175
  * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
2183
2176
  * (`method`, `url`, `statusCode`, etc.) are read via `getNestedAttribute` from
2184
- * the configured paths. `durationMs` takes precedence for latency, with a
2185
- * fallback to the span start/end timestamps. User-defined `metrics` whose path
2186
- * resolves to `undefined` are dropped, but `null`, `0`, and `false` are
2187
- * preserved as legitimate values worth displaying. Original span order is
2177
+ * the configured paths. An explicit `durationMs` attribute takes precedence,
2178
+ * with a fallback to the span start/end timestamps. User-defined `metrics`
2179
+ * whose path resolves to `undefined` are dropped, but `null`, `0`, and `false`
2180
+ * are preserved as legitimate values worth displaying. Original span order is
2188
2181
  * preserved so the API calls tab matches the ordering in the Trace tab.
2189
2182
  */
2190
2183
  declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCallsConfig): ApiCallEntry[];
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-zYAcAPtS.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-DumvanQI.mjs";
3
- import "./src-BoAJb4wC.mjs";
1
+ import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-D1edUDhp.mjs";
2
+ import { n as createRunner, t as runCli } from "./cli-C0EtHhEO.mjs";
3
+ import "./src-D-HuV8I-.mjs";
4
4
  export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-zYAcAPtS.mjs";
1
+ import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-D1edUDhp.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";