@ls-stack/agent-eval 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -39,8 +39,9 @@ type NumberDisplayOptions = {
39
39
  /** Number notation used when rendering the value. */notation?: 'standard' | 'compact'; /** Compact style used when `notation: 'compact'` is enabled. */
40
40
  compactDisplay?: 'short' | 'long'; /** String prepended to the rendered number, such as `$`. */
41
41
  prefix?: string; /** String appended to the rendered number, such as ` ms`. */
42
- suffix?: string; /** Fixed number of decimal places to render. */
43
- decimalPlaces?: number;
42
+ suffix?: string; /** Minimum number of decimal places to render. */
43
+ minDecimalPlaces?: number; /** Maximum number of decimal places to render. */
44
+ maxDecimalPlaces?: number;
44
45
  };
45
46
  /** Schema for numeric presentation options used by number-formatted values. */
46
47
  declare const numberDisplayOptionsSchema: z$1.ZodType<NumberDisplayOptions>;
@@ -98,7 +99,6 @@ declare const columnDefSchema: z$1.ZodObject<{
98
99
  passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
99
100
  maxStars: z$1.ZodOptional<z$1.ZodNumber>;
100
101
  hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
101
- sortable: z$1.ZodOptional<z$1.ZodBoolean>;
102
102
  align: z$1.ZodOptional<z$1.ZodEnum<{
103
103
  left: "left";
104
104
  center: "center";
@@ -412,6 +412,7 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
412
412
  passFail: "passFail";
413
413
  stars: "stars";
414
414
  }>>;
415
+ numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
415
416
  accent: z$1.ZodOptional<z$1.ZodBoolean>;
416
417
  }, z$1.core.$strip>], "kind">;
417
418
  /** Single stat rendered in the EvalCard stats row. */
@@ -449,6 +450,7 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
449
450
  passFail: "passFail";
450
451
  stars: "stars";
451
452
  }>>;
453
+ numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
452
454
  accent: z$1.ZodOptional<z$1.ZodBoolean>;
453
455
  }, z$1.core.$strip>], "kind">>;
454
456
  /** Ordered list of stats rendered in the EvalCard stats row. */
@@ -496,7 +498,6 @@ declare const evalSummarySchema: z$1.ZodObject<{
496
498
  passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
497
499
  maxStars: z$1.ZodOptional<z$1.ZodNumber>;
498
500
  hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
499
- sortable: z$1.ZodOptional<z$1.ZodBoolean>;
500
501
  align: z$1.ZodOptional<z$1.ZodEnum<{
501
502
  left: "left";
502
503
  center: "center";
@@ -544,6 +545,7 @@ declare const evalSummarySchema: z$1.ZodObject<{
544
545
  passFail: "passFail";
545
546
  stars: "stars";
546
547
  }>>;
548
+ numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
547
549
  accent: z$1.ZodOptional<z$1.ZodBoolean>;
548
550
  }, z$1.core.$strip>], "kind">>>;
549
551
  charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -643,7 +645,7 @@ declare const caseRowSchema: z$1.ZodObject<{
643
645
  cancelled: "cancelled";
644
646
  pending: "pending";
645
647
  }>;
646
- latencyMs: z$1.ZodNullable<z$1.ZodNumber>;
648
+ durationMs: z$1.ZodNullable<z$1.ZodNumber>;
647
649
  costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
648
650
  columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
649
651
  source: z$1.ZodLiteral<"repo">;
@@ -1484,7 +1486,7 @@ declare const defaultConfigKeySchema: z$1.ZodEnum<{
1484
1486
  cachedInputTokens: "cachedInputTokens";
1485
1487
  cacheCreationInputTokens: "cacheCreationInputTokens";
1486
1488
  reasoningTokens: "reasoningTokens";
1487
- llmLatencyMs: "llmLatencyMs";
1489
+ llmDurationMs: "llmDurationMs";
1488
1490
  }>;
1489
1491
  /** Built-in eval-level output/column key. */
1490
1492
  type DefaultConfigKey = z$1.infer<typeof defaultConfigKeySchema>;
@@ -1499,7 +1501,7 @@ declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<t
1499
1501
  cachedInputTokens: "cachedInputTokens";
1500
1502
  cacheCreationInputTokens: "cacheCreationInputTokens";
1501
1503
  reasoningTokens: "reasoningTokens";
1502
- llmLatencyMs: "llmLatencyMs";
1504
+ llmDurationMs: "llmDurationMs";
1503
1505
  }>>]>;
1504
1506
  /** Removal config for built-in eval-level outputs and UI metadata. */
1505
1507
  type RemoveDefaultConfig = z$1.infer<typeof removeDefaultConfigSchema>;
@@ -1593,7 +1595,7 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
1593
1595
  type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
1594
1596
  /**
1595
1597
  * Schema for one model/provider pricing entry used to derive LLM-call costs
1596
- * from token counts when a span does not already record explicit USD costs.
1598
+ * from token counts.
1597
1599
  */
1598
1600
  declare const llmCallPricingSchema: z$1.ZodObject<{
1599
1601
  model: z$1.ZodString;
@@ -1602,6 +1604,7 @@ declare const llmCallPricingSchema: z$1.ZodObject<{
1602
1604
  outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1603
1605
  cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1604
1606
  cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1607
+ cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1605
1608
  reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1606
1609
  }, z$1.core.$strip>;
1607
1610
  /** Model/provider pricing entry authored in `agent-evals.config.ts`. */
@@ -1616,15 +1619,9 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
1616
1619
  outputTokens: z$1.ZodOptional<z$1.ZodString>;
1617
1620
  cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
1618
1621
  cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
1622
+ cacheCreationInput1hTokens: z$1.ZodOptional<z$1.ZodString>;
1619
1623
  reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
1620
- totalTokens: z$1.ZodOptional<z$1.ZodString>;
1621
- tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
1622
- cost: z$1.ZodOptional<z$1.ZodString>;
1623
- inputCost: z$1.ZodOptional<z$1.ZodString>;
1624
- outputCost: z$1.ZodOptional<z$1.ZodString>;
1625
- cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
1626
- cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
1627
- reasoningCost: z$1.ZodOptional<z$1.ZodString>;
1624
+ latencyMs: z$1.ZodOptional<z$1.ZodString>;
1628
1625
  steps: z$1.ZodOptional<z$1.ZodString>;
1629
1626
  finishReason: z$1.ZodOptional<z$1.ZodString>;
1630
1627
  input: z$1.ZodOptional<z$1.ZodString>;
@@ -1639,6 +1636,7 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
1639
1636
  outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1640
1637
  cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1641
1638
  cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1639
+ cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1642
1640
  reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1643
1641
  }, z$1.core.$strip>>>;
1644
1642
  metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -1712,15 +1710,9 @@ type ResolvedLlmCallsConfig = {
1712
1710
  outputTokens: string;
1713
1711
  cachedInputTokens: string;
1714
1712
  cacheCreationInputTokens: string;
1713
+ cacheCreationInput1hTokens: string;
1715
1714
  reasoningTokens: string;
1716
- totalTokens: string;
1717
- tokensPerSecond: string;
1718
- cost: string;
1719
- inputCost: string;
1720
- outputCost: string;
1721
- cachedInputCost: string;
1722
- cacheCreationInputCost: string;
1723
- reasoningCost: string;
1715
+ latencyMs: string;
1724
1716
  steps: string;
1725
1717
  finishReason: string;
1726
1718
  input: string;
@@ -1774,6 +1766,7 @@ type ResolvedLlmCallPricing = {
1774
1766
  outputUsdPerMillion?: number;
1775
1767
  cachedInputUsdPerMillion?: number;
1776
1768
  cacheCreationInputUsdPerMillion?: number;
1769
+ cacheCreationInput1hUsdPerMillion?: number;
1777
1770
  reasoningUsdPerMillion?: number;
1778
1771
  };
1779
1772
  /** Default LLM-calls config the UI uses before the workspace fetch resolves. */
@@ -1789,8 +1782,8 @@ declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
1789
1782
  * attribute path.
1790
1783
  * - Missing `metrics[].format` defaults to `'string'`.
1791
1784
  * - Missing `metrics[].placements` defaults to `['body']`.
1792
- * - Missing `pricing` defaults to an empty registry; explicit span costs still
1793
- * take precedence over derived costs.
1785
+ * - Missing `pricing` defaults to an empty registry; built-in costs are only
1786
+ * derived from configured pricing and token counts.
1794
1787
  */
1795
1788
  declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
1796
1789
  /**
@@ -1844,10 +1837,10 @@ type AgentEvalsConfig = {
1844
1837
  *
1845
1838
  * Determines which trace spans are treated as LLM calls (`kinds`), how
1846
1839
  * structured fields like `model` and `usage.inputTokens` are read from
1847
- * span attributes, and which custom user-defined metrics are surfaced on
1848
- * each call. All fields are optional and fall back to the documented
1849
- * defaults; the LLM calls tab is shown automatically when at least one
1850
- * matching span exists in a case run.
1840
+ * span attributes, which pricing table derives built-in costs, and which
1841
+ * custom user-defined metrics are surfaced on each call. All fields are
1842
+ * optional and fall back to the documented defaults; the LLM calls tab is
1843
+ * shown automatically when at least one matching span exists in a case run.
1851
1844
  *
1852
1845
  * @example
1853
1846
  * ```ts
@@ -1980,15 +1973,9 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
1980
1973
  outputTokens: z$1.ZodOptional<z$1.ZodString>;
1981
1974
  cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
1982
1975
  cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
1976
+ cacheCreationInput1hTokens: z$1.ZodOptional<z$1.ZodString>;
1983
1977
  reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
1984
- totalTokens: z$1.ZodOptional<z$1.ZodString>;
1985
- tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
1986
- cost: z$1.ZodOptional<z$1.ZodString>;
1987
- inputCost: z$1.ZodOptional<z$1.ZodString>;
1988
- outputCost: z$1.ZodOptional<z$1.ZodString>;
1989
- cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
1990
- cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
1991
- reasoningCost: z$1.ZodOptional<z$1.ZodString>;
1978
+ latencyMs: z$1.ZodOptional<z$1.ZodString>;
1992
1979
  steps: z$1.ZodOptional<z$1.ZodString>;
1993
1980
  finishReason: z$1.ZodOptional<z$1.ZodString>;
1994
1981
  input: z$1.ZodOptional<z$1.ZodString>;
@@ -2003,6 +1990,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
2003
1990
  outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2004
1991
  cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2005
1992
  cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
1993
+ cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2006
1994
  reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
2007
1995
  }, z$1.core.$strip>>>;
2008
1996
  metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -2033,7 +2021,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
2033
2021
  cachedInputTokens: "cachedInputTokens";
2034
2022
  cacheCreationInputTokens: "cacheCreationInputTokens";
2035
2023
  reasoningTokens: "reasoningTokens";
2036
- llmLatencyMs: "llmLatencyMs";
2024
+ llmDurationMs: "llmDurationMs";
2037
2025
  }>>]>>;
2038
2026
  apiCalls: z$1.ZodOptional<z$1.ZodObject<{
2039
2027
  kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -2102,7 +2090,8 @@ type LlmCallEntry = {
2102
2090
  cachedInputTokens: number | null;
2103
2091
  cacheCreationInputTokens: number | null;
2104
2092
  reasoningTokens: number | null;
2105
- totalTokens: number | null;
2093
+ totalTokens: number | null; /** Time to first token for the LLM call in milliseconds, when reported by the span. */
2094
+ latencyMs: number | null;
2106
2095
  tokensPerSecond: number | null;
2107
2096
  costUsd: number | null;
2108
2097
  inputCostUsd: number | null;
@@ -2112,8 +2101,8 @@ type LlmCallEntry = {
2112
2101
  reasoningCostUsd: number | null; /** Number of inference rounds. Derived from the array length when `stepDetails` is set. */
2113
2102
  stepCount: number | null; /** Per-step breakdown when the configured `steps` attribute resolves to an array. */
2114
2103
  stepDetails: unknown[] | null;
2115
- finishReason: string | null;
2116
- latencyMs: number | null;
2104
+ finishReason: string | null; /** Elapsed LLM call span duration in milliseconds. */
2105
+ durationMs: number | null;
2117
2106
  input: unknown;
2118
2107
  output: unknown;
2119
2108
  reasoning: unknown;
@@ -2127,16 +2116,22 @@ type LlmCallEntry = {
2127
2116
  * shape consumed by the LLM calls tab.
2128
2117
  *
2129
2118
  * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
2130
- * (`model`, token counts, explicit cost, etc.) are read via
2119
+ * (`model`, token counts, latency, etc.) are read via
2131
2120
  * `getNestedAttribute` from the configured paths, with safe coercion to
2132
- * `string | null` / `number | null`. When explicit USD costs are absent,
2133
- * configured model pricing derives per-token-type costs from token counts.
2134
- * `totalTokens` falls back to a sum of input + output + cached when no
2135
- * explicit total attribute is present. The `steps` attribute path may resolve
2136
- * to either a number (rendered as the inference-round count) or an array of
2137
- * per-step detail objects (rendered as a Steps section in the body, with
2138
- * `stepCount` derived from the array length). `latencyMs` is `null` while the
2139
- * span is still running. User-defined `metrics` whose path resolves to
2121
+ * `string | null` / `number | null`. `latencyMs` is an explicit
2122
+ * time-to-first-token attribute; full span elapsed time is reported separately
2123
+ * as `durationMs`. Built-in USD costs are derived only from configured model
2124
+ * pricing and token counts. `totalTokens` is always derived from input +
2125
+ * output tokens. Cached input and cache creation tokens are reported
2126
+ * separately because they are subsets of input/output usage. The main cache
2127
+ * creation token field is treated as the total write count; optional one-hour
2128
+ * cache creation tokens only split that total for cost calculation. Base input
2129
+ * cost uses input minus cache read/write tokens so cached tokens are not
2130
+ * charged twice. Cache read/write costs still contribute to the total USD cost
2131
+ * at their configured rates. The `steps` attribute path may resolve to an array
2132
+ * of per-step detail objects, with `stepCount` derived from the array length.
2133
+ * `durationMs` and `tokensPerSecond` are `null` while the span is still
2134
+ * running. User-defined `metrics` whose path resolves to
2140
2135
  * `undefined` are dropped, but `null`, `0`, and `false` are preserved as
2141
2136
  * legitimate values worth displaying. Original span order is preserved so the
2142
2137
  * LLM calls tab matches the ordering in the Trace tab.
@@ -2161,8 +2156,8 @@ type ApiCallEntry = {
2161
2156
  status: EvalTraceSpan['status'];
2162
2157
  method: string | null;
2163
2158
  url: string | null;
2164
- statusCode: number | null;
2165
- latencyMs: number | null;
2159
+ statusCode: number | null; /** Elapsed API call duration in milliseconds. */
2160
+ durationMs: number | null;
2166
2161
  request: unknown;
2167
2162
  response: unknown;
2168
2163
  requestBody: unknown;
@@ -2179,10 +2174,10 @@ type ApiCallEntry = {
2179
2174
  *
2180
2175
  * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
2181
2176
  * (`method`, `url`, `statusCode`, etc.) are read via `getNestedAttribute` from
2182
- * the configured paths. `durationMs` takes precedence for latency, with a
2183
- * fallback to the span start/end timestamps. User-defined `metrics` whose path
2184
- * resolves to `undefined` are dropped, but `null`, `0`, and `false` are
2185
- * preserved as legitimate values worth displaying. Original span order is
2177
+ * the configured paths. An explicit `durationMs` attribute takes precedence,
2178
+ * with a fallback to the span start/end timestamps. User-defined `metrics`
2179
+ * whose path resolves to `undefined` are dropped, but `null`, `0`, and `false`
2180
+ * are preserved as legitimate values worth displaying. Original span order is
2186
2181
  * preserved so the API calls tab matches the ordering in the Trace tab.
2187
2182
  */
2188
2183
  declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCallsConfig): ApiCallEntry[];
@@ -2784,7 +2779,7 @@ type EvalColumnOverride = {
2784
2779
  /**
2785
2780
  * Extra options for `format: 'number'`.
2786
2781
  *
2787
- * Use this to add a prefix or suffix, force a fixed number of decimal
2782
+ * Use this to add a prefix or suffix, control minimum and maximum decimal
2788
2783
  * places, or switch to compact notation such as `1.2K`.
2789
2784
  */
2790
2785
  numberFormat?: NumberDisplayOptions;
@@ -2792,8 +2787,7 @@ type EvalColumnOverride = {
2792
2787
  * Hides the column from the runs table while keeping it available in detail
2793
2788
  * views and raw output data.
2794
2789
  */
2795
- hideInTable?: boolean; /** Whether the UI should allow sorting rows by this column. */
2796
- sortable?: boolean; /** Horizontal alignment used when rendering the column cells. */
2790
+ hideInTable?: boolean; /** Horizontal alignment used when rendering the column cells. */
2797
2791
  align?: 'left' | 'center' | 'right';
2798
2792
  /**
2799
2793
  * Maximum number of stars used when `format: 'stars'`.
@@ -2954,7 +2948,8 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
2954
2948
  * column across the latest run's cases — `key` must match one of the eval's
2955
2949
  * score or column keys, and only finite numeric values participate in the
2956
2950
  * reduction. When no case has a numeric value for the key the stat renders
2957
- * an em dash. `label` and `format` default to the matching `ColumnDef`.
2951
+ * an em dash. `label`, `format`, and `numberFormat` default to the matching
2952
+ * `ColumnDef`.
2958
2953
  */
2959
2954
  stats?: EvalStatsConfig;
2960
2955
  /**
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-Cv1kiOAG.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-weogme5U.mjs";
3
- import "./src-B879LZfo.mjs";
1
+ import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-D1edUDhp.mjs";
2
+ import { n as createRunner, t as runCli } from "./cli-C0EtHhEO.mjs";
3
+ import "./src-D-HuV8I-.mjs";
4
4
  export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-Cv1kiOAG.mjs";
1
+ import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-D1edUDhp.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";