npm - @ls-stack/agent-eval - Versions diffs - 0.20.0 → 0.22.0 - Mend

@ls-stack/agent-eval 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/{app-DsiLU65H.mjs → app-moDHbg1O.mjs} +3 -3
package/dist/apps/web/dist/assets/index-AUDD3rNB.js +118 -0
package/dist/apps/web/dist/assets/{index-CvR6QCLa.css → index-r0dVFK0B.css} +1 -1
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/{cli-weogme5U.mjs → cli-C0EtHhEO.mjs} +3 -3
package/dist/index.d.mts +56 -61
package/dist/index.mjs +3 -3
package/dist/runChild.mjs +1 -1
package/dist/{runOrchestration-Cv1kiOAG.mjs → runOrchestration-D1edUDhp.mjs} +155 -140
package/dist/{runner-DzrMtgBu.mjs → runner-C9nP2VKL.mjs} +2 -2
package/dist/{runner-B25oRQxX.mjs → runner-CyRhIzci.mjs} +1 -1
package/dist/src-D-HuV8I-.mjs +3 -0
package/package.json +1 -1
package/skills/agent-eval/SKILL.md +30 -20
package/dist/apps/web/dist/assets/index-Cba4MFa0.js +0 -118
package/dist/src-B879LZfo.mjs +0 -3

package/dist/index.d.mts CHANGED

@@ -39,8 +39,9 @@ type NumberDisplayOptions = {
   /** Number notation used when rendering the value. */notation?: 'standard' | 'compact'; /** Compact style used when `notation: 'compact'` is enabled. */
   compactDisplay?: 'short' | 'long'; /** String prepended to the rendered number, such as `$`. */
   prefix?: string; /** String appended to the rendered number, such as ` ms`. */
-  suffix?: string; /** Fixed number of decimal places to render. */
-  decimalPlaces?: number;
+  suffix?: string; /** Minimum number of decimal places to render. */
+  minDecimalPlaces?: number; /** Maximum number of decimal places to render. */
+  maxDecimalPlaces?: number;
 };
 /** Schema for numeric presentation options used by number-formatted values. */
 declare const numberDisplayOptionsSchema: z$1.ZodType<NumberDisplayOptions>;
@@ -98,7 +99,6 @@ declare const columnDefSchema: z$1.ZodObject<{
   passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
   maxStars: z$1.ZodOptional<z$1.ZodNumber>;
   hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
-  sortable: z$1.ZodOptional<z$1.ZodBoolean>;
   align: z$1.ZodOptional<z$1.ZodEnum<{
     left: "left";
     center: "center";
@@ -412,6 +412,7 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
     passFail: "passFail";
     stars: "stars";
   }>>;
+  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
   accent: z$1.ZodOptional<z$1.ZodBoolean>;
 }, z$1.core.$strip>], "kind">;
 /** Single stat rendered in the EvalCard stats row. */
@@ -449,6 +450,7 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
     passFail: "passFail";
     stars: "stars";
   }>>;
+  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
   accent: z$1.ZodOptional<z$1.ZodBoolean>;
 }, z$1.core.$strip>], "kind">>;
 /** Ordered list of stats rendered in the EvalCard stats row. */
@@ -496,7 +498,6 @@ declare const evalSummarySchema: z$1.ZodObject<{
     passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
     maxStars: z$1.ZodOptional<z$1.ZodNumber>;
     hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
-    sortable: z$1.ZodOptional<z$1.ZodBoolean>;
     align: z$1.ZodOptional<z$1.ZodEnum<{
       left: "left";
       center: "center";
@@ -544,6 +545,7 @@ declare const evalSummarySchema: z$1.ZodObject<{
       passFail: "passFail";
       stars: "stars";
     }>>;
+    numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
     accent: z$1.ZodOptional<z$1.ZodBoolean>;
   }, z$1.core.$strip>], "kind">>>;
   charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -643,7 +645,7 @@ declare const caseRowSchema: z$1.ZodObject<{
     cancelled: "cancelled";
     pending: "pending";
   }>;
-  latencyMs: z$1.ZodNullable<z$1.ZodNumber>;
+  durationMs: z$1.ZodNullable<z$1.ZodNumber>;
   costUsd: z$1.ZodOptional<z$1.ZodNullable<z$1.ZodNumber>>;
   columns: z$1.ZodRecord<z$1.ZodString, z$1.ZodUnion<readonly [z$1.ZodType<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown, z$1.core.$ZodTypeInternals<string | number | boolean | Record<string, unknown> | unknown[] | null, unknown>>, z$1.ZodUnion<readonly [z$1.ZodObject<{
     source: z$1.ZodLiteral<"repo">;
@@ -1484,7 +1486,7 @@ declare const defaultConfigKeySchema: z$1.ZodEnum<{
   cachedInputTokens: "cachedInputTokens";
   cacheCreationInputTokens: "cacheCreationInputTokens";
   reasoningTokens: "reasoningTokens";
-  llmLatencyMs: "llmLatencyMs";
+  llmDurationMs: "llmDurationMs";
 }>;
 /** Built-in eval-level output/column key. */
 type DefaultConfigKey = z$1.infer<typeof defaultConfigKeySchema>;
@@ -1499,7 +1501,7 @@ declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<t
   cachedInputTokens: "cachedInputTokens";
   cacheCreationInputTokens: "cacheCreationInputTokens";
   reasoningTokens: "reasoningTokens";
-  llmLatencyMs: "llmLatencyMs";
+  llmDurationMs: "llmDurationMs";
 }>>]>;
 /** Removal config for built-in eval-level outputs and UI metadata. */
 type RemoveDefaultConfig = z$1.infer<typeof removeDefaultConfigSchema>;
@@ -1593,7 +1595,7 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
 type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
 /**
  * Schema for one model/provider pricing entry used to derive LLM-call costs
- * from token counts when a span does not already record explicit USD costs.
+ * from token counts.
  */
 declare const llmCallPricingSchema: z$1.ZodObject<{
   model: z$1.ZodString;
@@ -1602,6 +1604,7 @@ declare const llmCallPricingSchema: z$1.ZodObject<{
   outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
   cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
   cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+  cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
   reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
 }, z$1.core.$strip>;
 /** Model/provider pricing entry authored in `agent-evals.config.ts`. */
@@ -1616,15 +1619,9 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
     outputTokens: z$1.ZodOptional<z$1.ZodString>;
     cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
     cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
+    cacheCreationInput1hTokens: z$1.ZodOptional<z$1.ZodString>;
     reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
-    totalTokens: z$1.ZodOptional<z$1.ZodString>;
-    tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
-    cost: z$1.ZodOptional<z$1.ZodString>;
-    inputCost: z$1.ZodOptional<z$1.ZodString>;
-    outputCost: z$1.ZodOptional<z$1.ZodString>;
-    cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
-    cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
-    reasoningCost: z$1.ZodOptional<z$1.ZodString>;
+    latencyMs: z$1.ZodOptional<z$1.ZodString>;
     steps: z$1.ZodOptional<z$1.ZodString>;
     finishReason: z$1.ZodOptional<z$1.ZodString>;
     input: z$1.ZodOptional<z$1.ZodString>;
@@ -1639,6 +1636,7 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
     outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
     cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
     cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+    cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
     reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
   }, z$1.core.$strip>>>;
   metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -1712,15 +1710,9 @@ type ResolvedLlmCallsConfig = {
     outputTokens: string;
     cachedInputTokens: string;
     cacheCreationInputTokens: string;
+    cacheCreationInput1hTokens: string;
     reasoningTokens: string;
-    totalTokens: string;
-    tokensPerSecond: string;
-    cost: string;
-    inputCost: string;
-    outputCost: string;
-    cachedInputCost: string;
-    cacheCreationInputCost: string;
-    reasoningCost: string;
+    latencyMs: string;
     steps: string;
     finishReason: string;
     input: string;
@@ -1774,6 +1766,7 @@ type ResolvedLlmCallPricing = {
   outputUsdPerMillion?: number;
   cachedInputUsdPerMillion?: number;
   cacheCreationInputUsdPerMillion?: number;
+  cacheCreationInput1hUsdPerMillion?: number;
   reasoningUsdPerMillion?: number;
 };
 /** Default LLM-calls config the UI uses before the workspace fetch resolves. */
@@ -1789,8 +1782,8 @@ declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
  *   attribute path.
  * - Missing `metrics[].format` defaults to `'string'`.
  * - Missing `metrics[].placements` defaults to `['body']`.
- * - Missing `pricing` defaults to an empty registry; explicit span costs still
- *   take precedence over derived costs.
+ * - Missing `pricing` defaults to an empty registry; built-in costs are only
+ *   derived from configured pricing and token counts.
  */
 declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
 /**
@@ -1844,10 +1837,10 @@ type AgentEvalsConfig = {
    *
    * Determines which trace spans are treated as LLM calls (`kinds`), how
    * structured fields like `model` and `usage.inputTokens` are read from
-   * span attributes, and which custom user-defined metrics are surfaced on
-   * each call. All fields are optional and fall back to the documented
-   * defaults; the LLM calls tab is shown automatically when at least one
-   * matching span exists in a case run.
+   * span attributes, which pricing table derives built-in costs, and which
+   * custom user-defined metrics are surfaced on each call. All fields are
+   * optional and fall back to the documented defaults; the LLM calls tab is
+   * shown automatically when at least one matching span exists in a case run.
    *
    * @example
    * ```ts
@@ -1980,15 +1973,9 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
       outputTokens: z$1.ZodOptional<z$1.ZodString>;
       cachedInputTokens: z$1.ZodOptional<z$1.ZodString>;
       cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
+      cacheCreationInput1hTokens: z$1.ZodOptional<z$1.ZodString>;
       reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
-      totalTokens: z$1.ZodOptional<z$1.ZodString>;
-      tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
-      cost: z$1.ZodOptional<z$1.ZodString>;
-      inputCost: z$1.ZodOptional<z$1.ZodString>;
-      outputCost: z$1.ZodOptional<z$1.ZodString>;
-      cachedInputCost: z$1.ZodOptional<z$1.ZodString>;
-      cacheCreationInputCost: z$1.ZodOptional<z$1.ZodString>;
-      reasoningCost: z$1.ZodOptional<z$1.ZodString>;
+      latencyMs: z$1.ZodOptional<z$1.ZodString>;
       steps: z$1.ZodOptional<z$1.ZodString>;
       finishReason: z$1.ZodOptional<z$1.ZodString>;
       input: z$1.ZodOptional<z$1.ZodString>;
@@ -2003,6 +1990,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
       outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
       cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
       cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+      cacheCreationInput1hUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
       reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
     }, z$1.core.$strip>>>;
     metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -2033,7 +2021,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
     cachedInputTokens: "cachedInputTokens";
     cacheCreationInputTokens: "cacheCreationInputTokens";
     reasoningTokens: "reasoningTokens";
-    llmLatencyMs: "llmLatencyMs";
+    llmDurationMs: "llmDurationMs";
   }>>]>>;
   apiCalls: z$1.ZodOptional<z$1.ZodObject<{
     kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -2102,7 +2090,8 @@ type LlmCallEntry = {
   cachedInputTokens: number | null;
   cacheCreationInputTokens: number | null;
   reasoningTokens: number | null;
-  totalTokens: number | null;
+  totalTokens: number | null; /** Time to first token for the LLM call in milliseconds, when reported by the span. */
+  latencyMs: number | null;
   tokensPerSecond: number | null;
   costUsd: number | null;
   inputCostUsd: number | null;
@@ -2112,8 +2101,8 @@ type LlmCallEntry = {
   reasoningCostUsd: number | null; /** Number of inference rounds. Derived from the array length when `stepDetails` is set. */
   stepCount: number | null; /** Per-step breakdown when the configured `steps` attribute resolves to an array. */
   stepDetails: unknown[] | null;
-  finishReason: string | null;
-  latencyMs: number | null;
+  finishReason: string | null; /** Elapsed LLM call span duration in milliseconds. */
+  durationMs: number | null;
   input: unknown;
   output: unknown;
   reasoning: unknown;
@@ -2127,16 +2116,22 @@ type LlmCallEntry = {
  * shape consumed by the LLM calls tab.
  *
  * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
- * (`model`, token counts, explicit cost, etc.) are read via
+ * (`model`, token counts, latency, etc.) are read via
  * `getNestedAttribute` from the configured paths, with safe coercion to
- * `string | null` / `number | null`. When explicit USD costs are absent,
- * configured model pricing derives per-token-type costs from token counts.
- * `totalTokens` falls back to a sum of input + output + cached when no
- * explicit total attribute is present. The `steps` attribute path may resolve
- * to either a number (rendered as the inference-round count) or an array of
- * per-step detail objects (rendered as a Steps section in the body, with
- * `stepCount` derived from the array length). `latencyMs` is `null` while the
- * span is still running. User-defined `metrics` whose path resolves to
+ * `string | null` / `number | null`. `latencyMs` is an explicit
+ * time-to-first-token attribute; full span elapsed time is reported separately
+ * as `durationMs`. Built-in USD costs are derived only from configured model
+ * pricing and token counts. `totalTokens` is always derived from input +
+ * output tokens. Cached input and cache creation tokens are reported
+ * separately because they are subsets of input/output usage. The main cache
+ * creation token field is treated as the total write count; optional one-hour
+ * cache creation tokens only split that total for cost calculation. Base input
+ * cost uses input minus cache read/write tokens so cached tokens are not
+ * charged twice. Cache read/write costs still contribute to the total USD cost
+ * at their configured rates. The `steps` attribute path may resolve to an array
+ * of per-step detail objects, with `stepCount` derived from the array length.
+ * `durationMs` and `tokensPerSecond` are `null` while the span is still
+ * running. User-defined `metrics` whose path resolves to
  * `undefined` are dropped, but `null`, `0`, and `false` are preserved as
  * legitimate values worth displaying. Original span order is preserved so the
  * LLM calls tab matches the ordering in the Trace tab.
@@ -2161,8 +2156,8 @@ type ApiCallEntry = {
   status: EvalTraceSpan['status'];
   method: string | null;
   url: string | null;
-  statusCode: number | null;
-  latencyMs: number | null;
+  statusCode: number | null; /** Elapsed API call duration in milliseconds. */
+  durationMs: number | null;
   request: unknown;
   response: unknown;
   requestBody: unknown;
@@ -2179,10 +2174,10 @@ type ApiCallEntry = {
  *
  * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
  * (`method`, `url`, `statusCode`, etc.) are read via `getNestedAttribute` from
- * the configured paths. `durationMs` takes precedence for latency, with a
- * fallback to the span start/end timestamps. User-defined `metrics` whose path
- * resolves to `undefined` are dropped, but `null`, `0`, and `false` are
- * preserved as legitimate values worth displaying. Original span order is
+ * the configured paths. An explicit `durationMs` attribute takes precedence,
+ * with a fallback to the span start/end timestamps. User-defined `metrics`
+ * whose path resolves to `undefined` are dropped, but `null`, `0`, and `false`
+ * are preserved as legitimate values worth displaying. Original span order is
  * preserved so the API calls tab matches the ordering in the Trace tab.
  */
 declare function extractApiCalls(spans: EvalTraceSpan[], config: ResolvedApiCallsConfig): ApiCallEntry[];
@@ -2784,7 +2779,7 @@ type EvalColumnOverride = {
   /**
    * Extra options for `format: 'number'`.
    *
-   * Use this to add a prefix or suffix, force a fixed number of decimal
+   * Use this to add a prefix or suffix, control minimum and maximum decimal
    * places, or switch to compact notation such as `1.2K`.
    */
   numberFormat?: NumberDisplayOptions;
@@ -2792,8 +2787,7 @@ type EvalColumnOverride = {
    * Hides the column from the runs table while keeping it available in detail
    * views and raw output data.
    */
-  hideInTable?: boolean; /** Whether the UI should allow sorting rows by this column. */
-  sortable?: boolean; /** Horizontal alignment used when rendering the column cells. */
+  hideInTable?: boolean; /** Horizontal alignment used when rendering the column cells. */
   align?: 'left' | 'center' | 'right';
   /**
    * Maximum number of stars used when `format: 'stars'`.
@@ -2954,7 +2948,8 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
    * column across the latest run's cases — `key` must match one of the eval's
    * score or column keys, and only finite numeric values participate in the
    * reduction. When no case has a numeric value for the key the stat renders
-   * an em dash. `label` and `format` default to the matching `ColumnDef`.
+   * an em dash. `label`, `format`, and `numberFormat` default to the matching
+   * `ColumnDef`.
    */
   stats?: EvalStatsConfig;
   /**

package/dist/index.mjs CHANGED

@@ -1,4 +1,4 @@
-import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as 
hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-Cv1kiOAG.mjs";
-import { n as createRunner, t as runCli } from "./cli-weogme5U.mjs";
-import "./src-B879LZfo.mjs";
+import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as 
hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-D1edUDhp.mjs";
+import { n as createRunner, t as runCli } from "./cli-C0EtHhEO.mjs";
+import "./src-D-HuV8I-.mjs";
 export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, 
runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };

package/dist/runChild.mjs CHANGED

@@ -1,4 +1,4 @@
-import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-Cv1kiOAG.mjs";
+import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-D1edUDhp.mjs";
 import { createHash } from "node:crypto";
 import { readFile } from "node:fs/promises";
 import { relative } from "node:path";