npm - @ls-stack/agent-eval - Versions diffs - 0.19.0 → 0.21.0 - Mend

@ls-stack/agent-eval 0.19.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/dist/{app-hAlVvT-Q.mjs → app-CmwmcUgG.mjs} +3 -3
package/dist/apps/web/dist/assets/index-EXO08yya.js +118 -0
package/dist/apps/web/dist/assets/index-r0dVFK0B.css +1 -0
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/{cli-3zANEAhG.mjs → cli-DumvanQI.mjs} +10 -80
package/dist/index.d.mts +131 -14
package/dist/index.mjs +4 -4
package/dist/runChild.mjs +1 -1
package/dist/{runOrchestration-BBg_VUH5.mjs → runOrchestration-zYAcAPtS.mjs} +567 -24
package/dist/{runner-DxlahWDo.mjs → runner-BcwyX9CO.mjs} +1 -1
package/dist/{runner-RmZPRz-h.mjs → runner-Dy_PECaf.mjs} +2 -2
package/dist/src-BoAJb4wC.mjs +3 -0
package/package.json +1 -1
package/skills/agent-eval/SKILL.md +35 -14
package/dist/apps/web/dist/assets/index-C761goIh.css +0 -1
package/dist/apps/web/dist/assets/index-DS552a3u.js +0 -118
package/dist/src-BC4OrajN.mjs +0 -3

package/dist/index.d.mts CHANGED

@@ -39,8 +39,9 @@ type NumberDisplayOptions = {
   /** Number notation used when rendering the value. */notation?: 'standard' | 'compact'; /** Compact style used when `notation: 'compact'` is enabled. */
   compactDisplay?: 'short' | 'long'; /** String prepended to the rendered number, such as `$`. */
   prefix?: string; /** String appended to the rendered number, such as ` ms`. */
-  suffix?: string; /** Fixed number of decimal places to render. */
-  decimalPlaces?: number;
+  suffix?: string; /** Minimum number of decimal places to render. */
+  minDecimalPlaces?: number; /** Maximum number of decimal places to render. */
+  maxDecimalPlaces?: number;
 };
 /** Schema for numeric presentation options used by number-formatted values. */
 declare const numberDisplayOptionsSchema: z$1.ZodType<NumberDisplayOptions>;
@@ -98,7 +99,6 @@ declare const columnDefSchema: z$1.ZodObject<{
   passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
   maxStars: z$1.ZodOptional<z$1.ZodNumber>;
   hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
-  sortable: z$1.ZodOptional<z$1.ZodBoolean>;
   align: z$1.ZodOptional<z$1.ZodEnum<{
     left: "left";
     center: "center";
@@ -412,6 +412,7 @@ declare const evalStatItemSchema: z$1.ZodDiscriminatedUnion<[z$1.ZodObject<{
     passFail: "passFail";
     stars: "stars";
   }>>;
+  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
   accent: z$1.ZodOptional<z$1.ZodBoolean>;
 }, z$1.core.$strip>], "kind">;
 /** Single stat rendered in the EvalCard stats row. */
@@ -449,6 +450,7 @@ declare const evalStatsConfigSchema: z$1.ZodArray<z$1.ZodDiscriminatedUnion<[z$1
     passFail: "passFail";
     stars: "stars";
   }>>;
+  numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
   accent: z$1.ZodOptional<z$1.ZodBoolean>;
 }, z$1.core.$strip>], "kind">>;
 /** Ordered list of stats rendered in the EvalCard stats row. */
@@ -496,7 +498,6 @@ declare const evalSummarySchema: z$1.ZodObject<{
     passThreshold: z$1.ZodOptional<z$1.ZodNumber>;
     maxStars: z$1.ZodOptional<z$1.ZodNumber>;
     hideInTable: z$1.ZodOptional<z$1.ZodBoolean>;
-    sortable: z$1.ZodOptional<z$1.ZodBoolean>;
     align: z$1.ZodOptional<z$1.ZodEnum<{
       left: "left";
       center: "center";
@@ -544,6 +545,7 @@ declare const evalSummarySchema: z$1.ZodObject<{
       passFail: "passFail";
       stars: "stars";
     }>>;
+    numberFormat: z$1.ZodOptional<z$1.ZodType<NumberDisplayOptions, unknown, z$1.core.$ZodTypeInternals<NumberDisplayOptions, unknown>>>;
     accent: z$1.ZodOptional<z$1.ZodBoolean>;
   }, z$1.core.$strip>], "kind">>>;
   charts: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
@@ -1473,6 +1475,36 @@ declare const trialSelectionModeSchema: z$1.ZodEnum<{
 }>;
 /** Strategy used to collapse repeated trials into one stored case result. */
 type TrialSelectionMode = z$1.infer<typeof trialSelectionModeSchema>;
+/** Built-in eval-level output/column keys. */
+declare const defaultConfigKeySchema: z$1.ZodEnum<{
+  apiCalls: "apiCalls";
+  costUsd: "costUsd";
+  llmTurns: "llmTurns";
+  inputTokens: "inputTokens";
+  outputTokens: "outputTokens";
+  totalTokens: "totalTokens";
+  cachedInputTokens: "cachedInputTokens";
+  cacheCreationInputTokens: "cacheCreationInputTokens";
+  reasoningTokens: "reasoningTokens";
+  llmLatencyMs: "llmLatencyMs";
+}>;
+/** Built-in eval-level output/column key. */
+type DefaultConfigKey = z$1.infer<typeof defaultConfigKeySchema>;
+/** Removal config for built-in eval-level outputs and UI metadata. */
+declare const removeDefaultConfigSchema: z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
+  apiCalls: "apiCalls";
+  costUsd: "costUsd";
+  llmTurns: "llmTurns";
+  inputTokens: "inputTokens";
+  outputTokens: "outputTokens";
+  totalTokens: "totalTokens";
+  cachedInputTokens: "cachedInputTokens";
+  cacheCreationInputTokens: "cacheCreationInputTokens";
+  reasoningTokens: "reasoningTokens";
+  llmLatencyMs: "llmLatencyMs";
+}>>]>;
+/** Removal config for built-in eval-level outputs and UI metadata. */
+type RemoveDefaultConfig = z$1.infer<typeof removeDefaultConfigSchema>;
 /** Render formats supported by an LLM-call metric in the UI. */
 declare const llmCallMetricFormatSchema: z$1.ZodEnum<{
   string: "string";
@@ -1561,6 +1593,21 @@ declare const apiCallMetricSchema: z$1.ZodObject<{
 }, z$1.core.$strip>;
 /** User-defined API-call metric authored in `agent-evals.config.ts`. */
 type ApiCallMetric = z$1.infer<typeof apiCallMetricSchema>;
+/**
+ * Schema for one model/provider pricing entry used to derive LLM-call costs
+ * from token counts when a span does not already record explicit USD costs.
+ */
+declare const llmCallPricingSchema: z$1.ZodObject<{
+  model: z$1.ZodString;
+  provider: z$1.ZodOptional<z$1.ZodString>;
+  inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+  outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+  cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+  cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+  reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+}, z$1.core.$strip>;
+/** Model/provider pricing entry authored in `agent-evals.config.ts`. */
+type LlmCallPricing = z$1.infer<typeof llmCallPricingSchema>;
 /** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
 declare const llmCallsConfigSchema: z$1.ZodObject<{
   kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
@@ -1573,6 +1620,7 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
     cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
     reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
     totalTokens: z$1.ZodOptional<z$1.ZodString>;
+    tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
     cost: z$1.ZodOptional<z$1.ZodString>;
     inputCost: z$1.ZodOptional<z$1.ZodString>;
     outputCost: z$1.ZodOptional<z$1.ZodString>;
@@ -1586,6 +1634,15 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
     reasoning: z$1.ZodOptional<z$1.ZodString>;
     toolCalls: z$1.ZodOptional<z$1.ZodString>;
   }, z$1.core.$strip>>;
+  pricing: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
+    model: z$1.ZodString;
+    provider: z$1.ZodOptional<z$1.ZodString>;
+    inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+    outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+    cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+    cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+    reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+  }, z$1.core.$strip>>>;
   metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
     label: z$1.ZodString;
     tooltip: z$1.ZodOptional<z$1.ZodString>;
@@ -1659,6 +1716,7 @@ type ResolvedLlmCallsConfig = {
     cacheCreationInputTokens: string;
     reasoningTokens: string;
     totalTokens: string;
+    tokensPerSecond: string;
     cost: string;
     inputCost: string;
     outputCost: string;
@@ -1673,6 +1731,7 @@ type ResolvedLlmCallsConfig = {
     toolCalls: string;
   };
   metrics: ResolvedLlmCallMetric[];
+  pricing: ResolvedLlmCallPricing[];
 };
 /** Resolved API-calls config sent to the UI with all defaults applied. */
 type ResolvedApiCallsConfig = {
@@ -1709,6 +1768,16 @@ type ResolvedApiCallMetric = {
   numberFormat?: NumberDisplayOptions;
   placements: ApiCallMetricPlacement[];
 };
+/** Fully-resolved pricing entry used by the LLM calls extractor. */
+type ResolvedLlmCallPricing = {
+  model: string;
+  provider?: string;
+  inputUsdPerMillion?: number;
+  outputUsdPerMillion?: number;
+  cachedInputUsdPerMillion?: number;
+  cacheCreationInputUsdPerMillion?: number;
+  reasoningUsdPerMillion?: number;
+};
 /** Default LLM-calls config the UI uses before the workspace fetch resolves. */
 declare const DEFAULT_LLM_CALLS_CONFIG: ResolvedLlmCallsConfig;
 /** Default API-calls config the UI uses before the workspace fetch resolves. */
@@ -1722,6 +1791,8 @@ declare const DEFAULT_API_CALLS_CONFIG: ResolvedApiCallsConfig;
  *   attribute path.
  * - Missing `metrics[].format` defaults to `'string'`.
  * - Missing `metrics[].placements` defaults to `['body']`.
+ * - Missing `pricing` defaults to an empty registry; explicit span costs still
+ *   take precedence over derived costs.
  */
 declare function resolveLlmCallsConfig(input: LlmCallsConfigInput | undefined): ResolvedLlmCallsConfig;
 /**
@@ -1788,14 +1859,25 @@ type AgentEvalsConfig = {
    *     cachedInputTokens: 'usage.cache_read_input_tokens',
    *   },
    *   metrics: [
-   *     { label: 'Tokens/sec', path: 'tokensPerSecond', format: 'number',
-   *       numberFormat: { decimalPlaces: 1 }, placements: ['header', 'body'] },
    *     { label: 'Retries', path: 'retryCount', format: 'number' },
    *   ],
+   *   pricing: [
+   *     { model: 'gpt-4o-mini', provider: 'openai',
+   *       inputUsdPerMillion: 0.15, outputUsdPerMillion: 0.6 },
+   *   ],
    * }
    * ```
    */
   llmCalls?: LlmCallsConfigInput;
+  /**
+   * Remove built-in eval-level outputs, columns, stats, and charts.
+   *
+   * Defaults are derived from trace spans using the resolved `llmCalls` and
+   * `apiCalls` extraction configs. Set to `true` to remove all defaults, or
+   * pass specific keys such as `['costUsd', 'apiCalls']` to remove only those
+   * defaults globally. Per-eval removal is additive.
+   */
+  removeDefaultConfig?: RemoveDefaultConfig;
   /**
    * Configuration for the "API calls" tab in the case-run drawer.
    *
@@ -1902,6 +1984,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
       cacheCreationInputTokens: z$1.ZodOptional<z$1.ZodString>;
       reasoningTokens: z$1.ZodOptional<z$1.ZodString>;
       totalTokens: z$1.ZodOptional<z$1.ZodString>;
+      tokensPerSecond: z$1.ZodOptional<z$1.ZodString>;
       cost: z$1.ZodOptional<z$1.ZodString>;
       inputCost: z$1.ZodOptional<z$1.ZodString>;
       outputCost: z$1.ZodOptional<z$1.ZodString>;
@@ -1915,6 +1998,15 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
       reasoning: z$1.ZodOptional<z$1.ZodString>;
       toolCalls: z$1.ZodOptional<z$1.ZodString>;
     }, z$1.core.$strip>>;
+    pricing: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
+      model: z$1.ZodString;
+      provider: z$1.ZodOptional<z$1.ZodString>;
+      inputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+      outputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+      cachedInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+      cacheCreationInputUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+      reasoningUsdPerMillion: z$1.ZodOptional<z$1.ZodNumber>;
+    }, z$1.core.$strip>>>;
     metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
       label: z$1.ZodString;
       tooltip: z$1.ZodOptional<z$1.ZodString>;
@@ -1933,6 +2025,18 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
       }>>>;
     }, z$1.core.$strip>>>;
   }, z$1.core.$strip>>;
+  removeDefaultConfig: z$1.ZodOptional<z$1.ZodUnion<readonly [z$1.ZodLiteral<true>, z$1.ZodArray<z$1.ZodEnum<{
+    apiCalls: "apiCalls";
+    costUsd: "costUsd";
+    llmTurns: "llmTurns";
+    inputTokens: "inputTokens";
+    outputTokens: "outputTokens";
+    totalTokens: "totalTokens";
+    cachedInputTokens: "cachedInputTokens";
+    cacheCreationInputTokens: "cacheCreationInputTokens";
+    reasoningTokens: "reasoningTokens";
+    llmLatencyMs: "llmLatencyMs";
+  }>>]>>;
   apiCalls: z$1.ZodOptional<z$1.ZodObject<{
     kinds: z$1.ZodOptional<z$1.ZodArray<z$1.ZodString>>;
     attributes: z$1.ZodOptional<z$1.ZodObject<{
@@ -2001,6 +2105,7 @@ type LlmCallEntry = {
   cacheCreationInputTokens: number | null;
   reasoningTokens: number | null;
   totalTokens: number | null;
+  tokensPerSecond: number | null;
   costUsd: number | null;
   inputCostUsd: number | null;
   outputCostUsd: number | null;
@@ -2024,9 +2129,11 @@ type LlmCallEntry = {
  * shape consumed by the LLM calls tab.
  *
  * Spans whose `kind` is not in `config.kinds` are dropped. Structured fields
- * (`model`, token counts, cost, etc.) are read via `getNestedAttribute` from
- * the configured paths, with safe coercion to `string | null` / `number |
- * null`. `totalTokens` falls back to a sum of input + output + cached when no
+ * (`model`, token counts, explicit cost, etc.) are read via
+ * `getNestedAttribute` from the configured paths, with safe coercion to
+ * `string | null` / `number | null`. When explicit USD costs are absent,
+ * configured model pricing derives per-token-type costs from token counts.
+ * `totalTokens` falls back to a sum of input + output + cached when no
  * explicit total attribute is present. The `steps` attribute path may resolve
  * to either a number (rendered as the inference-round count) or an array of
  * per-step detail objects (rendered as a Steps section in the body, with
@@ -2679,7 +2786,7 @@ type EvalColumnOverride = {
   /**
    * Extra options for `format: 'number'`.
    *
-   * Use this to add a prefix or suffix, force a fixed number of decimal
+   * Use this to add a prefix or suffix, control minimum and maximum decimal
    * places, or switch to compact notation such as `1.2K`.
    */
   numberFormat?: NumberDisplayOptions;
@@ -2687,8 +2794,7 @@ type EvalColumnOverride = {
    * Hides the column from the runs table while keeping it available in detail
    * views and raw output data.
    */
-  hideInTable?: boolean; /** Whether the UI should allow sorting rows by this column. */
-  sortable?: boolean; /** Horizontal alignment used when rendering the column cells. */
+  hideInTable?: boolean; /** Horizontal alignment used when rendering the column cells. */
   align?: 'left' | 'center' | 'right';
   /**
    * Maximum number of stars used when `format: 'stars'`.
@@ -2849,7 +2955,8 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
    * column across the latest run's cases — `key` must match one of the eval's
    * score or column keys, and only finite numeric values participate in the
    * reduction. When no case has a numeric value for the key the stat renders
-   * an em dash. `label` and `format` default to the matching `ColumnDef`.
+   * an em dash. `label`, `format`, and `numberFormat` default to the matching
+   * `ColumnDef`.
    */
   stats?: EvalStatsConfig;
   /**
@@ -2867,6 +2974,16 @@ type EvalDefinitionBase<TInput = unknown, TOutputs extends EvalOutputs = EvalOut
    * with `passThreshold`.
    */
   charts?: EvalChartsConfig;
+  /**
+   * Remove built-in eval-level outputs, columns, stats, and charts.
+   *
+   * By default the runner derives usage fields from trace spans using the
+   * workspace `llmCalls` and `apiCalls` configs. Set to `true` to remove all
+   * defaults for this eval, or pass specific keys such as
+   * `['costUsd', 'apiCalls']` to remove only those defaults. Per-eval removals
+   * are combined with global removals.
+   */
+  removeDefaultConfig?: true | DefaultConfigKey[];
 };
 /**
  * Complete authored eval definition consumed by `defineEval`.
@@ -3460,4 +3577,4 @@ declare function createRunner({
  */
 declare function runCli(argv: string[]): Promise<void>;
 //#endregion
-export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallsConfigInput, type NumberDisplayOptions, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type 
ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, 
getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
+export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallsConfigInput, type NumberDisplayOptions, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type 
ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, 
evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };

package/dist/index.mjs CHANGED

@@ -1,4 +1,4 @@
-import { $ as assertionFailureSchema, $t as runArtifactRefSchema, A as getNestedAttribute, An as getEvalRegistry, At as cacheRecordingSchema, B as agentEvalsConfigSchema, Bt as traceDisplayInputConfigSchema, C as createRunRequestSchema, Cn as runInEvalScope, Ct as cacheEntrySchema, D as extractCacheHits, Dn as startEvalBackgroundJob, Dt as cacheModeSchema, E as extractCacheEntries, En as setScopeCacheContext, Et as cacheListItemSchema, F as deriveStatusFromChildStatuses, Ft as traceAttributeDisplayFormatSchema, G as llmCallMetricFormatSchema, Gt as cellValueSchema, H as apiCallMetricPlacementSchema, Ht as traceSpanKindSchema, I as runManifestSchema, It as traceAttributeDisplayInputSchema, J as llmCallsConfigSchema, Jt as columnKindSchema, K as llmCallMetricPlacementSchema, Kt as columnDefSchema, L as runSummarySchema, Lt as traceAttributeDisplayPlacementSchema, M as getEvalDisplayStatus, Mt as serializedCacheSpanSchema, N as deriveScopedSummaryFromCases, Nt as spanCacheOptionsSchema, O as extractApiCalls, On as repoFile, Ot as cacheOperationTypeSchema, P as deriveStatusFromCaseRows, Pt as traceCacheRefSchema, Q as trialSelectionModeSchema, Qt as repoFileRefSchema, R as DEFAULT_API_CALLS_CONFIG, Rt as traceAttributeDisplaySchema, Sn as runInEvalRuntimeScope, St as cacheDebugKeyFileSchema, T as sseEnvelopeSchema, Tn as setEvalOutput, Tt as cacheFileSchema, U as apiCallMetricSchema, Ut as traceSpanSchema, V as apiCallMetricFormatSchema, Vt as traceSpanErrorSchema, W as apiCallsConfigSchema, Wt as traceSpanWarningSchema, X as resolveLlmCallsConfig, Xt as jsonCellSchema, Y as resolveApiCallsConfig, Yt as fileRefSchema, Z as runLogsConfigSchema, Zt as numberDisplayOptionsSchema, _n as getEvalCaseInput, _t as evalChartMetricSchema, an as hashCacheKey, at as evalStatsConfigSchema, bn as mergeEvalOutput, bt as evalChartsConfigSchema, cn as deserializeCacheValue, ct as runLogLevelSchema, dn as EvalAssertionError, dt as scoreTraceSchema, en as z, et as caseDetailSchema, fn 
as appendToEvalOutput, ft as evalChartAggregateSchema, gn as getCurrentScope, gt as evalChartConfigSchema, hn as evalLog, ht as evalChartColorSchema, in as evalTracer, it as evalStatItemSchema, j as getEvalTitle, jt as cacheStatusSchema, k as extractLlmCalls, kn as defineEval, kt as cacheRecordingOpSchema, ln as serializeCacheRecording, lt as runLogLocationSchema, mn as evalAssert, mt as evalChartBuiltinMetricSchema, nn as captureEvalSpanError, nt as evalFreshnessStatusSchema, on as hashCacheKeySync, ot as evalSummarySchema, pt as evalChartAxisSchema, q as llmCallMetricSchema, qt as columnFormatSchema, rn as evalSpan, rt as evalStatAggregateSchema, sn as deserializeCacheRecording, st as runLogEntrySchema, tn as buildTraceTree, tt as caseRowSchema, un as serializeCacheValue, ut as runLogPhaseSchema, vn as incrementEvalOutput, vt as evalChartTooltipExtraSchema, w as updateManualScoreRequestSchema, wn as runInExistingEvalScope, wt as cacheEntryWithDebugKeySchema, xn as nextEvalId, xt as cacheDebugKeyEntrySchema, yn as isInEvalScope, yt as evalChartTypeSchema, z as DEFAULT_LLM_CALLS_CONFIG, zt as traceDisplayConfigSchema } from "./runOrchestration-BBg_VUH5.mjs";
-import { n as createRunner, t as runCli } from "./cli-3zANEAhG.mjs";
-import "./src-BC4OrajN.mjs";
-export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, 
serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
+import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as setEvalOutput, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as incrementEvalOutput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as runInEvalRuntimeScope, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as nextEvalId, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as getEvalRegistry, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as startEvalBackgroundJob, Mt as cacheModeSchema, N as getEvalTitle, Nn as repoFile, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as runInEvalScope, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as defineEval, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getEvalCaseInput, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as mergeEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as appendToEvalOutput, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalLog, bt as evalChartColorSchema, cn as evalSpan, ct as evalStatAggregateSchema, dn as 
hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as setScopeCacheContext, jt as cacheListItemSchema, k as extractCacheHits, kn as runInExistingEvalScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vt as evalChartAxisSchema, wn as isInEvalScope, wt as evalChartTypeSchema, xn as getCurrentScope, xt as evalChartConfigSchema, yn as evalAssert, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-zYAcAPtS.mjs";
+import { n as createRunner, t as runCli } from "./cli-DumvanQI.mjs";
+import "./src-BoAJb4wC.mjs";
+export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, 
runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };

package/dist/runChild.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { C as createRunRequestSchema, I as runManifestSchema, Kt as columnDefSchema, L as runSummarySchema, S as createFsCacheStore, at as evalStatsConfigSchema, bt as evalChartsConfigSchema, pn as configureEvalRunLogs, t as executeRun, v as parseEvalMetas, y as loadConfig } from "./runOrchestration-BBg_VUH5.mjs";
+import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, vn as configureEvalRunLogs, w as createFsCacheStore, z as runSummarySchema } from "./runOrchestration-zYAcAPtS.mjs";
 import { createHash } from "node:crypto";
 import { readFile } from "node:fs/promises";
 import { relative } from "node:path";