@ls-stack/agent-eval 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@
25
25
  href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
26
26
  rel="stylesheet"
27
27
  />
28
- <script type="module" crossorigin src="/assets/index-AUDD3rNB.js"></script>
28
+ <script type="module" crossorigin src="/assets/index-KbbX3NYr.js"></script>
29
29
  <link rel="stylesheet" crossorigin href="/assets/index-r0dVFK0B.css">
30
30
  </head>
31
31
  <body>
package/dist/bin.mjs CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import { t as runCli } from "./cli-D3QNOcPN.mjs";
2
+ import { t as runCli } from "./cli-Be0x8CS3.mjs";
3
3
  import { spawn } from "node:child_process";
4
4
  //#region src/bin.ts
5
5
  const moduleMocksFlag = "--experimental-test-module-mocks";
@@ -1,4 +1,4 @@
1
- import { C as validateCharts, F as deriveScopedSummaryFromCases, Ln as getEvalRegistry, N as getEvalTitle, P as getEvalDisplayStatus, S as normalizeScoreDef, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as loadConfig, c as persistCaseDetail, d as recomputePersistedCaseStatus, et as resolveApiCallsConfig, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, tt as resolveLlmCallsConfig, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, w as createFsCacheStore, x as buildDeclaredColumnDefs, y as resolveEvalDefaultConfig, z as runSummarySchema } from "./runOrchestration-CimthgI7.mjs";
1
+ import { B as runSummarySchema, C as validateCharts, F as getEvalDisplayStatus, I as deriveScopedSummaryFromCases, P as getEvalTitle, Rn as getEvalRegistry, S as normalizeScoreDef, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as loadConfig, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, l as persistRunState, m as buildEvalSummary, n as generateRunId, nt as resolveLlmCallsConfig, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, tt as resolveApiCallsConfig, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, w as createFsCacheStore, x as buildDeclaredColumnDefs, y as resolveEvalDefaultConfig } from "./runOrchestration-D697g6Qe.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
4
4
  import { dirname, join, relative, resolve } from "node:path";
@@ -959,8 +959,8 @@ async function commandApp(args) {
959
959
  const { serve } = await import("@hono/node-server");
960
960
  const bundledWebDist = resolve(currentDir, "apps/web/dist");
961
961
  if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
962
- const appModule = await import("./app-Cw79dJDr.mjs");
963
- const runnerModule = await import("./runner-B-SYzW8w.mjs");
962
+ const appModule = await import("./app-DYRmucgj.mjs");
963
+ const runnerModule = await import("./runner-jSujaSKt.mjs");
964
964
  if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
965
965
  if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
966
966
  await runnerModule.initRunner();
package/dist/index.d.mts CHANGED
@@ -1539,13 +1539,35 @@ declare const apiCallMetricPlacementSchema: z$1.ZodEnum<{
1539
1539
  }>;
1540
1540
  /** Placement option for an API-call metric. */
1541
1541
  type ApiCallMetricPlacement = z$1.infer<typeof apiCallMetricPlacementSchema>;
1542
+ /** Context passed to an LLM/API-call derived attribute function. */
1543
+ type CallDerivedAttributeContext = {
1544
+ /** Raw attributes from the matching trace span. */attributes: Record<string, unknown> | undefined; /** Matching trace span. */
1545
+ span: EvalTraceSpan; /** Dot-path helper for reading from `span.attributes`. */
1546
+ get: (path: string) => unknown;
1547
+ };
1548
+ /**
1549
+ * Runner-side function used to derive one new span attribute from a matching
1550
+ * LLM/API-call span. Return `undefined` to omit the attribute for that span.
1551
+ */
1552
+ type CallDerivedAttribute = (ctx: CallDerivedAttributeContext) => unknown;
1553
+ /** One resolved derived span attribute rule. */
1554
+ type ResolvedCallDerivedAttribute = {
1555
+ /** Dot-path where the derived value is persisted on `span.attributes`. */path: string;
1556
+ /**
1557
+ * Function that derives the persisted value for each matching span. Omitted
1558
+ * after this config is serialized to the browser.
1559
+ */
1560
+ compute?: CallDerivedAttribute;
1561
+ };
1542
1562
  /**
1543
1563
  * Schema for a single user-defined metric attached to LLM call rows.
1544
1564
  *
1545
1565
  * Each metric reads `path` from the span's `attributes` and renders the value
1546
- * with the configured `format` and `numberFormat`. `placements` controls
1547
- * whether the metric appears as a chip on the collapsed row header, as a row
1548
- * inside the expanded body, or both. Defaults to `['body']` when omitted.
1566
+ * with the configured `format` and `numberFormat`. Use
1567
+ * `llmCalls.derivedAttributes` when a metric should read a value computed from
1568
+ * other attributes. `placements` controls whether the metric appears as a chip
1569
+ * on the collapsed row header, as a row inside the expanded body, or both.
1570
+ * Defaults to `['body']` when omitted.
1549
1571
  */
1550
1572
  declare const llmCallMetricSchema: z$1.ZodObject<{
1551
1573
  label: z$1.ZodString;
@@ -1570,9 +1592,11 @@ type LlmCallMetric = z$1.infer<typeof llmCallMetricSchema>;
1570
1592
  * Schema for a single user-defined metric attached to API call rows.
1571
1593
  *
1572
1594
  * Each metric reads `path` from the span's `attributes` and renders the value
1573
- * with the configured `format` and `numberFormat`. `placements` controls
1574
- * whether the metric appears as a chip on the collapsed row header, as a row
1575
- * inside the expanded body, or both. Defaults to `['body']` when omitted.
1595
+ * with the configured `format` and `numberFormat`. Use
1596
+ * `apiCalls.derivedAttributes` when a metric should read a value computed from
1597
+ * other attributes. `placements` controls whether the metric appears as a chip
1598
+ * on the collapsed row header, as a row inside the expanded body, or both.
1599
+ * Defaults to `['body']` when omitted.
1576
1600
  */
1577
1601
  declare const apiCallMetricSchema: z$1.ZodObject<{
1578
1602
  label: z$1.ZodString;
@@ -1629,6 +1653,7 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
1629
1653
  reasoning: z$1.ZodOptional<z$1.ZodString>;
1630
1654
  toolCalls: z$1.ZodOptional<z$1.ZodString>;
1631
1655
  }, z$1.core.$strip>>;
1656
+ derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
1632
1657
  pricing: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
1633
1658
  model: z$1.ZodString;
1634
1659
  provider: z$1.ZodOptional<z$1.ZodString>;
@@ -1674,6 +1699,7 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
1674
1699
  durationMs: z$1.ZodOptional<z$1.ZodString>;
1675
1700
  error: z$1.ZodOptional<z$1.ZodString>;
1676
1701
  }, z$1.core.$strip>>;
1702
+ derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
1677
1703
  metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
1678
1704
  label: z$1.ZodString;
1679
1705
  tooltip: z$1.ZodOptional<z$1.ZodString>;
@@ -1720,6 +1746,7 @@ type ResolvedLlmCallsConfig = {
1720
1746
  reasoning: string;
1721
1747
  toolCalls: string;
1722
1748
  };
1749
+ derivedAttributes: ResolvedCallDerivedAttribute[];
1723
1750
  metrics: ResolvedLlmCallMetric[];
1724
1751
  pricing: ResolvedLlmCallPricing[];
1725
1752
  };
@@ -1738,6 +1765,7 @@ type ResolvedApiCallsConfig = {
1738
1765
  durationMs: string;
1739
1766
  error: string;
1740
1767
  };
1768
+ derivedAttributes: ResolvedCallDerivedAttribute[];
1741
1769
  metrics: ResolvedApiCallMetric[];
1742
1770
  };
1743
1771
  /** Fully-resolved LLM-call metric used by the runner and UI. */
@@ -1983,6 +2011,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
1983
2011
  reasoning: z$1.ZodOptional<z$1.ZodString>;
1984
2012
  toolCalls: z$1.ZodOptional<z$1.ZodString>;
1985
2013
  }, z$1.core.$strip>>;
2014
+ derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
1986
2015
  pricing: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
1987
2016
  model: z$1.ZodString;
1988
2017
  provider: z$1.ZodOptional<z$1.ZodString>;
@@ -2037,6 +2066,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
2037
2066
  durationMs: z$1.ZodOptional<z$1.ZodString>;
2038
2067
  error: z$1.ZodOptional<z$1.ZodString>;
2039
2068
  }, z$1.core.$strip>>;
2069
+ derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
2040
2070
  metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
2041
2071
  label: z$1.ZodString;
2042
2072
  tooltip: z$1.ZodOptional<z$1.ZodString>;
@@ -2067,6 +2097,20 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
2067
2097
  }, z$1.core.$strip>>;
2068
2098
  }, z$1.core.$strip>;
2069
2099
  //#endregion
2100
+ //#region ../shared/src/utils/deriveCallAttributes.d.ts
2101
+ /**
2102
+ * Persist configured derived attributes onto matching LLM/API spans.
2103
+ *
2104
+ * These derived attributes are applied before trace consumers run, so
2105
+ * `deriveFromTracing`, default usage extraction, trace display, and call
2106
+ * metrics can all read them by normal dot-path lookup.
2107
+ */
2108
+ declare function applyDerivedCallAttributes(params: {
2109
+ spans: EvalTraceSpan[];
2110
+ llmCallsConfig: ResolvedLlmCallsConfig;
2111
+ apiCallsConfig: ResolvedApiCallsConfig;
2112
+ }): EvalTraceSpan[];
2113
+ //#endregion
2070
2114
  //#region ../shared/src/utils/extractLlmCalls.d.ts
2071
2115
  /** Resolved value for one user-defined metric on an LLM call row. */
2072
2116
  type LlmCallMetricValue = {
@@ -3623,4 +3667,4 @@ declare function createRunner({
3623
3667
  */
3624
3668
  declare function runCli(argv: string[]): Promise<void>;
3625
3669
  //#endregion
3626
- export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallsConfigInput, type NumberDisplayOptions, type RemoveDefaultConfig, type 
RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, 
evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
3670
+ export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallsConfigInput, type 
NumberDisplayOptions, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, 
evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as runInEvalScope, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as getEvalCaseInput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as mergeEvalOutput, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as isInEvalScope, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as repoFile, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, In as defineEval, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Ln as getEvalRegistry, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as setEvalOutput, Mt as cacheModeSchema, N as getEvalTitle, Nn as setScopeCacheContext, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as nextEvalId, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as startEvalBackgroundJob, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getCurrentScope, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as incrementEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as advanceEvalTime, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalAssert, bt as evalChartColorSchema, cn as evalSpan, ct as 
evalStatAggregateSchema, dn as hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as runInExistingEvalScope, jt as cacheListItemSchema, k as extractCacheHits, kn as runInEvalRuntimeScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vn as appendToEvalOutput, vt as evalChartAxisSchema, wn as getEvalStartTime, wt as evalChartTypeSchema, xn as evalLog, xt as evalChartConfigSchema, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-CimthgI7.mjs";
2
- import { n as createRunner, t as runCli } from "./cli-D3QNOcPN.mjs";
3
- import "./src-CcXfWT4M.mjs";
4
- export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, 
runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as llmCallsConfigSchema, $t as columnFormatSchema, A as extractApiCalls, An as runInEvalRuntimeScope, At as cacheEntryWithDebugKeySchema, B as runSummarySchema, Bt as traceCacheRefSchema, Cn as getCurrentScope, Ct as evalChartMetricSchema, D as sseEnvelopeSchema, Dn as isInEvalScope, Dt as cacheDebugKeyEntrySchema, E as updateManualScoreRequestSchema, En as incrementEvalOutput, Et as evalChartsConfigSchema, F as getEvalDisplayStatus, Fn as startEvalBackgroundJob, Ft as cacheRecordingOpSchema, G as apiCallMetricPlacementSchema, Gt as traceDisplayConfigSchema, H as DEFAULT_LLM_CALLS_CONFIG, Ht as traceAttributeDisplayInputSchema, I as deriveScopedSummaryFromCases, In as repoFile, It as cacheRecordingSchema, J as defaultConfigKeySchema, Jt as traceSpanKindSchema, K as apiCallMetricSchema, Kt as traceDisplayInputConfigSchema, L as deriveStatusFromCaseRows, Ln as defineEval, Lt as cacheStatusSchema, M as applyDerivedCallAttributes, Mn as runInExistingEvalScope, Mt as cacheListItemSchema, N as getNestedAttribute, Nn as setEvalOutput, Nt as cacheModeSchema, O as extractCacheEntries, On as mergeEvalOutput, Ot as cacheDebugKeyFileSchema, P as getEvalTitle, Pn as setScopeCacheContext, Pt as cacheOperationTypeSchema, Q as llmCallPricingSchema, Qt as columnDefSchema, R as deriveStatusFromChildStatuses, Rn as getEvalRegistry, Rt as serializedCacheSpanSchema, Sn as evalLog, St as evalChartConfigSchema, T as createRunRequestSchema, Tn as getEvalStartTime, Tt as evalChartTypeSchema, U as agentEvalsConfigSchema, Ut as traceAttributeDisplayPlacementSchema, V as DEFAULT_API_CALLS_CONFIG, Vt as traceAttributeDisplayFormatSchema, W as apiCallMetricFormatSchema, Wt as traceAttributeDisplaySchema, X as llmCallMetricPlacementSchema, Xt as traceSpanWarningSchema, Y as llmCallMetricFormatSchema, Yt as traceSpanSchema, Z as llmCallMetricSchema, Zt as cellValueSchema, _n as EvalAssertionError, _t as scoreTraceSchema, an as runArtifactRefSchema, at as assertionFailureSchema, bt as 
evalChartBuiltinMetricSchema, cn as captureEvalSpanError, ct as evalFreshnessStatusSchema, dn as hashCacheKey, dt as evalStatsConfigSchema, en as columnKindSchema, et as removeDefaultConfigSchema, fn as hashCacheKeySync, ft as evalSummarySchema, gn as serializeCacheValue, gt as runLogPhaseSchema, hn as serializeCacheRecording, ht as runLogLocationSchema, in as repoFileRefSchema, it as trialSelectionModeSchema, j as extractLlmCalls, jn as runInEvalScope, jt as cacheFileSchema, k as extractCacheHits, kn as nextEvalId, kt as cacheEntrySchema, ln as evalSpan, lt as evalStatAggregateSchema, mn as deserializeCacheValue, mt as runLogLevelSchema, nn as jsonCellSchema, nt as resolveLlmCallsConfig, on as z, ot as caseDetailSchema, pn as deserializeCacheRecording, pt as runLogEntrySchema, q as apiCallsConfigSchema, qt as traceSpanErrorSchema, rn as numberDisplayOptionsSchema, rt as runLogsConfigSchema, sn as buildTraceTree, st as caseRowSchema, tn as fileRefSchema, tt as resolveApiCallsConfig, un as evalTracer, ut as evalStatItemSchema, vn as advanceEvalTime, vt as evalChartAggregateSchema, wn as getEvalCaseInput, wt as evalChartTooltipExtraSchema, xn as evalAssert, xt as evalChartColorSchema, yn as appendToEvalOutput, yt as evalChartAxisSchema, z as runManifestSchema, zt as spanCacheOptionsSchema } from "./runOrchestration-D697g6Qe.mjs";
2
+ import { n as createRunner, t as runCli } from "./cli-Be0x8CS3.mjs";
3
+ import "./src-D6cettg0.mjs";
4
+ export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, 
runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, w as createFsCacheStore, yn as configureEvalRunLogs, z as runSummarySchema } from "./runOrchestration-CimthgI7.mjs";
1
+ import { B as runSummarySchema, Et as evalChartsConfigSchema, Qt as columnDefSchema, T as createRunRequestSchema, b as loadConfig, bn as configureEvalRunLogs, dt as evalStatsConfigSchema, t as executeRun, v as parseEvalMetas, w as createFsCacheStore, z as runManifestSchema } from "./runOrchestration-D697g6Qe.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -1330,7 +1330,7 @@ const errorCoreFields = new Set([
1330
1330
  "stack",
1331
1331
  "capturedAt"
1332
1332
  ]);
1333
- function isRecord$4(value) {
1333
+ function isRecord$5(value) {
1334
1334
  return typeof value === "object" && value !== null && !Array.isArray(value);
1335
1335
  }
1336
1336
  function formatUnknownErrorMessage(error) {
@@ -1358,7 +1358,7 @@ function normalizeTraceError(error, capturedAt = void 0) {
1358
1358
  stack: error.stack,
1359
1359
  capturedAt
1360
1360
  };
1361
- if (isRecord$4(error)) {
1361
+ if (isRecord$5(error)) {
1362
1362
  const extraFields = getErrorExtraFields(error);
1363
1363
  const name = typeof error.name === "string" ? error.name : void 0;
1364
1364
  const stack = typeof error.stack === "string" ? error.stack : void 0;
@@ -1383,7 +1383,7 @@ function normalizeTraceWarnings(warningOrWarnings, additionalWarnings, capturedA
1383
1383
  return (additionalWarnings.length > 0 ? [warningOrWarnings, ...additionalWarnings] : Array.isArray(warningOrWarnings) ? warningOrWarnings : [warningOrWarnings]).map((warning) => normalizeTraceError(warning, capturedAt));
1384
1384
  }
1385
1385
  function isCaptureEvalSpanErrorOptions(value) {
1386
- if (!isRecord$4(value)) return false;
1386
+ if (!isRecord$5(value)) return false;
1387
1387
  const keys = Object.keys(value);
1388
1388
  if (keys.length === 0) return false;
1389
1389
  if (!keys.every((key) => key === "level")) return false;
@@ -2727,13 +2727,16 @@ const apiCallMetricFormatSchema = llmCallMetricFormatSchema;
2727
2727
  const llmCallMetricPlacementSchema = z.enum(["header", "body"]);
2728
2728
  /** Where an API-call metric is rendered inside the API calls tab. */
2729
2729
  const apiCallMetricPlacementSchema = llmCallMetricPlacementSchema;
2730
+ const callDerivedAttributeSchema = z.custom((value) => typeof value === "function", { message: "Expected a derived attribute function" });
2730
2731
  /**
2731
2732
  * Schema for a single user-defined metric attached to LLM call rows.
2732
2733
  *
2733
2734
  * Each metric reads `path` from the span's `attributes` and renders the value
2734
- * with the configured `format` and `numberFormat`. `placements` controls
2735
- * whether the metric appears as a chip on the collapsed row header, as a row
2736
- * inside the expanded body, or both. Defaults to `['body']` when omitted.
2735
+ * with the configured `format` and `numberFormat`. Use
2736
+ * `llmCalls.derivedAttributes` when a metric should read a value computed from
2737
+ * other attributes. `placements` controls whether the metric appears as a chip
2738
+ * on the collapsed row header, as a row inside the expanded body, or both.
2739
+ * Defaults to `['body']` when omitted.
2737
2740
  */
2738
2741
  const llmCallMetricSchema = z.object({
2739
2742
  /** Display label for the metric row or header chip. */
@@ -2760,9 +2763,11 @@ const llmCallMetricSchema = z.object({
2760
2763
  * Schema for a single user-defined metric attached to API call rows.
2761
2764
  *
2762
2765
  * Each metric reads `path` from the span's `attributes` and renders the value
2763
- * with the configured `format` and `numberFormat`. `placements` controls
2764
- * whether the metric appears as a chip on the collapsed row header, as a row
2765
- * inside the expanded body, or both. Defaults to `['body']` when omitted.
2766
+ * with the configured `format` and `numberFormat`. Use
2767
+ * `apiCalls.derivedAttributes` when a metric should read a value computed from
2768
+ * other attributes. `placements` controls whether the metric appears as a chip
2769
+ * on the collapsed row header, as a row inside the expanded body, or both.
2770
+ * Defaults to `['body']` when omitted.
2766
2771
  */
2767
2772
  const apiCallMetricSchema = z.object({
2768
2773
  /** Display label for the metric row or header chip. */
@@ -2839,6 +2844,13 @@ const llmCallsConfigSchema = z.object({
2839
2844
  toolCalls: z.string().optional()
2840
2845
  }).optional(),
2841
2846
  /**
2847
+ * Derived attributes persisted onto every matching LLM span before
2848
+ * `deriveFromTracing`, default outputs, trace display, and call metrics read
2849
+ * the trace. Keys are dot-paths under `span.attributes`; return `undefined`
2850
+ * to skip writing the attribute for one span.
2851
+ */
2852
+ derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
2853
+ /**
2842
2854
  * Model/provider pricing registry used to calculate LLM-call costs from
2843
2855
  * token counts. Built-in LLM cost fields are only derived from this registry.
2844
2856
  */
@@ -2867,6 +2879,13 @@ const apiCallsConfigSchema = z.object({
2867
2879
  durationMs: z.string().optional(),
2868
2880
  error: z.string().optional()
2869
2881
  }).optional(),
2882
+ /**
2883
+ * Derived attributes persisted onto every matching API span before trace
2884
+ * display and call metrics read the trace. Keys are dot-paths under
2885
+ * `span.attributes`; return `undefined` to skip writing the attribute for
2886
+ * one span.
2887
+ */
2888
+ derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
2870
2889
  /** Custom user-defined metrics surfaced on each API call. */
2871
2890
  metrics: z.array(apiCallMetricSchema).optional()
2872
2891
  });
@@ -2898,6 +2917,7 @@ const DEFAULT_LLM_CALLS_CONFIG = {
2898
2917
  reasoning: "reasoning",
2899
2918
  toolCalls: "toolCalls"
2900
2919
  },
2920
+ derivedAttributes: [],
2901
2921
  metrics: [],
2902
2922
  pricing: []
2903
2923
  };
@@ -2921,8 +2941,35 @@ const DEFAULT_API_CALLS_CONFIG = {
2921
2941
  durationMs: "durationMs",
2922
2942
  error: "error"
2923
2943
  },
2944
+ derivedAttributes: [],
2924
2945
  metrics: []
2925
2946
  };
2947
/**
 * Flatten a `derivedAttributes` record into an ordered list of entries.
 *
 * @param input Record whose keys are attribute paths and whose values are the
 *   compute callbacks; may be `null` or `undefined`.
 * @returns One `{ path, compute }` entry per own enumerable key, in
 *   `Object.entries` order. Empty when `input` is nullish.
 */
function resolveDerivedAttributes(input) {
  const resolved = [];
  for (const [path, compute] of Object.entries(input ?? {})) {
    resolved.push({ path, compute });
  }
  return resolved;
}
2953
/**
 * Fill in the defaults of one user-authored LLM-call metric.
 *
 * @param metric Metric definition as written in the config.
 * @returns A new object with `label`, `tooltip`, `path` and `numberFormat`
 *   copied as-is, `format` defaulting to `"string"`, and `placements` as a
 *   fresh array copy that defaults to `["body"]`.
 */
function resolveLlmCallMetric(metric) {
  const { label, tooltip, path, format, numberFormat, placements } = metric;
  return {
    label,
    tooltip,
    path,
    format: format ?? "string",
    numberFormat,
    // Copy so later changes to the config array do not leak into the result.
    placements: placements ? [...placements] : ["body"]
  };
}
2963
/**
 * Fill in the defaults of one user-authored API-call metric.
 *
 * @param metric Metric definition as written in the config.
 * @returns A new object carrying `label`, `tooltip`, `path` and
 *   `numberFormat` unchanged, with `format` falling back to `"string"` and
 *   `placements` copied into a new array (or `["body"]` when absent).
 */
function resolveApiCallMetric(metric) {
  const resolvedFormat = metric.format ?? "string";
  const resolvedPlacements = metric.placements ? [...metric.placements] : ["body"];
  return {
    label: metric.label,
    tooltip: metric.tooltip,
    path: metric.path,
    format: resolvedFormat,
    numberFormat: metric.numberFormat,
    placements: resolvedPlacements
  };
}
2926
2973
  /**
2927
2974
  * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
2928
2975
  * by the UI to derive the LLM calls tab.
@@ -2942,14 +2989,8 @@ function resolveLlmCallsConfig(input) {
2942
2989
  ...DEFAULT_LLM_CALLS_CONFIG.attributes,
2943
2990
  ...input?.attributes
2944
2991
  },
2945
- metrics: (input?.metrics ?? []).map((m) => ({
2946
- label: m.label,
2947
- tooltip: m.tooltip,
2948
- path: m.path,
2949
- format: m.format ?? "string",
2950
- numberFormat: m.numberFormat,
2951
- placements: m.placements ? [...m.placements] : ["body"]
2952
- })),
2992
+ derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
2993
+ metrics: (input?.metrics ?? []).map(resolveLlmCallMetric),
2953
2994
  pricing: (input?.pricing ?? []).map((p) => ({
2954
2995
  model: p.model,
2955
2996
  provider: p.provider,
@@ -2979,14 +3020,8 @@ function resolveApiCallsConfig(input) {
2979
3020
  ...DEFAULT_API_CALLS_CONFIG.attributes,
2980
3021
  ...input?.attributes
2981
3022
  },
2982
- metrics: (input?.metrics ?? []).map((m) => ({
2983
- label: m.label,
2984
- tooltip: m.tooltip,
2985
- path: m.path,
2986
- format: m.format ?? "string",
2987
- numberFormat: m.numberFormat,
2988
- placements: m.placements ? [...m.placements] : ["body"]
2989
- }))
3023
+ derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
3024
+ metrics: (input?.metrics ?? []).map(resolveApiCallMetric)
2990
3025
  };
2991
3026
  }
2992
3027
  /** Zod schema for validating `agent-evals.config.ts` input. */
@@ -3206,7 +3241,7 @@ function getEvalTitle(evalLike) {
3206
3241
  }
3207
3242
  //#endregion
3208
3243
  //#region ../shared/src/utils/getNestedAttribute.ts
3209
- function isRecord$3(value) {
3244
+ function isRecord$4(value) {
3210
3245
  return typeof value === "object" && value !== null;
3211
3246
  }
3212
3247
  /**
@@ -3221,12 +3256,84 @@ function getNestedAttribute(value, path) {
3221
3256
  const parts = path.split(".");
3222
3257
  let current = value;
3223
3258
  for (const part of parts) {
3224
- if (!isRecord$3(current) || !(part in current)) return;
3259
+ if (!isRecord$4(current) || !(part in current)) return;
3225
3260
  current = current[part];
3226
3261
  }
3227
3262
  return current;
3228
3263
  }
3229
3264
  //#endregion
3265
+ //#region ../shared/src/utils/deriveCallAttributes.ts
3266
/**
 * Report whether `value` is a non-null object.
 *
 * Arrays count as records here; only `null` and non-object primitives (and
 * functions) are rejected.
 */
function isRecord$3(value) {
  if (value === null) return false;
  return typeof value === "object";
}
3269
+ function mergeNestedAttribute$1(value, path, attributeValue) {
3270
+ const root = value === void 0 ? {} : { ...value };
3271
+ const parts = path.split(".");
3272
+ let current = root;
3273
+ for (const [index, part] of parts.entries()) {
3274
+ if (index === parts.length - 1) {
3275
+ current[part] = attributeValue;
3276
+ continue;
3277
+ }
3278
+ const nextValue = current[part];
3279
+ const nextRecord = isRecord$3(nextValue) ? { ...nextValue } : {};
3280
+ current[part] = nextRecord;
3281
+ current = nextRecord;
3282
+ }
3283
+ return root;
3284
+ }
3285
/**
 * Apply a list of derived attributes to a single span.
 *
 * Entries run in order. Each `compute` receives the attributes as they stand
 * after the entries before it, so later entries can read earlier results.
 * An entry is skipped when `compute` is not a function, when it throws, or
 * when it returns `undefined`.
 *
 * @param params.span Span whose `attributes` are extended.
 * @param params.derivedAttributes List of `{ path, compute }` entries; a
 *   missing list is treated as empty.
 * @returns `params.span` itself when nothing was written, otherwise a shallow
 *   copy of the span carrying the new `attributes` object.
 */
function applyDerivedAttributesForKind(params) {
  // A resolved config built by hand, or by code that predates this field, may
  // not carry the list; treat that as "nothing to derive" instead of throwing.
  const derivedAttributes = params.derivedAttributes ?? [];
  let attributes = params.span.attributes;
  for (const derivedAttribute of derivedAttributes) {
    if (typeof derivedAttribute.compute !== "function") continue;
    const span = {
      ...params.span,
      attributes
    };
    let value;
    try {
      value = derivedAttribute.compute({
        attributes,
        span,
        get: (lookupPath) => getNestedAttribute(attributes, lookupPath)
      });
    } catch {
      // Best effort: a throwing compute skips this one attribute and leaves
      // the rest of the span untouched.
      continue;
    }
    if (value === void 0) continue;
    attributes = mergeNestedAttribute$1(attributes, derivedAttribute.path, value);
  }
  if (attributes === params.span.attributes) return params.span;
  return {
    ...params.span,
    attributes
  };
}
3313
+ /**
3314
+ * Persist configured derived attributes onto matching LLM/API spans.
3315
+ *
3316
+ * These derived attributes are applied before trace consumers run, so
3317
+ * `deriveFromTracing`, default usage extraction, trace display, and call
3318
+ * metrics can all read them by normal dot-path lookup.
3319
+ */
3320
+ function applyDerivedCallAttributes(params) {
3321
+ const llmKinds = new Set(params.llmCallsConfig.kinds);
3322
+ const apiKinds = new Set(params.apiCallsConfig.kinds);
3323
+ return params.spans.map((span) => {
3324
+ let nextSpan = span;
3325
+ if (llmKinds.has(span.kind)) nextSpan = applyDerivedAttributesForKind({
3326
+ span: nextSpan,
3327
+ derivedAttributes: params.llmCallsConfig.derivedAttributes
3328
+ });
3329
+ if (apiKinds.has(span.kind)) nextSpan = applyDerivedAttributesForKind({
3330
+ span: nextSpan,
3331
+ derivedAttributes: params.apiCallsConfig.derivedAttributes
3332
+ });
3333
+ return nextSpan;
3334
+ });
3335
+ }
3336
+ //#endregion
3230
3337
  //#region ../shared/src/utils/extractLlmCalls.ts
3231
3338
  function readNumber$2(attributes, path) {
3232
3339
  const raw = getNestedAttribute(attributes, path);
@@ -5361,7 +5468,12 @@ async function runCase(params) {
5361
5468
  startTime: evalDef.startTime,
5362
5469
  freezeTime: evalDef.freezeTime
5363
5470
  });
5364
- const traceTree = buildTraceTree(scope.spans, scope.checkpoints);
5471
+ const spansWithDerivedAttributes = applyDerivedCallAttributes({
5472
+ spans: scope.spans,
5473
+ llmCallsConfig,
5474
+ apiCallsConfig
5475
+ });
5476
+ const traceTree = buildTraceTree(spansWithDerivedAttributes, scope.checkpoints);
5365
5477
  const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
5366
5478
  if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
5367
5479
  if (!nonAssertError && evalDef.deriveFromTracing) {
@@ -5383,7 +5495,7 @@ async function runCase(params) {
5383
5495
  }
5384
5496
  if (!nonAssertError) addDefaultOutputs({
5385
5497
  outputs: scope.outputs,
5386
- spans: scope.spans,
5498
+ spans: spansWithDerivedAttributes,
5387
5499
  llmCallsConfig,
5388
5500
  apiCallsConfig,
5389
5501
  globalRemove: globalRemoveDefaultConfig,
@@ -5471,7 +5583,7 @@ async function runCase(params) {
5471
5583
  }
5472
5584
  }
5473
5585
  const status = nonAssertError ? "error" : passed ? "pass" : "fail";
5474
- const { trace: displayTrace, traceDisplay } = resolveTracePresentation(scope.spans, globalTraceDisplay, evalDef.traceDisplay);
5586
+ const { trace: displayTrace, traceDisplay } = resolveTracePresentation(spansWithDerivedAttributes, globalTraceDisplay, evalDef.traceDisplay);
5475
5587
  const columns = {};
5476
5588
  const columnOverrides = mergeDefaultColumns({
5477
5589
  columns: evalDef.columns,
@@ -5938,4 +6050,4 @@ function toLastRunStatus(status) {
5938
6050
  return status === "pending" ? null : status;
5939
6051
  }
5940
6052
  //#endregion
5941
- export { removeDefaultConfigSchema as $, columnKindSchema as $t, extractApiCalls as A, runInEvalScope as An, cacheFileSchema as At, DEFAULT_API_CALLS_CONFIG as B, traceAttributeDisplayFormatSchema as Bt, validateCharts as C, getEvalCaseInput as Cn, evalChartTooltipExtraSchema as Ct, sseEnvelopeSchema as D, mergeEvalOutput as Dn, cacheDebugKeyFileSchema as Dt, updateManualScoreRequestSchema as E, isInEvalScope as En, cacheDebugKeyEntrySchema as Et, deriveScopedSummaryFromCases as F, repoFile as Fn, cacheRecordingSchema as Ft, apiCallMetricSchema as G, traceDisplayInputConfigSchema as Gt, agentEvalsConfigSchema as H, traceAttributeDisplayPlacementSchema as Ht, deriveStatusFromCaseRows as I, defineEval as In, cacheStatusSchema as It, llmCallMetricFormatSchema as J, traceSpanSchema as Jt, apiCallsConfigSchema as K, traceSpanErrorSchema as Kt, deriveStatusFromChildStatuses as L, getEvalRegistry as Ln, serializedCacheSpanSchema as Lt, getNestedAttribute as M, setEvalOutput as Mn, cacheModeSchema as Mt, getEvalTitle as N, setScopeCacheContext as Nn, cacheOperationTypeSchema as Nt, extractCacheEntries as O, nextEvalId as On, cacheEntrySchema as Ot, getEvalDisplayStatus as P, startEvalBackgroundJob as Pn, cacheRecordingOpSchema as Pt, llmCallsConfigSchema as Q, columnFormatSchema as Qt, runManifestSchema as R, spanCacheOptionsSchema as Rt, normalizeScoreDef as S, getCurrentScope as Sn, evalChartMetricSchema as St, createRunRequestSchema as T, incrementEvalOutput as Tn, evalChartsConfigSchema as Tt, apiCallMetricFormatSchema as U, traceAttributeDisplaySchema as Ut, DEFAULT_LLM_CALLS_CONFIG as V, traceAttributeDisplayInputSchema as Vt, apiCallMetricPlacementSchema as W, traceDisplayConfigSchema as Wt, llmCallMetricSchema as X, cellValueSchema as Xt, llmCallMetricPlacementSchema as Y, traceSpanWarningSchema as Yt, llmCallPricingSchema as Z, columnDefSchema as Zt, loadEvalModule as _, advanceEvalTime as _n, evalChartAggregateSchema as _t, loadPersistedRunSnapshot as a, z$1 as 
an, caseDetailSchema as at, loadConfig as b, evalAssert as bn, evalChartColorSchema as bt, persistCaseDetail as c, evalSpan as cn, evalStatAggregateSchema as ct, recomputePersistedCaseStatus as d, hashCacheKeySync as dn, evalSummarySchema as dt, fileRefSchema as en, resolveApiCallsConfig as et, runTouchesEval as f, deserializeCacheRecording as fn, runLogEntrySchema as ft, setLatestRunInfoMap as g, EvalAssertionError as gn, scoreTraceSchema as gt, getTargetEvalIds as h, serializeCacheValue as hn, runLogPhaseSchema as ht, getLatestRunInfos as i, runArtifactRefSchema as in, assertionFailureSchema as it, extractLlmCalls as j, runInExistingEvalScope as jn, cacheListItemSchema as jt, extractCacheHits as k, runInEvalRuntimeScope as kn, cacheEntryWithDebugKeySchema as kt, persistRunState as l, evalTracer as ln, evalStatItemSchema as lt, buildEvalSummary as m, serializeCacheRecording as mn, runLogLocationSchema as mt, generateRunId as n, numberDisplayOptionsSchema as nn, runLogsConfigSchema as nt, loadPersistedRunSnapshots as o, buildTraceTree as on, caseRowSchema as ot, resolveArtifactPath as p, deserializeCacheValue as pn, runLogLevelSchema as pt, defaultConfigKeySchema as q, traceSpanKindSchema as qt, getLastRunStatuses as r, repoFileRefSchema as rn, trialSelectionModeSchema as rt, nextShortIdFromSnapshots as s, captureEvalSpanError as sn, evalFreshnessStatusSchema as st, executeRun as t, jsonCellSchema as tn, resolveLlmCallsConfig as tt, recomputeEvalStatusesInRuns as u, hashCacheKey as un, evalStatsConfigSchema as ut, parseEvalMetas as v, appendToEvalOutput as vn, evalChartAxisSchema as vt, createFsCacheStore as w, getEvalStartTime as wn, evalChartTypeSchema as wt, buildDeclaredColumnDefs as x, evalLog as xn, evalChartConfigSchema as xt, resolveEvalDefaultConfig as y, configureEvalRunLogs as yn, evalChartBuiltinMetricSchema as yt, runSummarySchema as z, traceCacheRefSchema as zt };
6053
+ export { llmCallsConfigSchema as $, columnFormatSchema as $t, extractApiCalls as A, runInEvalRuntimeScope as An, cacheEntryWithDebugKeySchema as At, runSummarySchema as B, traceCacheRefSchema as Bt, validateCharts as C, getCurrentScope as Cn, evalChartMetricSchema as Ct, sseEnvelopeSchema as D, isInEvalScope as Dn, cacheDebugKeyEntrySchema as Dt, updateManualScoreRequestSchema as E, incrementEvalOutput as En, evalChartsConfigSchema as Et, getEvalDisplayStatus as F, startEvalBackgroundJob as Fn, cacheRecordingOpSchema as Ft, apiCallMetricPlacementSchema as G, traceDisplayConfigSchema as Gt, DEFAULT_LLM_CALLS_CONFIG as H, traceAttributeDisplayInputSchema as Ht, deriveScopedSummaryFromCases as I, repoFile as In, cacheRecordingSchema as It, defaultConfigKeySchema as J, traceSpanKindSchema as Jt, apiCallMetricSchema as K, traceDisplayInputConfigSchema as Kt, deriveStatusFromCaseRows as L, defineEval as Ln, cacheStatusSchema as Lt, applyDerivedCallAttributes as M, runInExistingEvalScope as Mn, cacheListItemSchema as Mt, getNestedAttribute as N, setEvalOutput as Nn, cacheModeSchema as Nt, extractCacheEntries as O, mergeEvalOutput as On, cacheDebugKeyFileSchema as Ot, getEvalTitle as P, setScopeCacheContext as Pn, cacheOperationTypeSchema as Pt, llmCallPricingSchema as Q, columnDefSchema as Qt, deriveStatusFromChildStatuses as R, getEvalRegistry as Rn, serializedCacheSpanSchema as Rt, normalizeScoreDef as S, evalLog as Sn, evalChartConfigSchema as St, createRunRequestSchema as T, getEvalStartTime as Tn, evalChartTypeSchema as Tt, agentEvalsConfigSchema as U, traceAttributeDisplayPlacementSchema as Ut, DEFAULT_API_CALLS_CONFIG as V, traceAttributeDisplayFormatSchema as Vt, apiCallMetricFormatSchema as W, traceAttributeDisplaySchema as Wt, llmCallMetricPlacementSchema as X, traceSpanWarningSchema as Xt, llmCallMetricFormatSchema as Y, traceSpanSchema as Yt, llmCallMetricSchema as Z, cellValueSchema as Zt, loadEvalModule as _, EvalAssertionError as _n, scoreTraceSchema as 
_t, loadPersistedRunSnapshot as a, runArtifactRefSchema as an, assertionFailureSchema as at, loadConfig as b, configureEvalRunLogs as bn, evalChartBuiltinMetricSchema as bt, persistCaseDetail as c, captureEvalSpanError as cn, evalFreshnessStatusSchema as ct, recomputePersistedCaseStatus as d, hashCacheKey as dn, evalStatsConfigSchema as dt, columnKindSchema as en, removeDefaultConfigSchema as et, runTouchesEval as f, hashCacheKeySync as fn, evalSummarySchema as ft, setLatestRunInfoMap as g, serializeCacheValue as gn, runLogPhaseSchema as gt, getTargetEvalIds as h, serializeCacheRecording as hn, runLogLocationSchema as ht, getLatestRunInfos as i, repoFileRefSchema as in, trialSelectionModeSchema as it, extractLlmCalls as j, runInEvalScope as jn, cacheFileSchema as jt, extractCacheHits as k, nextEvalId as kn, cacheEntrySchema as kt, persistRunState as l, evalSpan as ln, evalStatAggregateSchema as lt, buildEvalSummary as m, deserializeCacheValue as mn, runLogLevelSchema as mt, generateRunId as n, jsonCellSchema as nn, resolveLlmCallsConfig as nt, loadPersistedRunSnapshots as o, z$1 as on, caseDetailSchema as ot, resolveArtifactPath as p, deserializeCacheRecording as pn, runLogEntrySchema as pt, apiCallsConfigSchema as q, traceSpanErrorSchema as qt, getLastRunStatuses as r, numberDisplayOptionsSchema as rn, runLogsConfigSchema as rt, nextShortIdFromSnapshots as s, buildTraceTree as sn, caseRowSchema as st, executeRun as t, fileRefSchema as tn, resolveApiCallsConfig as tt, recomputeEvalStatusesInRuns as u, evalTracer as un, evalStatItemSchema as ut, parseEvalMetas as v, advanceEvalTime as vn, evalChartAggregateSchema as vt, createFsCacheStore as w, getEvalCaseInput as wn, evalChartTooltipExtraSchema as wt, buildDeclaredColumnDefs as x, evalAssert as xn, evalChartColorSchema as xt, resolveEvalDefaultConfig as y, appendToEvalOutput as yn, evalChartAxisSchema as yt, runManifestSchema as z, spanCacheOptionsSchema as zt };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-D3QNOcPN.mjs";
2
- import "./src-CcXfWT4M.mjs";
1
+ import { n as createRunner } from "./cli-Be0x8CS3.mjs";
2
+ import "./src-D6cettg0.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-4yNYRvmF.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-B4SosWgD.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -0,0 +1,3 @@
1
+ import "./runOrchestration-D697g6Qe.mjs";
2
+ import "./cli-Be0x8CS3.mjs";
3
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.23.0",
3
+ "version": "0.24.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -59,8 +59,8 @@
59
59
  "@types/node": "^24.7.2",
60
60
  "typescript": "^5.9.2",
61
61
  "@agent-evals/runner": "0.0.1",
62
- "@agent-evals/sdk": "0.0.1",
63
- "@agent-evals/shared": "0.0.1"
62
+ "@agent-evals/shared": "0.0.1",
63
+ "@agent-evals/sdk": "0.0.1"
64
64
  },
65
65
  "scripts": {
66
66
  "build": "pnpm --filter @agent-evals/web build && tsdown",
@@ -274,10 +274,12 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
274
274
  attribute paths. `latencyMs` is time to first token; duration, total tokens,
275
275
  tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
276
276
  override `attributes.<field>` for non-default primitive span shapes, configure
277
- `pricing` to derive USD costs from token counts by model/provider, and add
278
- entries to `metrics` to surface arbitrary user metrics (`format: 'string' |
279
- 'number' | 'duration' | 'json' | 'boolean'`, `placements: ['header' |
280
- 'body']`).
277
+ `pricing` to derive USD costs from token counts by model/provider, add
278
+ `derivedAttributes` to persist computed values back onto matching LLM spans
279
+ before trace consumers run, and add entries to `metrics` to surface arbitrary
280
+ user metrics (`format: 'string' | 'number' | 'duration' | 'json' |
281
+ 'boolean'`, `placements: ['header' | 'body']`). `derivedAttributes` keys are
282
+ dot-paths under `span.attributes`; return `undefined` to skip one span.
281
283
  - Default usage config derives missing eval outputs from matching LLM/API spans
282
284
  before `outputsSchema` and scores run: `apiCalls`, `costUsd`, `llmTurns`,
283
285
  `inputTokens`, `outputTokens`, `totalTokens`, `cachedInputTokens`,
@@ -298,7 +300,8 @@ cacheCreationInputTokens` so cache details are not double-counted.
298
300
  and `'fetch'` spans with `method`, `url`, `statusCode`, `request`,
299
301
  `response`, `requestBody`, `responseBody`, `headers`, `durationMs`, and
300
302
  `error` read from conventional attribute paths. Override `kinds` or
301
- `attributes.<field>` for external tracers, and add `metrics` with the same
303
+ `attributes.<field>` for external tracers, add `derivedAttributes` for
304
+ computed persisted API span attributes, and add `metrics` with the same
302
305
  formats and placements as LLM-call metrics.
303
306
  - `runLogs` (in `agent-evals.config.ts`) controls case log capture. Use
304
307
  `runLogs: { captureConsole: false }` to keep console output in the terminal
@@ -1,3 +0,0 @@
1
- import "./runOrchestration-CimthgI7.mjs";
2
- import "./cli-D3QNOcPN.mjs";
3
- export {};