npm - @ls-stack/agent-eval - Versions diffs - 0.23.0 → 0.24.0 - Mend

@ls-stack/agent-eval 0.23.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/{app-Cw79dJDr.mjs → app-DYRmucgj.mjs} +3 -3
package/dist/apps/web/dist/assets/{index-AUDD3rNB.js → index-KbbX3NYr.js} +35 -35
package/dist/apps/web/dist/index.html +1 -1
package/dist/bin.mjs +1 -1
package/dist/{cli-D3QNOcPN.mjs → cli-Be0x8CS3.mjs} +3 -3
package/dist/index.d.mts +51 -7
package/dist/index.mjs +4 -4
package/dist/runChild.mjs +1 -1
package/dist/{runOrchestration-CimthgI7.mjs → runOrchestration-D697g6Qe.mjs} +143 -31
package/dist/{runner-4yNYRvmF.mjs → runner-B4SosWgD.mjs} +2 -2
package/dist/{runner-B-SYzW8w.mjs → runner-jSujaSKt.mjs} +1 -1
package/dist/src-D6cettg0.mjs +3 -0
package/package.json +3 -3
package/skills/agent-eval/SKILL.md +8 -5
package/dist/src-CcXfWT4M.mjs +0 -3

package/dist/apps/web/dist/index.html CHANGED Viewed

@@ -25,7 +25,7 @@
       href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
       rel="stylesheet"
     />
-    <script type="module" crossorigin src="/assets/index-AUDD3rNB.js"></script>
+    <script type="module" crossorigin src="/assets/index-KbbX3NYr.js"></script>
     <link rel="stylesheet" crossorigin href="/assets/index-r0dVFK0B.css">
   </head>
   <body>

package/dist/bin.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
 #!/usr/bin/env node
-import { t as runCli } from "./cli-D3QNOcPN.mjs";
+import { t as runCli } from "./cli-Be0x8CS3.mjs";
 import { spawn } from "node:child_process";
 //#region src/bin.ts
 const moduleMocksFlag = "--experimental-test-module-mocks";

package/dist/{cli-D3QNOcPN.mjs → cli-Be0x8CS3.mjs} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { C as validateCharts, F as deriveScopedSummaryFromCases, Ln as getEvalRegistry, N as getEvalTitle, P as getEvalDisplayStatus, S as normalizeScoreDef, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as loadConfig, c as persistCaseDetail, d as recomputePersistedCaseStatus, et as resolveApiCallsConfig, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, l as persistRunState, m as buildEvalSummary, n as generateRunId, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, tt as resolveLlmCallsConfig, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, w as createFsCacheStore, x as buildDeclaredColumnDefs, y as resolveEvalDefaultConfig, z as runSummarySchema } from "./runOrchestration-CimthgI7.mjs";
+import { B as runSummarySchema, C as validateCharts, F as getEvalDisplayStatus, I as deriveScopedSummaryFromCases, P as getEvalTitle, Rn as getEvalRegistry, S as normalizeScoreDef, _ as loadEvalModule, a as loadPersistedRunSnapshot, b as loadConfig, c as persistCaseDetail, d as recomputePersistedCaseStatus, f as runTouchesEval, g as setLatestRunInfoMap, h as getTargetEvalIds, i as getLatestRunInfos, l as persistRunState, m as buildEvalSummary, n as generateRunId, nt as resolveLlmCallsConfig, o as loadPersistedRunSnapshots, p as resolveArtifactPath, r as getLastRunStatuses, s as nextShortIdFromSnapshots, tt as resolveApiCallsConfig, u as recomputeEvalStatusesInRuns, v as parseEvalMetas, w as createFsCacheStore, x as buildDeclaredColumnDefs, y as resolveEvalDefaultConfig } from "./runOrchestration-D697g6Qe.mjs";
 import { createHash } from "node:crypto";
 import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
 import { dirname, join, relative, resolve } from "node:path";
@@ -959,8 +959,8 @@ async function commandApp(args) {
 	const { serve } = await import("@hono/node-server");
 	const bundledWebDist = resolve(currentDir, "apps/web/dist");
 	if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
-	const appModule = await import("./app-Cw79dJDr.mjs");
-	const runnerModule = await import("./runner-B-SYzW8w.mjs");
+	const appModule = await import("./app-DYRmucgj.mjs");
+	const runnerModule = await import("./runner-jSujaSKt.mjs");
 	if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
 	if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
 	await runnerModule.initRunner();

package/dist/index.d.mts CHANGED Viewed

@@ -1539,13 +1539,35 @@ declare const apiCallMetricPlacementSchema: z$1.ZodEnum<{
 }>;
 /** Placement option for an API-call metric. */
 type ApiCallMetricPlacement = z$1.infer<typeof apiCallMetricPlacementSchema>;
+/** Context passed to an LLM/API-call derived attribute function. */
+type CallDerivedAttributeContext = {
+  /** Raw attributes from the matching trace span. */attributes: Record<string, unknown> | undefined; /** Matching trace span. */
+  span: EvalTraceSpan; /** Dot-path helper for reading from `span.attributes`. */
+  get: (path: string) => unknown;
+};
+/**
+ * Runner-side function used to derive one new span attribute from a matching
+ * LLM/API-call span. Return `undefined` to omit the attribute for that span.
+ */
+type CallDerivedAttribute = (ctx: CallDerivedAttributeContext) => unknown;
+/** One resolved derived span attribute rule. */
+type ResolvedCallDerivedAttribute = {
+  /** Dot-path where the derived value is persisted on `span.attributes`. */path: string;
+  /**
+   * Function that derives the persisted value for each matching span. Omitted
+   * after this config is serialized to the browser.
+   */
+  compute?: CallDerivedAttribute;
+};
 /**
  * Schema for a single user-defined metric attached to LLM call rows.
  *
  * Each metric reads `path` from the span's `attributes` and renders the value
- * with the configured `format` and `numberFormat`. `placements` controls
- * whether the metric appears as a chip on the collapsed row header, as a row
- * inside the expanded body, or both. Defaults to `['body']` when omitted.
+ * with the configured `format` and `numberFormat`. Use
+ * `llmCalls.derivedAttributes` when a metric should read a value computed from
+ * other attributes. `placements` controls whether the metric appears as a chip
+ * on the collapsed row header, as a row inside the expanded body, or both.
+ * Defaults to `['body']` when omitted.
  */
 declare const llmCallMetricSchema: z$1.ZodObject<{
   label: z$1.ZodString;
@@ -1570,9 +1592,11 @@ type LlmCallMetric = z$1.infer<typeof llmCallMetricSchema>;
  * Schema for a single user-defined metric attached to API call rows.
  *
  * Each metric reads `path` from the span's `attributes` and renders the value
- * with the configured `format` and `numberFormat`. `placements` controls
- * whether the metric appears as a chip on the collapsed row header, as a row
- * inside the expanded body, or both. Defaults to `['body']` when omitted.
+ * with the configured `format` and `numberFormat`. Use
+ * `apiCalls.derivedAttributes` when a metric should read a value computed from
+ * other attributes. `placements` controls whether the metric appears as a chip
+ * on the collapsed row header, as a row inside the expanded body, or both.
+ * Defaults to `['body']` when omitted.
  */
 declare const apiCallMetricSchema: z$1.ZodObject<{
   label: z$1.ZodString;
@@ -1629,6 +1653,7 @@ declare const llmCallsConfigSchema: z$1.ZodObject<{
     reasoning: z$1.ZodOptional<z$1.ZodString>;
     toolCalls: z$1.ZodOptional<z$1.ZodString>;
   }, z$1.core.$strip>>;
+  derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
   pricing: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
     model: z$1.ZodString;
     provider: z$1.ZodOptional<z$1.ZodString>;
@@ -1674,6 +1699,7 @@ declare const apiCallsConfigSchema: z$1.ZodObject<{
     durationMs: z$1.ZodOptional<z$1.ZodString>;
     error: z$1.ZodOptional<z$1.ZodString>;
   }, z$1.core.$strip>>;
+  derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
   metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
     label: z$1.ZodString;
     tooltip: z$1.ZodOptional<z$1.ZodString>;
@@ -1720,6 +1746,7 @@ type ResolvedLlmCallsConfig = {
     reasoning: string;
     toolCalls: string;
   };
+  derivedAttributes: ResolvedCallDerivedAttribute[];
   metrics: ResolvedLlmCallMetric[];
   pricing: ResolvedLlmCallPricing[];
 };
@@ -1738,6 +1765,7 @@ type ResolvedApiCallsConfig = {
     durationMs: string;
     error: string;
   };
+  derivedAttributes: ResolvedCallDerivedAttribute[];
   metrics: ResolvedApiCallMetric[];
 };
 /** Fully-resolved LLM-call metric used by the runner and UI. */
@@ -1983,6 +2011,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
       reasoning: z$1.ZodOptional<z$1.ZodString>;
       toolCalls: z$1.ZodOptional<z$1.ZodString>;
     }, z$1.core.$strip>>;
+    derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
     pricing: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
       model: z$1.ZodString;
       provider: z$1.ZodOptional<z$1.ZodString>;
@@ -2037,6 +2066,7 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
       durationMs: z$1.ZodOptional<z$1.ZodString>;
       error: z$1.ZodOptional<z$1.ZodString>;
     }, z$1.core.$strip>>;
+    derivedAttributes: z$1.ZodOptional<z$1.ZodRecord<z$1.ZodString, z$1.ZodCustom<CallDerivedAttribute, CallDerivedAttribute>>>;
     metrics: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
       label: z$1.ZodString;
       tooltip: z$1.ZodOptional<z$1.ZodString>;
@@ -2067,6 +2097,20 @@ declare const agentEvalsConfigSchema: z$1.ZodObject<{
   }, z$1.core.$strip>>;
 }, z$1.core.$strip>;
 //#endregion
+//#region ../shared/src/utils/deriveCallAttributes.d.ts
+/**
+ * Persist configured derived attributes onto matching LLM/API spans.
+ *
+ * These derived attributes are applied before trace consumers run, so
+ * `deriveFromTracing`, default usage extraction, trace display, and call
+ * metrics can all read them by normal dot-path lookup.
+ */
+declare function applyDerivedCallAttributes(params: {
+  spans: EvalTraceSpan[];
+  llmCallsConfig: ResolvedLlmCallsConfig;
+  apiCallsConfig: ResolvedApiCallsConfig;
+}): EvalTraceSpan[];
+//#endregion
 //#region ../shared/src/utils/extractLlmCalls.d.ts
 /** Resolved value for one user-defined metric on an LLM call row. */
 type LlmCallMetricValue = {
@@ -3623,4 +3667,4 @@ declare function createRunner({
  */
 declare function runCli(argv: string[]): Promise<void>;
 //#endregion
-export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallsConfigInput, type NumberDisplayOptions, type RemoveDefaultConfig, type 
RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, 
evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
+export { type AgentEvalsConfig, type ApiCallEntry, type ApiCallMetric, type ApiCallMetricFormat, type ApiCallMetricPlacement, type ApiCallMetricValue, type ApiCallsConfigInput, type AssertionFailure, type CacheActivityEntry, type CacheAdapter, type CacheDebugKeyEntry, type CacheDebugKeyFile, type CacheDebugKeyWrite, type CacheEntry, type CacheEntryWithDebugKey, type CacheFile, type CacheHitEntry, type CacheKeyHashInput, type CacheKeyHashOptions, type CacheListItem, type CacheMode, type CacheOperationType, type CacheRecording, type CacheRecordingFrame, type CacheRecordingOp, type CacheScopeContext, type CacheStatus, type CallDerivedAttribute, type CallDerivedAttributeContext, type CaptureEvalSpanErrorLevel, type CaptureEvalSpanErrorOptions, type CaseDetail, type CaseRow, type CellValue, type ColumnDef, type ColumnFormat, type ColumnKind, type CreateRunRequest, DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, type DefaultConfigKey, type DerivedStatus, EvalAssertionError, type EvalCase, type EvalCaseScope, type EvalChartAggregate, type EvalChartAxis, type EvalChartBuiltinMetric, type EvalChartColor, type EvalChartConfig, type EvalChartMetric, type EvalChartTooltipExtra, type EvalChartType, type EvalChartsConfig, type EvalColumnOverride, type EvalColumns, type EvalDefinition, type EvalDeriveContext, type EvalDisplayStatus, type EvalExecuteContext, type EvalFreshnessStatus, type EvalManualScoreDef, type EvalOutputs, type EvalOutputsSchema, type EvalRunner, type EvalRuntimeScope, type EvalScoreContext, type EvalScoreDef, type EvalScoreFn, type EvalSetOutput, type EvalStartTime, type EvalStatAggregate, type EvalStatItem, type EvalStatsConfig, type EvalSummary, type EvalTimeUnit, type EvalTraceSpan, type EvalTraceSpanError, type EvalTraceSpanWarning, type EvalTraceTree, type FileRef, type JsonCell, type LlmCallEntry, type LlmCallMetric, type LlmCallMetricFormat, type LlmCallMetricPlacement, type LlmCallMetricValue, type LlmCallPricing, type LlmCallsConfigInput, type 
NumberDisplayOptions, type RemoveDefaultConfig, type RepoFileRef, type ResolvedApiCallMetric, type ResolvedApiCallsConfig, type ResolvedCallDerivedAttribute, type ResolvedLlmCallMetric, type ResolvedLlmCallPricing, type ResolvedLlmCallsConfig, type RunArtifactRef, type RunInEvalScopeOptions, type RunLogEntry, type RunLogLevel, type RunLogLocation, type RunLogPhase, type RunLogsConfigInput, type RunManifest, type RunSummary, type ScalarCell, type ScopedCaseSummary, type ScoreTrace, type SerializedCacheSpan, type SerializedCacheValue, type SpanCacheOptions, type SseEnvelope, type SseEventType, type TraceActiveSpan, type TraceAttributeDisplay, type TraceAttributeDisplayFormat, type TraceAttributeDisplayInput, type TraceAttributeDisplayPlacement, type TraceAttributeTransform, type TraceAttributeTransformContext, type TraceCacheInfo, type TraceCacheRef, type TraceDisplayConfig, type TraceDisplayInputConfig, type TraceSpanInfo, type TrialSelectionMode, type UpdateManualScoreRequest, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, 
evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };

package/dist/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { $ as removeDefaultConfigSchema, $t as columnKindSchema, A as extractApiCalls, An as runInEvalScope, At as cacheFileSchema, B as DEFAULT_API_CALLS_CONFIG, Bt as traceAttributeDisplayFormatSchema, Cn as getEvalCaseInput, Ct as evalChartTooltipExtraSchema, D as sseEnvelopeSchema, Dn as mergeEvalOutput, Dt as cacheDebugKeyFileSchema, E as updateManualScoreRequestSchema, En as isInEvalScope, Et as cacheDebugKeyEntrySchema, F as deriveScopedSummaryFromCases, Fn as repoFile, Ft as cacheRecordingSchema, G as apiCallMetricSchema, Gt as traceDisplayInputConfigSchema, H as agentEvalsConfigSchema, Ht as traceAttributeDisplayPlacementSchema, I as deriveStatusFromCaseRows, In as defineEval, It as cacheStatusSchema, J as llmCallMetricFormatSchema, Jt as traceSpanSchema, K as apiCallsConfigSchema, Kt as traceSpanErrorSchema, L as deriveStatusFromChildStatuses, Ln as getEvalRegistry, Lt as serializedCacheSpanSchema, M as getNestedAttribute, Mn as setEvalOutput, Mt as cacheModeSchema, N as getEvalTitle, Nn as setScopeCacheContext, Nt as cacheOperationTypeSchema, O as extractCacheEntries, On as nextEvalId, Ot as cacheEntrySchema, P as getEvalDisplayStatus, Pn as startEvalBackgroundJob, Pt as cacheRecordingOpSchema, Q as llmCallsConfigSchema, Qt as columnFormatSchema, R as runManifestSchema, Rt as spanCacheOptionsSchema, Sn as getCurrentScope, St as evalChartMetricSchema, T as createRunRequestSchema, Tn as incrementEvalOutput, Tt as evalChartsConfigSchema, U as apiCallMetricFormatSchema, Ut as traceAttributeDisplaySchema, V as DEFAULT_LLM_CALLS_CONFIG, Vt as traceAttributeDisplayInputSchema, W as apiCallMetricPlacementSchema, Wt as traceDisplayConfigSchema, X as llmCallMetricSchema, Xt as cellValueSchema, Y as llmCallMetricPlacementSchema, Yt as traceSpanWarningSchema, Z as llmCallPricingSchema, Zt as columnDefSchema, _n as advanceEvalTime, _t as evalChartAggregateSchema, an as z, at as caseDetailSchema, bn as evalAssert, bt as evalChartColorSchema, cn as evalSpan, ct as 
evalStatAggregateSchema, dn as hashCacheKeySync, dt as evalSummarySchema, en as fileRefSchema, et as resolveApiCallsConfig, fn as deserializeCacheRecording, ft as runLogEntrySchema, gn as EvalAssertionError, gt as scoreTraceSchema, hn as serializeCacheValue, ht as runLogPhaseSchema, in as runArtifactRefSchema, it as assertionFailureSchema, j as extractLlmCalls, jn as runInExistingEvalScope, jt as cacheListItemSchema, k as extractCacheHits, kn as runInEvalRuntimeScope, kt as cacheEntryWithDebugKeySchema, ln as evalTracer, lt as evalStatItemSchema, mn as serializeCacheRecording, mt as runLogLocationSchema, nn as numberDisplayOptionsSchema, nt as runLogsConfigSchema, on as buildTraceTree, ot as caseRowSchema, pn as deserializeCacheValue, pt as runLogLevelSchema, q as defaultConfigKeySchema, qt as traceSpanKindSchema, rn as repoFileRefSchema, rt as trialSelectionModeSchema, sn as captureEvalSpanError, st as evalFreshnessStatusSchema, tn as jsonCellSchema, tt as resolveLlmCallsConfig, un as hashCacheKey, ut as evalStatsConfigSchema, vn as appendToEvalOutput, vt as evalChartAxisSchema, wn as getEvalStartTime, wt as evalChartTypeSchema, xn as evalLog, xt as evalChartConfigSchema, yt as evalChartBuiltinMetricSchema, z as runSummarySchema, zt as traceCacheRefSchema } from "./runOrchestration-CimthgI7.mjs";
-import { n as createRunner, t as runCli } from "./cli-D3QNOcPN.mjs";
-import "./src-CcXfWT4M.mjs";
-export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, 
runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
+import { $ as llmCallsConfigSchema, $t as columnFormatSchema, A as extractApiCalls, An as runInEvalRuntimeScope, At as cacheEntryWithDebugKeySchema, B as runSummarySchema, Bt as traceCacheRefSchema, Cn as getCurrentScope, Ct as evalChartMetricSchema, D as sseEnvelopeSchema, Dn as isInEvalScope, Dt as cacheDebugKeyEntrySchema, E as updateManualScoreRequestSchema, En as incrementEvalOutput, Et as evalChartsConfigSchema, F as getEvalDisplayStatus, Fn as startEvalBackgroundJob, Ft as cacheRecordingOpSchema, G as apiCallMetricPlacementSchema, Gt as traceDisplayConfigSchema, H as DEFAULT_LLM_CALLS_CONFIG, Ht as traceAttributeDisplayInputSchema, I as deriveScopedSummaryFromCases, In as repoFile, It as cacheRecordingSchema, J as defaultConfigKeySchema, Jt as traceSpanKindSchema, K as apiCallMetricSchema, Kt as traceDisplayInputConfigSchema, L as deriveStatusFromCaseRows, Ln as defineEval, Lt as cacheStatusSchema, M as applyDerivedCallAttributes, Mn as runInExistingEvalScope, Mt as cacheListItemSchema, N as getNestedAttribute, Nn as setEvalOutput, Nt as cacheModeSchema, O as extractCacheEntries, On as mergeEvalOutput, Ot as cacheDebugKeyFileSchema, P as getEvalTitle, Pn as setScopeCacheContext, Pt as cacheOperationTypeSchema, Q as llmCallPricingSchema, Qt as columnDefSchema, R as deriveStatusFromChildStatuses, Rn as getEvalRegistry, Rt as serializedCacheSpanSchema, Sn as evalLog, St as evalChartConfigSchema, T as createRunRequestSchema, Tn as getEvalStartTime, Tt as evalChartTypeSchema, U as agentEvalsConfigSchema, Ut as traceAttributeDisplayPlacementSchema, V as DEFAULT_API_CALLS_CONFIG, Vt as traceAttributeDisplayFormatSchema, W as apiCallMetricFormatSchema, Wt as traceAttributeDisplaySchema, X as llmCallMetricPlacementSchema, Xt as traceSpanWarningSchema, Y as llmCallMetricFormatSchema, Yt as traceSpanSchema, Z as llmCallMetricSchema, Zt as cellValueSchema, _n as EvalAssertionError, _t as scoreTraceSchema, an as runArtifactRefSchema, at as assertionFailureSchema, bt as 
evalChartBuiltinMetricSchema, cn as captureEvalSpanError, ct as evalFreshnessStatusSchema, dn as hashCacheKey, dt as evalStatsConfigSchema, en as columnKindSchema, et as removeDefaultConfigSchema, fn as hashCacheKeySync, ft as evalSummarySchema, gn as serializeCacheValue, gt as runLogPhaseSchema, hn as serializeCacheRecording, ht as runLogLocationSchema, in as repoFileRefSchema, it as trialSelectionModeSchema, j as extractLlmCalls, jn as runInEvalScope, jt as cacheFileSchema, k as extractCacheHits, kn as nextEvalId, kt as cacheEntrySchema, ln as evalSpan, lt as evalStatAggregateSchema, mn as deserializeCacheValue, mt as runLogLevelSchema, nn as jsonCellSchema, nt as resolveLlmCallsConfig, on as z, ot as caseDetailSchema, pn as deserializeCacheRecording, pt as runLogEntrySchema, q as apiCallsConfigSchema, qt as traceSpanErrorSchema, rn as numberDisplayOptionsSchema, rt as runLogsConfigSchema, sn as buildTraceTree, st as caseRowSchema, tn as fileRefSchema, tt as resolveApiCallsConfig, un as evalTracer, ut as evalStatItemSchema, vn as advanceEvalTime, vt as evalChartAggregateSchema, wn as getEvalCaseInput, wt as evalChartTooltipExtraSchema, xn as evalAssert, xt as evalChartColorSchema, yn as appendToEvalOutput, yt as evalChartAxisSchema, z as runManifestSchema, zt as spanCacheOptionsSchema } from "./runOrchestration-D697g6Qe.mjs";
+import { n as createRunner, t as runCli } from "./cli-Be0x8CS3.mjs";
+import "./src-D6cettg0.mjs";
+export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, columnDefSchema, columnFormatSchema, columnKindSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingSchema, llmCallsConfigSchema, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, 
runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };

package/dist/runChild.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { R as runManifestSchema, T as createRunRequestSchema, Tt as evalChartsConfigSchema, Zt as columnDefSchema, b as loadConfig, t as executeRun, ut as evalStatsConfigSchema, v as parseEvalMetas, w as createFsCacheStore, yn as configureEvalRunLogs, z as runSummarySchema } from "./runOrchestration-CimthgI7.mjs";
+import { B as runSummarySchema, Et as evalChartsConfigSchema, Qt as columnDefSchema, T as createRunRequestSchema, b as loadConfig, bn as configureEvalRunLogs, dt as evalStatsConfigSchema, t as executeRun, v as parseEvalMetas, w as createFsCacheStore, z as runManifestSchema } from "./runOrchestration-D697g6Qe.mjs";
 import { createHash } from "node:crypto";
 import { readFile } from "node:fs/promises";
 import { relative } from "node:path";

package/dist/{runOrchestration-CimthgI7.mjs → runOrchestration-D697g6Qe.mjs} RENAMED Viewed

@@ -1330,7 +1330,7 @@ const errorCoreFields = new Set([
 	"stack",
 	"capturedAt"
 ]);
-function isRecord$4(value) {
+function isRecord$5(value) {
 	return typeof value === "object" && value !== null && !Array.isArray(value);
 }
 function formatUnknownErrorMessage(error) {
@@ -1358,7 +1358,7 @@ function normalizeTraceError(error, capturedAt = void 0) {
 		stack: error.stack,
 		capturedAt
 	};
-	if (isRecord$4(error)) {
+	if (isRecord$5(error)) {
 		const extraFields = getErrorExtraFields(error);
 		const name = typeof error.name === "string" ? error.name : void 0;
 		const stack = typeof error.stack === "string" ? error.stack : void 0;
@@ -1383,7 +1383,7 @@ function normalizeTraceWarnings(warningOrWarnings, additionalWarnings, capturedA
 	return (additionalWarnings.length > 0 ? [warningOrWarnings, ...additionalWarnings] : Array.isArray(warningOrWarnings) ? warningOrWarnings : [warningOrWarnings]).map((warning) => normalizeTraceError(warning, capturedAt));
 }
 function isCaptureEvalSpanErrorOptions(value) {
-	if (!isRecord$4(value)) return false;
+	if (!isRecord$5(value)) return false;
 	const keys = Object.keys(value);
 	if (keys.length === 0) return false;
 	if (!keys.every((key) => key === "level")) return false;
@@ -2727,13 +2727,16 @@ const apiCallMetricFormatSchema = llmCallMetricFormatSchema;
 const llmCallMetricPlacementSchema = z.enum(["header", "body"]);
 /** Where an API-call metric is rendered inside the API calls tab. */
 const apiCallMetricPlacementSchema = llmCallMetricPlacementSchema;
+const callDerivedAttributeSchema = z.custom((value) => typeof value === "function", { message: "Expected a derived attribute function" });
 /**
 * Schema for a single user-defined metric attached to LLM call rows.
 *
 * Each metric reads `path` from the span's `attributes` and renders the value
-* with the configured `format` and `numberFormat`. `placements` controls
-* whether the metric appears as a chip on the collapsed row header, as a row
-* inside the expanded body, or both. Defaults to `['body']` when omitted.
+* with the configured `format` and `numberFormat`. Use
+* `llmCalls.derivedAttributes` when a metric should read a value computed from
+* other attributes. `placements` controls whether the metric appears as a chip
+* on the collapsed row header, as a row inside the expanded body, or both.
+* Defaults to `['body']` when omitted.
 */
 const llmCallMetricSchema = z.object({
 	/** Display label for the metric row or header chip. */
@@ -2760,9 +2763,11 @@ const llmCallMetricSchema = z.object({
 * Schema for a single user-defined metric attached to API call rows.
 *
 * Each metric reads `path` from the span's `attributes` and renders the value
-* with the configured `format` and `numberFormat`. `placements` controls
-* whether the metric appears as a chip on the collapsed row header, as a row
-* inside the expanded body, or both. Defaults to `['body']` when omitted.
+* with the configured `format` and `numberFormat`. Use
+* `apiCalls.derivedAttributes` when a metric should read a value computed from
+* other attributes. `placements` controls whether the metric appears as a chip
+* on the collapsed row header, as a row inside the expanded body, or both.
+* Defaults to `['body']` when omitted.
 */
 const apiCallMetricSchema = z.object({
 	/** Display label for the metric row or header chip. */
@@ -2839,6 +2844,13 @@ const llmCallsConfigSchema = z.object({
 		toolCalls: z.string().optional()
 	}).optional(),
 	/**
+	* Derived attributes persisted onto every matching LLM span before
+	* `deriveFromTracing`, default outputs, trace display, and call metrics read
+	* the trace. Keys are dot-paths under `span.attributes`; return `undefined`
+	* to skip writing the attribute for one span.
+	*/
+	derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
+	/**
 	* Model/provider pricing registry used to calculate LLM-call costs from
 	* token counts. Built-in LLM cost fields are only derived from this registry.
 	*/
@@ -2867,6 +2879,13 @@ const apiCallsConfigSchema = z.object({
 		durationMs: z.string().optional(),
 		error: z.string().optional()
 	}).optional(),
+	/**
+	* Derived attributes persisted onto every matching API span before trace
+	* display and call metrics read the trace. Keys are dot-paths under
+	* `span.attributes`; return `undefined` to skip writing the attribute for
+	* one span.
+	*/
+	derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
 	/** Custom user-defined metrics surfaced on each API call. */
 	metrics: z.array(apiCallMetricSchema).optional()
 });
@@ -2898,6 +2917,7 @@ const DEFAULT_LLM_CALLS_CONFIG = {
 		reasoning: "reasoning",
 		toolCalls: "toolCalls"
 	},
+	derivedAttributes: [],
 	metrics: [],
 	pricing: []
 };
@@ -2921,8 +2941,35 @@ const DEFAULT_API_CALLS_CONFIG = {
 		durationMs: "durationMs",
 		error: "error"
 	},
+	derivedAttributes: [],
 	metrics: []
 };
+function resolveDerivedAttributes(input) { // Flatten the `{ [path]: compute }` record into a list of `{ path, compute }` entries.
+	return Object.entries(input ?? {}).map(([path, compute]) => ({ // A nullish `input` is treated as `{}`, so the result is an empty array.
+		path,
+		compute
+	}));
+}
+function resolveLlmCallMetric(metric) { // Build the fully-defaulted form of one user-authored LLM-call metric.
+	return {
+		label: metric.label,
+		tooltip: metric.tooltip,
+		path: metric.path,
+		format: metric.format ?? "string", // A nullish `format` falls back to "string".
+		numberFormat: metric.numberFormat,
+		placements: metric.placements ? [...metric.placements] : ["body"] // Copy the caller's array; use ["body"] when none was given.
+	};
+}
+function resolveApiCallMetric(metric) { // Build the fully-defaulted form of one user-authored API-call metric.
+	return {
+		label: metric.label,
+		tooltip: metric.tooltip,
+		path: metric.path,
+		format: metric.format ?? "string", // A nullish `format` falls back to "string".
+		numberFormat: metric.numberFormat,
+		placements: metric.placements ? [...metric.placements] : ["body"] // Copy the caller's array; use ["body"] when none was given.
+	};
+}
 /**
 * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
 * by the UI to derive the LLM calls tab.
@@ -2942,14 +2989,8 @@ function resolveLlmCallsConfig(input) {
 			...DEFAULT_LLM_CALLS_CONFIG.attributes,
 			...input?.attributes
 		},
-		metrics: (input?.metrics ?? []).map((m) => ({
-			label: m.label,
-			tooltip: m.tooltip,
-			path: m.path,
-			format: m.format ?? "string",
-			numberFormat: m.numberFormat,
-			placements: m.placements ? [...m.placements] : ["body"]
-		})),
+		derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
+		metrics: (input?.metrics ?? []).map(resolveLlmCallMetric),
 		pricing: (input?.pricing ?? []).map((p) => ({
 			model: p.model,
 			provider: p.provider,
@@ -2979,14 +3020,8 @@ function resolveApiCallsConfig(input) {
 			...DEFAULT_API_CALLS_CONFIG.attributes,
 			...input?.attributes
 		},
-		metrics: (input?.metrics ?? []).map((m) => ({
-			label: m.label,
-			tooltip: m.tooltip,
-			path: m.path,
-			format: m.format ?? "string",
-			numberFormat: m.numberFormat,
-			placements: m.placements ? [...m.placements] : ["body"]
-		}))
+		derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
+		metrics: (input?.metrics ?? []).map(resolveApiCallMetric)
 	};
 }
 /** Zod schema for validating `agent-evals.config.ts` input. */
@@ -3206,7 +3241,7 @@ function getEvalTitle(evalLike) {
 }
 //#endregion
 //#region ../shared/src/utils/getNestedAttribute.ts
-function isRecord$3(value) {
+function isRecord$4(value) {
 	return typeof value === "object" && value !== null;
 }
 /**
@@ -3221,12 +3256,84 @@ function getNestedAttribute(value, path) {
 	const parts = path.split(".");
 	let current = value;
 	for (const part of parts) {
-		if (!isRecord$3(current) || !(part in current)) return;
+		if (!isRecord$4(current) || !(part in current)) return;
 		current = current[part];
 	}
 	return current;
 }
 //#endregion
+//#region ../shared/src/utils/deriveCallAttributes.ts
+function isRecord$3(value) { // True for any non-null object; arrays pass too because there is no Array.isArray check here.
+	return typeof value === "object" && value !== null;
+}
+function mergeNestedAttribute$1(value, path, attributeValue) { // Return a copy of `value` with `attributeValue` written at the dot-separated `path`.
+	const root = value === void 0 ? {} : { ...value }; // Shallow-copy the input; an undefined input starts from an empty object.
+	const parts = path.split(".");
+	let current = root;
+	for (const [index, part] of parts.entries()) {
+		if (index === parts.length - 1) { // Last segment: write the value into the innermost copied object.
+			current[part] = attributeValue;
+			continue;
+		}
+		const nextValue = current[part];
+		const nextRecord = isRecord$3(nextValue) ? { ...nextValue } : {}; // Copy an existing object along the path; anything else is replaced by a fresh `{}`.
+		current[part] = nextRecord;
+		current = nextRecord;
+	}
+	return root;
+}
+function applyDerivedAttributesForKind(params) { // Run each entry of `params.derivedAttributes` against `params.span`, in order, merging results into its attributes.
+	let attributes = params.span.attributes;
+	for (const derivedAttribute of params.derivedAttributes) {
+		if (derivedAttribute.compute === void 0) continue; // Entries without a compute function are skipped.
+		const span = { // Snapshot of the span carrying the attributes as updated by earlier entries in this loop.
+			...params.span,
+			attributes
+		};
+		const value = (() => {
+			try {
+				return derivedAttribute.compute({
+					attributes,
+					span,
+					get: (path) => getNestedAttribute(attributes, path) // Dot-path lookup against the current attributes.
+				});
+			} catch { // A throwing compute is handled the same as one that returns undefined; the error is not reported.
+				return;
+			}
+		})();
+		if (value === void 0) continue; // undefined means "do not write this attribute for this span".
+		attributes = mergeNestedAttribute$1(attributes, derivedAttribute.path, value);
+	}
+	if (attributes === params.span.attributes) return params.span; // Nothing was written: hand back the original span object.
+	return {
+		...params.span,
+		attributes
+	};
+}
+/**
+* Persist configured derived attributes onto matching LLM/API spans.
+*
+* These derived attributes are applied before trace consumers run, so
+* `deriveFromTracing`, default usage extraction, trace display, and call
+* metrics can all read them by normal dot-path lookup.
+*/
+function applyDerivedCallAttributes(params) { // Returns a new array; a span whose kind is in neither config is passed through as the same object.
+	const llmKinds = new Set(params.llmCallsConfig.kinds);
+	const apiKinds = new Set(params.apiCallsConfig.kinds);
+	return params.spans.map((span) => {
+		let nextSpan = span;
+		if (llmKinds.has(span.kind)) nextSpan = applyDerivedAttributesForKind({
+			span: nextSpan,
+			derivedAttributes: params.llmCallsConfig.derivedAttributes
+		});
+		if (apiKinds.has(span.kind)) nextSpan = applyDerivedAttributesForKind({ // A kind listed in both configs gets the LLM entries first, then the API entries on that result.
+			span: nextSpan,
+			derivedAttributes: params.apiCallsConfig.derivedAttributes
+		});
+		return nextSpan;
+	});
+}
+//#endregion
 //#region ../shared/src/utils/extractLlmCalls.ts
 function readNumber$2(attributes, path) {
 	const raw = getNestedAttribute(attributes, path);
@@ -5361,7 +5468,12 @@ async function runCase(params) {
 		startTime: evalDef.startTime,
 		freezeTime: evalDef.freezeTime
 	});
-	const traceTree = buildTraceTree(scope.spans, scope.checkpoints);
+	const spansWithDerivedAttributes = applyDerivedCallAttributes({
+		spans: scope.spans,
+		llmCallsConfig,
+		apiCallsConfig
+	});
+	const traceTree = buildTraceTree(spansWithDerivedAttributes, scope.checkpoints);
 	const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
 	if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
 	if (!nonAssertError && evalDef.deriveFromTracing) {
@@ -5383,7 +5495,7 @@ async function runCase(params) {
 	}
 	if (!nonAssertError) addDefaultOutputs({
 		outputs: scope.outputs,
-		spans: scope.spans,
+		spans: spansWithDerivedAttributes,
 		llmCallsConfig,
 		apiCallsConfig,
 		globalRemove: globalRemoveDefaultConfig,
@@ -5471,7 +5583,7 @@ async function runCase(params) {
 		}
 	}
 	const status = nonAssertError ? "error" : passed ? "pass" : "fail";
-	const { trace: displayTrace, traceDisplay } = resolveTracePresentation(scope.spans, globalTraceDisplay, evalDef.traceDisplay);
+	const { trace: displayTrace, traceDisplay } = resolveTracePresentation(spansWithDerivedAttributes, globalTraceDisplay, evalDef.traceDisplay);
 	const columns = {};
 	const columnOverrides = mergeDefaultColumns({
 		columns: evalDef.columns,
@@ -5938,4 +6050,4 @@ function toLastRunStatus(status) {
 	return status === "pending" ? null : status;
 }
 //#endregion
-export { removeDefaultConfigSchema as $, columnKindSchema as $t, extractApiCalls as A, runInEvalScope as An, cacheFileSchema as At, DEFAULT_API_CALLS_CONFIG as B, traceAttributeDisplayFormatSchema as Bt, validateCharts as C, getEvalCaseInput as Cn, evalChartTooltipExtraSchema as Ct, sseEnvelopeSchema as D, mergeEvalOutput as Dn, cacheDebugKeyFileSchema as Dt, updateManualScoreRequestSchema as E, isInEvalScope as En, cacheDebugKeyEntrySchema as Et, deriveScopedSummaryFromCases as F, repoFile as Fn, cacheRecordingSchema as Ft, apiCallMetricSchema as G, traceDisplayInputConfigSchema as Gt, agentEvalsConfigSchema as H, traceAttributeDisplayPlacementSchema as Ht, deriveStatusFromCaseRows as I, defineEval as In, cacheStatusSchema as It, llmCallMetricFormatSchema as J, traceSpanSchema as Jt, apiCallsConfigSchema as K, traceSpanErrorSchema as Kt, deriveStatusFromChildStatuses as L, getEvalRegistry as Ln, serializedCacheSpanSchema as Lt, getNestedAttribute as M, setEvalOutput as Mn, cacheModeSchema as Mt, getEvalTitle as N, setScopeCacheContext as Nn, cacheOperationTypeSchema as Nt, extractCacheEntries as O, nextEvalId as On, cacheEntrySchema as Ot, getEvalDisplayStatus as P, startEvalBackgroundJob as Pn, cacheRecordingOpSchema as Pt, llmCallsConfigSchema as Q, columnFormatSchema as Qt, runManifestSchema as R, spanCacheOptionsSchema as Rt, normalizeScoreDef as S, getCurrentScope as Sn, evalChartMetricSchema as St, createRunRequestSchema as T, incrementEvalOutput as Tn, evalChartsConfigSchema as Tt, apiCallMetricFormatSchema as U, traceAttributeDisplaySchema as Ut, DEFAULT_LLM_CALLS_CONFIG as V, traceAttributeDisplayInputSchema as Vt, apiCallMetricPlacementSchema as W, traceDisplayConfigSchema as Wt, llmCallMetricSchema as X, cellValueSchema as Xt, llmCallMetricPlacementSchema as Y, traceSpanWarningSchema as Yt, llmCallPricingSchema as Z, columnDefSchema as Zt, loadEvalModule as _, advanceEvalTime as _n, evalChartAggregateSchema as _t, loadPersistedRunSnapshot as a, z$1 as 
an, caseDetailSchema as at, loadConfig as b, evalAssert as bn, evalChartColorSchema as bt, persistCaseDetail as c, evalSpan as cn, evalStatAggregateSchema as ct, recomputePersistedCaseStatus as d, hashCacheKeySync as dn, evalSummarySchema as dt, fileRefSchema as en, resolveApiCallsConfig as et, runTouchesEval as f, deserializeCacheRecording as fn, runLogEntrySchema as ft, setLatestRunInfoMap as g, EvalAssertionError as gn, scoreTraceSchema as gt, getTargetEvalIds as h, serializeCacheValue as hn, runLogPhaseSchema as ht, getLatestRunInfos as i, runArtifactRefSchema as in, assertionFailureSchema as it, extractLlmCalls as j, runInExistingEvalScope as jn, cacheListItemSchema as jt, extractCacheHits as k, runInEvalRuntimeScope as kn, cacheEntryWithDebugKeySchema as kt, persistRunState as l, evalTracer as ln, evalStatItemSchema as lt, buildEvalSummary as m, serializeCacheRecording as mn, runLogLocationSchema as mt, generateRunId as n, numberDisplayOptionsSchema as nn, runLogsConfigSchema as nt, loadPersistedRunSnapshots as o, buildTraceTree as on, caseRowSchema as ot, resolveArtifactPath as p, deserializeCacheValue as pn, runLogLevelSchema as pt, defaultConfigKeySchema as q, traceSpanKindSchema as qt, getLastRunStatuses as r, repoFileRefSchema as rn, trialSelectionModeSchema as rt, nextShortIdFromSnapshots as s, captureEvalSpanError as sn, evalFreshnessStatusSchema as st, executeRun as t, jsonCellSchema as tn, resolveLlmCallsConfig as tt, recomputeEvalStatusesInRuns as u, hashCacheKey as un, evalStatsConfigSchema as ut, parseEvalMetas as v, appendToEvalOutput as vn, evalChartAxisSchema as vt, createFsCacheStore as w, getEvalStartTime as wn, evalChartTypeSchema as wt, buildDeclaredColumnDefs as x, evalLog as xn, evalChartConfigSchema as xt, resolveEvalDefaultConfig as y, configureEvalRunLogs as yn, evalChartBuiltinMetricSchema as yt, runSummarySchema as z, traceCacheRefSchema as zt };
+export { llmCallsConfigSchema as $, columnFormatSchema as $t, extractApiCalls as A, runInEvalRuntimeScope as An, cacheEntryWithDebugKeySchema as At, runSummarySchema as B, traceCacheRefSchema as Bt, validateCharts as C, getCurrentScope as Cn, evalChartMetricSchema as Ct, sseEnvelopeSchema as D, isInEvalScope as Dn, cacheDebugKeyEntrySchema as Dt, updateManualScoreRequestSchema as E, incrementEvalOutput as En, evalChartsConfigSchema as Et, getEvalDisplayStatus as F, startEvalBackgroundJob as Fn, cacheRecordingOpSchema as Ft, apiCallMetricPlacementSchema as G, traceDisplayConfigSchema as Gt, DEFAULT_LLM_CALLS_CONFIG as H, traceAttributeDisplayInputSchema as Ht, deriveScopedSummaryFromCases as I, repoFile as In, cacheRecordingSchema as It, defaultConfigKeySchema as J, traceSpanKindSchema as Jt, apiCallMetricSchema as K, traceDisplayInputConfigSchema as Kt, deriveStatusFromCaseRows as L, defineEval as Ln, cacheStatusSchema as Lt, applyDerivedCallAttributes as M, runInExistingEvalScope as Mn, cacheListItemSchema as Mt, getNestedAttribute as N, setEvalOutput as Nn, cacheModeSchema as Nt, extractCacheEntries as O, mergeEvalOutput as On, cacheDebugKeyFileSchema as Ot, getEvalTitle as P, setScopeCacheContext as Pn, cacheOperationTypeSchema as Pt, llmCallPricingSchema as Q, columnDefSchema as Qt, deriveStatusFromChildStatuses as R, getEvalRegistry as Rn, serializedCacheSpanSchema as Rt, normalizeScoreDef as S, evalLog as Sn, evalChartConfigSchema as St, createRunRequestSchema as T, getEvalStartTime as Tn, evalChartTypeSchema as Tt, agentEvalsConfigSchema as U, traceAttributeDisplayPlacementSchema as Ut, DEFAULT_API_CALLS_CONFIG as V, traceAttributeDisplayFormatSchema as Vt, apiCallMetricFormatSchema as W, traceAttributeDisplaySchema as Wt, llmCallMetricPlacementSchema as X, traceSpanWarningSchema as Xt, llmCallMetricFormatSchema as Y, traceSpanSchema as Yt, llmCallMetricSchema as Z, cellValueSchema as Zt, loadEvalModule as _, EvalAssertionError as _n, scoreTraceSchema as 
_t, loadPersistedRunSnapshot as a, runArtifactRefSchema as an, assertionFailureSchema as at, loadConfig as b, configureEvalRunLogs as bn, evalChartBuiltinMetricSchema as bt, persistCaseDetail as c, captureEvalSpanError as cn, evalFreshnessStatusSchema as ct, recomputePersistedCaseStatus as d, hashCacheKey as dn, evalStatsConfigSchema as dt, columnKindSchema as en, removeDefaultConfigSchema as et, runTouchesEval as f, hashCacheKeySync as fn, evalSummarySchema as ft, setLatestRunInfoMap as g, serializeCacheValue as gn, runLogPhaseSchema as gt, getTargetEvalIds as h, serializeCacheRecording as hn, runLogLocationSchema as ht, getLatestRunInfos as i, repoFileRefSchema as in, trialSelectionModeSchema as it, extractLlmCalls as j, runInEvalScope as jn, cacheFileSchema as jt, extractCacheHits as k, nextEvalId as kn, cacheEntrySchema as kt, persistRunState as l, evalSpan as ln, evalStatAggregateSchema as lt, buildEvalSummary as m, deserializeCacheValue as mn, runLogLevelSchema as mt, generateRunId as n, jsonCellSchema as nn, resolveLlmCallsConfig as nt, loadPersistedRunSnapshots as o, z$1 as on, caseDetailSchema as ot, resolveArtifactPath as p, deserializeCacheRecording as pn, runLogEntrySchema as pt, apiCallsConfigSchema as q, traceSpanErrorSchema as qt, getLastRunStatuses as r, numberDisplayOptionsSchema as rn, runLogsConfigSchema as rt, nextShortIdFromSnapshots as s, buildTraceTree as sn, caseRowSchema as st, executeRun as t, fileRefSchema as tn, resolveApiCallsConfig as tt, recomputeEvalStatusesInRuns as u, evalTracer as un, evalStatItemSchema as ut, parseEvalMetas as v, advanceEvalTime as vn, evalChartAggregateSchema as vt, createFsCacheStore as w, getEvalCaseInput as wn, evalChartTooltipExtraSchema as wt, buildDeclaredColumnDefs as x, evalAssert as xn, evalChartColorSchema as xt, resolveEvalDefaultConfig as y, appendToEvalOutput as yn, evalChartAxisSchema as yt, runManifestSchema as z, spanCacheOptionsSchema as zt };

package/dist/{runner-4yNYRvmF.mjs → runner-B4SosWgD.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { n as createRunner } from "./cli-D3QNOcPN.mjs";
-import "./src-CcXfWT4M.mjs";
+import { n as createRunner } from "./cli-Be0x8CS3.mjs";
+import "./src-D6cettg0.mjs";
 //#region ../../apps/server/src/runner.ts
 let runnerInstance = null;
 function getRunnerInstance() {

package/dist/{runner-B-SYzW8w.mjs → runner-jSujaSKt.mjs} RENAMED Viewed

@@ -1,2 +1,2 @@
-import { n as initRunner, t as getRunnerInstance } from "./runner-4yNYRvmF.mjs";
+import { n as initRunner, t as getRunnerInstance } from "./runner-B4SosWgD.mjs";
 export { getRunnerInstance, initRunner };

package/dist/src-D6cettg0.mjs ADDED Viewed

@@ -0,0 +1,3 @@
+import "./runOrchestration-D697g6Qe.mjs";
+import "./cli-Be0x8CS3.mjs";
+export {};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.23.0",
+  "version": "0.24.0",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"
@@ -59,8 +59,8 @@
     "@types/node": "^24.7.2",
     "typescript": "^5.9.2",
     "@agent-evals/runner": "0.0.1",
-    "@agent-evals/sdk": "0.0.1",
-    "@agent-evals/shared": "0.0.1"
+    "@agent-evals/shared": "0.0.1",
+    "@agent-evals/sdk": "0.0.1"
   },
   "scripts": {
     "build": "pnpm --filter @agent-evals/web build && tsdown",

package/skills/agent-eval/SKILL.md CHANGED Viewed

@@ -274,10 +274,12 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
   attribute paths. `latencyMs` is time to first token; duration, total tokens,
   tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
   override `attributes.<field>` for non-default primitive span shapes, configure
-  `pricing` to derive USD costs from token counts by model/provider, and add
-  entries to `metrics` to surface arbitrary user metrics (`format: 'string' |
-'number' | 'duration' | 'json' | 'boolean'`, `placements: ['header' |
-'body']`).
+  `pricing` to derive USD costs from token counts by model/provider, add
+  `derivedAttributes` to persist computed values back onto matching LLM spans
+  before trace consumers run, and add entries to `metrics` to surface arbitrary
+  user metrics (`format: 'string' | 'number' | 'duration' | 'json' |
+'boolean'`, `placements: ['header' | 'body']`). `derivedAttributes` keys are
+  dot-paths under `span.attributes`; return `undefined` to skip one span.
 - Default usage config derives missing eval outputs from matching LLM/API spans
   before `outputsSchema` and scores run: `apiCalls`, `costUsd`, `llmTurns`,
   `inputTokens`, `outputTokens`, `totalTokens`, `cachedInputTokens`,
@@ -298,7 +300,8 @@ cacheCreationInputTokens` so cache details are not double-counted.
   and `'fetch'` spans with `method`, `url`, `statusCode`, `request`,
   `response`, `requestBody`, `responseBody`, `headers`, `durationMs`, and
   `error` read from conventional attribute paths. Override `kinds` or
-  `attributes.<field>` for external tracers, and add `metrics` with the same
+  `attributes.<field>` for external tracers, add `derivedAttributes` for
+  computed persisted API span attributes, and add `metrics` with the same
   formats and placements as LLM-call metrics.
 - `runLogs` (in `agent-evals.config.ts`) controls case log capture. Use
   `runLogs: { captureConsole: false }` to keep console output in the terminal

package/dist/src-CcXfWT4M.mjs DELETED Viewed

@@ -1,3 +0,0 @@
-import "./runOrchestration-CimthgI7.mjs";
-import "./cli-D3QNOcPN.mjs";
-export {};