npm - @ls-stack/agent-eval - Versions diffs - 0.29.0 → 0.31.0 - Mend

@ls-stack/agent-eval 0.29.0 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/dist/{app-D6-msfKP.mjs → app-BrSMRTpy.mjs} +4 -4
package/dist/apps/web/dist/assets/index-CPcVyFRP.js +118 -0
package/dist/apps/web/dist/assets/index-ClPR-tfN.css +1 -0
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/{cli-CIc_gBNM.mjs → cli-CMPmuY7W.mjs} +3 -3
package/dist/index.d.mts +182 -113
package/dist/index.mjs +4 -4
package/dist/runChild.mjs +1 -1
package/dist/{runOrchestration-CIARrLs6.mjs → runOrchestration-CAyVXPFz.mjs} +69 -12
package/dist/{runner-1F8MeY5V.mjs → runner-Bnm1nz0U.mjs} +2 -2
package/dist/{runner-Bq1f9B9d.mjs → runner-CmpWwCe1.mjs} +1 -1
package/dist/src-gZm9nyTp.mjs +3 -0
package/package.json +1 -1
package/skills/agent-eval/SKILL.md +12 -8
package/dist/apps/web/dist/assets/index-BCr6J8Uj.js +0 -118
package/dist/apps/web/dist/assets/index-DjUTm3M-.css +0 -1
package/dist/src-CkWT1iSu.mjs +0 -3

package/dist/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { $ as defaultConfigKeySchema, $n as incrementEvalOutput, $t as cacheEntryWithDebugKeySchema, A as createRunRequestSchema, An as buildTraceTree, At as runLogPhaseSchema, B as getEvalDisplayStatus, Bn as repoFile, Bt as manualInputTextFieldSchema, Cn as columnKindSchema, Ct as evalStatAggregateSchema, Dn as repoFileRefSchema, Dt as runLogEntrySchema, En as numberDisplayOptionsSchema, Et as evalSummarySchema, F as extractApiCalls, Fn as hashCacheKeySync, Ft as manualInputJsonFieldSchema, G as runSummarySchema, Gn as advanceEvalTime, Gt as evalChartConfigSchema, H as deriveStatusFromCaseRows, Hn as readManualInputFile, Ht as evalChartAxisSchema, I as extractLlmCalls, In as deserializeCacheRecording, It as manualInputMultilineFieldSchema, J as agentEvalsConfigSchema, Jn as evalAssert, Jt as evalChartTypeSchema, K as DEFAULT_API_CALLS_CONFIG, Kn as appendToEvalOutput, Kt as evalChartMetricSchema, L as applyDerivedCallAttributes, Ln as deserializeCacheValue, Lt as manualInputNumberFieldSchema, M as sseEnvelopeSchema, Mn as evalSpan, Mt as manualInputBooleanFieldSchema, N as extractCacheEntries, Nn as evalTracer, Nt as manualInputDescriptorSchema, O as configReloadStateSchema, On as runArtifactRefSchema, Ot as runLogLevelSchema, P as extractCacheHits, Pn as hashCacheKey, Pt as manualInputFieldDescriptorSchema, Q as apiCallsConfigSchema, Qn as getEvalStartTime, Qt as cacheEntrySchema, R as getNestedAttribute, Rn as serializeCacheRecording, Rt as manualInputSelectFieldSchema, Sn as columnFormatSchema, St as evalFreshnessStatusSchema, Tn as jsonCellSchema, Tt as evalStatsConfigSchema, U as deriveStatusFromChildStatuses, Un as evalExpect, Ut as evalChartBuiltinMetricSchema, V as deriveScopedSummaryFromCases, Vn as manualInputFileValueSchema, Vt as evalChartAggregateSchema, W as runManifestSchema, Wn as EvalAssertionError, Wt as evalChartColorSchema, X as apiCallMetricPlacementSchema, Xn as getCurrentScope, Xt as cacheDebugKeyEntrySchema, Y as apiCallMetricFormatSchema, Yn as evalLog, Yt as evalChartsConfigSchema, Z as apiCallMetricSchema, Zn as getEvalCaseInput, Zt as cacheDebugKeyFileSchema, _n as traceSpanKindSchema, _t as getCaseRowEvalKey, an as cacheRecordingSchema, ar as runInExistingEvalScope, at as llmCallMetricSchema, bn as cellValueSchema, bt as caseRowSchema, cn as spanCacheOptionsSchema, cr as startEvalBackgroundJob, ct as llmCallsConfigSchema, dn as traceAttributeDisplayInputSchema, dt as resolveLlmCallsConfig, en as cacheFileSchema, er as isInEvalScope, et as evalColumnOverrideSchema, fn as traceAttributeDisplayPlacementSchema, ft as runLogsConfigSchema, gn as traceSpanErrorSchema, gt as getCaseRowCaseKey, hn as traceDisplayInputConfigSchema, ht as buildEvalKey, in as cacheRecordingOpSchema, ir as runInEvalScope, it as llmCallMetricPlacementSchema, j as updateManualScoreRequestSchema, jn as captureEvalSpanError, jt as scoreTraceSchema, k as configReloadStatusSchema, kn as z, kt as runLogLocationSchema, ln as traceCacheRefSchema, lr as defineEval, lt as removeDefaultConfigSchema, mn as traceDisplayConfigSchema, mt as buildCaseKey, nn as cacheModeSchema, nr as nextEvalId, nt as evalDeriveConfigSchema, on as cacheStatusSchema, or as setEvalOutput, ot as llmCallPricingRateSchema, pn as traceAttributeDisplaySchema, pt as trialSelectionModeSchema, q as DEFAULT_LLM_CALLS_CONFIG, qt as evalChartTooltipExtraSchema, rn as cacheOperationTypeSchema, rr as runInEvalRuntimeScope, rt as llmCallMetricFormatSchema, sn as serializedCacheSpanSchema, sr as setScopeCacheContext, st as llmCallPricingSchema, tn as cacheListItemSchema, tr as mergeEvalOutput, tt as evalColumnsSchema, un as traceAttributeDisplayFormatSchema, ur as getEvalRegistry, ut as resolveApiCallsConfig, vn as traceSpanSchema, vt as assertionFailureSchema, wn as fileRefSchema, wt as evalStatItemSchema, xn as columnDefSchema, xt as discoveryIssueSchema, yn as traceSpanWarningSchema, yt as caseDetailSchema, z as getEvalTitle, zn as serializeCacheValue, zt as manualInputSelectOptionSchema } from "./runOrchestration-CIARrLs6.mjs";
-import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CIc_gBNM.mjs";
-import "./src-CkWT1iSu.mjs";
-export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
+import { $ as defaultConfigKeySchema, $n as getEvalStartTime, $t as cacheEntrySchema, A as createRunRequestSchema, An as z, At as runLogLocationSchema, B as getEvalDisplayStatus, Bn as serializeCacheValue, Bt as manualInputSelectOptionSchema, Cn as columnFormatSchema, Ct as evalFreshnessStatusSchema, Dn as numberDisplayOptionsSchema, Dt as evalSummarySchema, En as jsonCellSchema, Et as evalStatsConfigSchema, F as extractApiCalls, Fn as hashCacheKey, Ft as manualInputFieldDescriptorSchema, G as runSummarySchema, Gn as EvalAssertionError, Gt as evalChartColorSchema, H as deriveStatusFromCaseRows, Hn as manualInputFileValueSchema, Ht as evalChartAggregateSchema, I as extractLlmCalls, In as hashCacheKeySync, It as manualInputJsonFieldSchema, J as agentEvalsConfigSchema, Jt as evalChartTooltipExtraSchema, K as DEFAULT_API_CALLS_CONFIG, Kn as advanceEvalTime, Kt as evalChartConfigSchema, L as applyDerivedCallAttributes, Ln as deserializeCacheRecording, Lt as manualInputMultilineFieldSchema, M as sseEnvelopeSchema, Mn as captureEvalSpanError, Mt as scoreTraceSchema, N as extractCacheEntries, Nn as evalSpan, Nt as manualInputBooleanFieldSchema, O as configReloadStateSchema, On as repoFileRefSchema, Ot as runLogEntrySchema, P as extractCacheHits, Pn as evalTracer, Pt as manualInputDescriptorSchema, Q as apiCallsConfigSchema, Qn as getEvalCaseInput, Qt as cacheDebugKeyFileSchema, R as getNestedAttribute, Rn as deserializeCacheValue, Rt as manualInputNumberFieldSchema, Sn as columnDefSchema, St as discoveryIssueSchema, Tn as fileRefSchema, Tt as evalStatItemSchema, U as deriveStatusFromChildStatuses, Un as readManualInputFile, Ut as evalChartAxisSchema, V as deriveScopedSummaryFromCases, Vn as repoFile, Vt as manualInputTextFieldSchema, W as runManifestSchema, Wn as evalExpect, Wt as evalChartBuiltinMetricSchema, X as apiCallMetricPlacementSchema, Xn as evalLog, Xt as evalChartsConfigSchema, Y as apiCallMetricFormatSchema, Yn as evalAssert, Yt as evalChartTypeSchema, Z as apiCallMetricSchema, Zn as getCurrentScope, Zt as cacheDebugKeyEntrySchema, _n as traceSpanErrorSchema, _t as getCaseRowCaseKey, an as cacheRecordingOpSchema, ar as runInEvalScope, at as llmCallMetricPlacementSchema, bn as traceSpanWarningSchema, bt as caseDetailSchema, cn as serializedCacheSpanSchema, cr as setScopeCacheContext, ct as llmCallPricingSchema, dn as traceAttributeDisplayFormatSchema, dr as getEvalRegistry, dt as resolveApiCallsConfig, en as cacheEntryWithDebugKeySchema, er as incrementEvalOutput, et as evalColumnOverrideSchema, fn as traceAttributeDisplayInputSchema, ft as resolveLlmCallsConfig, gn as traceDisplayInputConfigSchema, gt as buildEvalKey, hn as traceDisplayConfigSchema, ht as buildCaseKey, in as cacheOperationTypeSchema, ir as runInEvalRuntimeScope, it as llmCallMetricFormatSchema, j as updateManualScoreRequestSchema, jn as buildTraceTree, jt as runLogPhaseSchema, k as configReloadStatusSchema, kn as runArtifactRefSchema, kt as runLogLevelSchema, ln as spanCacheOptionsSchema, lr as startEvalBackgroundJob, lt as llmCallsConfigSchema, mn as traceAttributeDisplaySchema, mt as trialSelectionModeSchema, nn as cacheListItemSchema, nr as mergeEvalOutput, nt as evalDeriveConfigSchema, on as cacheRecordingSchema, or as runInExistingEvalScope, ot as llmCallMetricSchema, pn as traceAttributeDisplayPlacementSchema, pt as runLogsConfigSchema, q as DEFAULT_LLM_CALLS_CONFIG, qn as appendToEvalOutput, qt as evalChartMetricSchema, rn as cacheModeSchema, rr as nextEvalId, rt as llmCallCostCurrencySchema, sn as cacheStatusSchema, sr as setEvalOutput, st as llmCallPricingRateSchema, tn as cacheFileSchema, tr as isInEvalScope, tt as evalColumnsSchema, un as traceCacheRefSchema, ur as defineEval, ut as removeDefaultConfigSchema, vn as traceSpanKindSchema, vt as getCaseRowEvalKey, wn as columnKindSchema, wt as evalStatAggregateSchema, xn as cellValueSchema, xt as caseRowSchema, yn as traceSpanSchema, yt as assertionFailureSchema, z as getEvalTitle, zn as serializeCacheRecording, zt as manualInputSelectFieldSchema } from "./runOrchestration-CAyVXPFz.mjs";
+import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CMPmuY7W.mjs";
+import "./src-gZm9nyTp.mjs";
+export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };

package/dist/runChild.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, G as runSummarySchema, Nt as manualInputDescriptorSchema, Tt as evalStatsConfigSchema, W as runManifestSchema, Yt as evalChartsConfigSchema, ht as buildEvalKey, qn as configureEvalRunLogs, r as getTargetEvals$1, t as executeRun, x as parseEvalDiscovery, xn as columnDefSchema } from "./runOrchestration-CIARrLs6.mjs";
+import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, Et as evalStatsConfigSchema, G as runSummarySchema, Jn as configureEvalRunLogs, Pt as manualInputDescriptorSchema, Sn as columnDefSchema, W as runManifestSchema, Xt as evalChartsConfigSchema, gt as buildEvalKey, r as getTargetEvals$1, t as executeRun, x as parseEvalDiscovery } from "./runOrchestration-CAyVXPFz.mjs";
 import { createHash } from "node:crypto";
 import { readFile } from "node:fs/promises";
 import { relative } from "node:path";

package/dist/{runOrchestration-CIARrLs6.mjs → runOrchestration-CAyVXPFz.mjs} RENAMED Viewed

@@ -3219,6 +3219,8 @@ const llmCallMetricPlacementSchema = z.enum(["header", "body"]);
 /** Where an API-call metric is rendered inside the API calls tab. */
 const apiCallMetricPlacementSchema = llmCallMetricPlacementSchema;
 const callDerivedAttributeSchema = z.custom((value) => typeof value === "function", { message: "Expected a derived attribute function" });
+const callDerivedAttributesFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a derived attributes function" });
+const callDerivedAttributesConfigSchema = z.union([z.record(z.string().min(1), callDerivedAttributeSchema), callDerivedAttributesFnSchema]);
 /**
 * Schema for a single user-defined metric attached to LLM call rows.
 *
@@ -3314,6 +3316,20 @@ const llmCallPricingSchema = llmCallPricingRateSchema.extend({
 	*/
 	providers: z.record(z.string().min(1), llmCallPricingRateSchema).optional()
 });
+/**
+* Schema for extra currencies displayed in the LLM calls breakdown table.
+* Costs are still derived in USD, then multiplied by `usdToCurrencyRate`.
+*/
+const llmCallCostCurrencySchema = z.object({
+	/** Currency code or short display token, such as `BRL` or `EUR`. */
+	code: z.string().min(1),
+	/** Optional display label for tooltips and future UI surfaces. */
+	label: z.string().min(1).optional(),
+	/** Multiplier used to convert one USD to this currency. */
+	usdToCurrencyRate: z.number().nonnegative(),
+	/** Number presentation options for the converted value. */
+	numberFormat: numberDisplayOptionsSchema.optional()
+});
 /** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
 const llmCallsConfigSchema = z.object({
 	/** Span kinds treated as LLM calls. Defaults to `['llm']`. */
@@ -3345,15 +3361,21 @@ const llmCallsConfigSchema = z.object({
 	/**
 	* Derived attributes persisted onto every matching LLM span before
 	* `deriveFromTracing`, default outputs, trace display, and call metrics read
-	* the trace. Keys are dot-paths under `span.attributes`; return `undefined`
-	* to skip writing the attribute for one span.
+	* the trace. Use a keyed map for one-off fields, or one callback returning a
+	* path/value object for multiple fields. Keys are dot-paths under
+	* `span.attributes`; return `undefined` to skip one span or one returned key.
 	*/
-	derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
+	derivedAttributes: callDerivedAttributesConfigSchema.optional(),
 	/**
 	* Model-keyed pricing registry used to calculate LLM-call costs from token
 	* counts. Built-in LLM cost fields are only derived from this registry.
 	*/
 	pricing: z.record(z.string().min(1), llmCallPricingSchema).optional(),
+	/**
+	* Additional currencies shown as columns in the LLM calls breakdown table.
+	* These do not change persisted `costUsd` outputs, stats, or charts.
+	*/
+	costCurrencies: z.array(llmCallCostCurrencySchema).optional(),
 	/** Custom user-defined metrics surfaced on each LLM call. */
 	metrics: z.array(llmCallMetricSchema).optional()
 });
@@ -3380,11 +3402,12 @@ const apiCallsConfigSchema = z.object({
 	}).optional(),
 	/**
 	* Derived attributes persisted onto every matching API span before trace
-	* display and call metrics read the trace. Keys are dot-paths under
-	* `span.attributes`; return `undefined` to skip writing the attribute for
-	* one span.
+	* display and call metrics read the trace. Use a keyed map for one-off
+	* fields, or one callback returning a path/value object for multiple fields.
+	* Keys are dot-paths under `span.attributes`; return `undefined` to skip one
+	* span or one returned key.
 	*/
-	derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
+	derivedAttributes: callDerivedAttributesConfigSchema.optional(),
 	/** Custom user-defined metrics surfaced on each API call. */
 	metrics: z.array(apiCallMetricSchema).optional()
 });
@@ -3418,7 +3441,8 @@ const DEFAULT_LLM_CALLS_CONFIG = {
 	},
 	derivedAttributes: [],
 	metrics: [],
-	pricing: []
+	pricing: [],
+	costCurrencies: []
 };
 /** Default API-calls config the UI uses before the workspace fetch resolves. */
 const DEFAULT_API_CALLS_CONFIG = {
@@ -3444,7 +3468,9 @@ const DEFAULT_API_CALLS_CONFIG = {
 	metrics: []
 };
 function resolveDerivedAttributes(input) {
-	return Object.entries(input ?? {}).map(([path, compute]) => ({
+	if (input === void 0) return [];
+	if (typeof input === "function") return [{ computeMany: input }];
+	return Object.entries(input).map(([path, compute]) => ({
 		path,
 		compute
 	}));
@@ -3496,6 +3522,14 @@ function resolveLlmCallPricingEntries(model, pricing) {
 	});
 	return entries;
 }
+function resolveLlmCallCostCurrency(currency) {
+	return {
+		code: currency.code,
+		label: currency.label,
+		usdToCurrencyRate: currency.usdToCurrencyRate,
+		numberFormat: currency.numberFormat
+	};
+}
 /**
 * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
 * by the UI to derive the LLM calls tab.
@@ -3507,6 +3541,8 @@ function resolveLlmCallPricingEntries(model, pricing) {
 * - Missing `metrics[].placements` defaults to `['body']`.
 * - Missing `pricing` defaults to an empty registry; built-in costs are only
 *   derived from configured model-keyed pricing and token counts.
+* - Missing `costCurrencies` defaults to an empty list; extra currencies only
+*   affect the expanded LLM calls breakdown table.
 */
 function resolveLlmCallsConfig(input) {
 	return {
@@ -3517,7 +3553,8 @@ function resolveLlmCallsConfig(input) {
 		},
 		derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
 		metrics: (input?.metrics ?? []).map(resolveLlmCallMetric),
-		pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing))
+		pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing)),
+		costCurrencies: (input?.costCurrencies ?? []).map(resolveLlmCallCostCurrency)
 	};
 }
 /**
@@ -3809,11 +3846,31 @@ function mergeNestedAttribute$1(value, path, attributeValue) {
 function applyDerivedAttributesForKind(params) {
 	let attributes = params.span.attributes;
 	for (const derivedAttribute of params.derivedAttributes) {
-		if (derivedAttribute.compute === void 0) continue;
 		const span = {
 			...params.span,
 			attributes
 		};
+		if (derivedAttribute.computeMany !== void 0) {
+			const values = (() => {
+				try {
+					return derivedAttribute.computeMany({
+						attributes,
+						span,
+						get: (path) => getNestedAttribute(attributes, path)
+					});
+				} catch {
+					return;
+				}
+			})();
+			if (!isRecord$3(values)) continue;
+			for (const [path, value] of Object.entries(values)) {
+				if (value === void 0) continue;
+				attributes = mergeNestedAttribute$1(attributes, path, value);
+			}
+			continue;
+		}
+		if (derivedAttribute.path === void 0) continue;
+		if (derivedAttribute.compute === void 0) continue;
 		const value = (() => {
 			try {
 				return derivedAttribute.compute({
@@ -7161,4 +7218,4 @@ function toLastRunStatus(status) {
 	return status === "pending" ? null : status;
 }
 //#endregion
-export { defaultConfigKeySchema as $, incrementEvalOutput as $n, cacheEntryWithDebugKeySchema as $t, createRunRequestSchema as A, buildTraceTree as An, runLogPhaseSchema as At, getEvalDisplayStatus as B, repoFile as Bn, manualInputTextFieldSchema as Bt, loadConfig as C, columnKindSchema as Cn, evalStatAggregateSchema as Ct, createFsCacheStore as D, repoFileRefSchema as Dn, runLogEntrySchema as Dt, validateCharts as E, numberDisplayOptionsSchema as En, evalSummarySchema as Et, extractApiCalls as F, hashCacheKeySync as Fn, manualInputJsonFieldSchema as Ft, runSummarySchema as G, advanceEvalTime as Gn, evalChartConfigSchema as Gt, deriveStatusFromCaseRows as H, readManualInputFile as Hn, evalChartAxisSchema as Ht, extractLlmCalls as I, deserializeCacheRecording as In, manualInputMultilineFieldSchema as It, agentEvalsConfigSchema as J, evalAssert as Jn, evalChartTypeSchema as Jt, DEFAULT_API_CALLS_CONFIG as K, appendToEvalOutput as Kn, evalChartMetricSchema as Kt, applyDerivedCallAttributes as L, deserializeCacheValue as Ln, manualInputNumberFieldSchema as Lt, sseEnvelopeSchema as M, evalSpan as Mn, manualInputBooleanFieldSchema as Mt, extractCacheEntries as N, evalTracer as Nn, manualInputDescriptorSchema as Nt, configReloadStateSchema as O, runArtifactRefSchema as On, runLogLevelSchema as Ot, extractCacheHits as P, hashCacheKey as Pn, manualInputFieldDescriptorSchema as Pt, apiCallsConfigSchema as Q, getEvalStartTime as Qn, cacheEntrySchema as Qt, getNestedAttribute as R, serializeCacheRecording as Rn, manualInputSelectFieldSchema as Rt, resolveEvalDefaultConfig as S, columnFormatSchema as Sn, evalFreshnessStatusSchema as St, normalizeScoreDef as T, jsonCellSchema as Tn, evalStatsConfigSchema as Tt, deriveStatusFromChildStatuses as U, evalExpect as Un, evalChartBuiltinMetricSchema as Ut, deriveScopedSummaryFromCases as V, manualInputFileValueSchema as Vn, evalChartAggregateSchema as Vt, runManifestSchema as W, EvalAssertionError as Wn, evalChartColorSchema as Wt, apiCallMetricPlacementSchema as X, getCurrentScope as Xn, cacheDebugKeyEntrySchema as Xt, apiCallMetricFormatSchema as Y, evalLog as Yn, evalChartsConfigSchema as Yt, apiCallMetricSchema as Z, getEvalCaseInput as Zn, cacheDebugKeyFileSchema as Zt, buildManualInputDescriptor as _, traceSpanKindSchema as _n, getCaseRowEvalKey as _t, getLastRunStatuses as a, cacheRecordingSchema as an, runInExistingEvalScope as ar, llmCallMetricSchema as at, loadEvalModule as b, cellValueSchema as bn, caseRowSchema as bt, loadPersistedRunSnapshots as c, spanCacheOptionsSchema as cn, startEvalBackgroundJob as cr, llmCallsConfigSchema as ct, persistRunState as d, traceAttributeDisplayInputSchema as dn, resolveLlmCallsConfig as dt, cacheFileSchema as en, isInEvalScope as er, evalColumnOverrideSchema as et, recomputeEvalStatusesInRuns as f, traceAttributeDisplayPlacementSchema as fn, runLogsConfigSchema as ft, resolveArtifactPath as g, traceSpanErrorSchema as gn, getCaseRowCaseKey as gt, resolveTracePresentation as h, traceDisplayInputConfigSchema as hn, buildEvalKey as ht, generateRunId as i, cacheRecordingOpSchema as in, runInEvalScope as ir, llmCallMetricPlacementSchema as it, updateManualScoreRequestSchema as j, captureEvalSpanError as jn, scoreTraceSchema as jt, configReloadStatusSchema as k, z$1 as kn, runLogLocationSchema as kt, nextShortIdFromSnapshots as l, traceCacheRefSchema as ln, defineEval as lr, removeDefaultConfigSchema as lt, runTouchesEval as m, traceDisplayConfigSchema as mn, buildCaseKey as mt, getTargetEvalKeys as n, cacheModeSchema as nn, nextEvalId as nr, evalDeriveConfigSchema as nt, getLatestRunInfos as o, cacheStatusSchema as on, setEvalOutput as or, llmCallPricingRateSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplaySchema as pn, trialSelectionModeSchema as pt, DEFAULT_LLM_CALLS_CONFIG as q, configureEvalRunLogs as qn, evalChartTooltipExtraSchema as qt, getTargetEvals as r, cacheOperationTypeSchema as rn, runInEvalRuntimeScope as rr, llmCallMetricFormatSchema as rt, loadPersistedRunSnapshot as s, serializedCacheSpanSchema as sn, setScopeCacheContext as sr, llmCallPricingSchema as st, executeRun as t, cacheListItemSchema as tn, mergeEvalOutput as tr, evalColumnsSchema as tt, persistCaseDetail as u, traceAttributeDisplayFormatSchema as un, getEvalRegistry as ur, resolveApiCallsConfig as ut, parseManualInputValues as v, traceSpanSchema as vn, assertionFailureSchema as vt, buildDeclaredColumnDefs as w, fileRefSchema as wn, evalStatItemSchema as wt, parseEvalDiscovery as x, columnDefSchema as xn, discoveryIssueSchema as xt, deriveEvalFreshness as y, traceSpanWarningSchema as yn, caseDetailSchema as yt, getEvalTitle as z, serializeCacheValue as zn, manualInputSelectOptionSchema as zt };
+export { defaultConfigKeySchema as $, getEvalStartTime as $n, cacheEntrySchema as $t, createRunRequestSchema as A, z$1 as An, runLogLocationSchema as At, getEvalDisplayStatus as B, serializeCacheValue as Bn, manualInputSelectOptionSchema as Bt, loadConfig as C, columnFormatSchema as Cn, evalFreshnessStatusSchema as Ct, createFsCacheStore as D, numberDisplayOptionsSchema as Dn, evalSummarySchema as Dt, validateCharts as E, jsonCellSchema as En, evalStatsConfigSchema as Et, extractApiCalls as F, hashCacheKey as Fn, manualInputFieldDescriptorSchema as Ft, runSummarySchema as G, EvalAssertionError as Gn, evalChartColorSchema as Gt, deriveStatusFromCaseRows as H, manualInputFileValueSchema as Hn, evalChartAggregateSchema as Ht, extractLlmCalls as I, hashCacheKeySync as In, manualInputJsonFieldSchema as It, agentEvalsConfigSchema as J, configureEvalRunLogs as Jn, evalChartTooltipExtraSchema as Jt, DEFAULT_API_CALLS_CONFIG as K, advanceEvalTime as Kn, evalChartConfigSchema as Kt, applyDerivedCallAttributes as L, deserializeCacheRecording as Ln, manualInputMultilineFieldSchema as Lt, sseEnvelopeSchema as M, captureEvalSpanError as Mn, scoreTraceSchema as Mt, extractCacheEntries as N, evalSpan as Nn, manualInputBooleanFieldSchema as Nt, configReloadStateSchema as O, repoFileRefSchema as On, runLogEntrySchema as Ot, extractCacheHits as P, evalTracer as Pn, manualInputDescriptorSchema as Pt, apiCallsConfigSchema as Q, getEvalCaseInput as Qn, cacheDebugKeyFileSchema as Qt, getNestedAttribute as R, deserializeCacheValue as Rn, manualInputNumberFieldSchema as Rt, resolveEvalDefaultConfig as S, columnDefSchema as Sn, discoveryIssueSchema as St, normalizeScoreDef as T, fileRefSchema as Tn, evalStatItemSchema as Tt, deriveStatusFromChildStatuses as U, readManualInputFile as Un, evalChartAxisSchema as Ut, deriveScopedSummaryFromCases as V, repoFile as Vn, manualInputTextFieldSchema as Vt, runManifestSchema as W, evalExpect as Wn, evalChartBuiltinMetricSchema as Wt, apiCallMetricPlacementSchema as X, evalLog as Xn, evalChartsConfigSchema as Xt, apiCallMetricFormatSchema as Y, evalAssert as Yn, evalChartTypeSchema as Yt, apiCallMetricSchema as Z, getCurrentScope as Zn, cacheDebugKeyEntrySchema as Zt, buildManualInputDescriptor as _, traceSpanErrorSchema as _n, getCaseRowCaseKey as _t, getLastRunStatuses as a, cacheRecordingOpSchema as an, runInEvalScope as ar, llmCallMetricPlacementSchema as at, loadEvalModule as b, traceSpanWarningSchema as bn, caseDetailSchema as bt, loadPersistedRunSnapshots as c, serializedCacheSpanSchema as cn, setScopeCacheContext as cr, llmCallPricingSchema as ct, persistRunState as d, traceAttributeDisplayFormatSchema as dn, getEvalRegistry as dr, resolveApiCallsConfig as dt, cacheEntryWithDebugKeySchema as en, incrementEvalOutput as er, evalColumnOverrideSchema as et, recomputeEvalStatusesInRuns as f, traceAttributeDisplayInputSchema as fn, resolveLlmCallsConfig as ft, resolveArtifactPath as g, traceDisplayInputConfigSchema as gn, buildEvalKey as gt, resolveTracePresentation as h, traceDisplayConfigSchema as hn, buildCaseKey as ht, generateRunId as i, cacheOperationTypeSchema as in, runInEvalRuntimeScope as ir, llmCallMetricFormatSchema as it, updateManualScoreRequestSchema as j, buildTraceTree as jn, runLogPhaseSchema as jt, configReloadStatusSchema as k, runArtifactRefSchema as kn, runLogLevelSchema as kt, nextShortIdFromSnapshots as l, spanCacheOptionsSchema as ln, startEvalBackgroundJob as lr, llmCallsConfigSchema as lt, runTouchesEval as m, traceAttributeDisplaySchema as mn, trialSelectionModeSchema as mt, getTargetEvalKeys as n, cacheListItemSchema as nn, mergeEvalOutput as nr, evalDeriveConfigSchema as nt, getLatestRunInfos as o, cacheRecordingSchema as on, runInExistingEvalScope as or, llmCallMetricSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplayPlacementSchema as pn, runLogsConfigSchema as pt, DEFAULT_LLM_CALLS_CONFIG as q, appendToEvalOutput as qn, evalChartMetricSchema as qt, getTargetEvals as r, cacheModeSchema as rn, nextEvalId as rr, llmCallCostCurrencySchema as rt, loadPersistedRunSnapshot as s, cacheStatusSchema as sn, setEvalOutput as sr, llmCallPricingRateSchema as st, executeRun as t, cacheFileSchema as tn, isInEvalScope as tr, evalColumnsSchema as tt, persistCaseDetail as u, traceCacheRefSchema as un, defineEval as ur, removeDefaultConfigSchema as ut, parseManualInputValues as v, traceSpanKindSchema as vn, getCaseRowEvalKey as vt, buildDeclaredColumnDefs as w, columnKindSchema as wn, evalStatAggregateSchema as wt, parseEvalDiscovery as x, cellValueSchema as xn, caseRowSchema as xt, deriveEvalFreshness as y, traceSpanSchema as yn, assertionFailureSchema as yt, getEvalTitle as z, serializeCacheRecording as zn, manualInputSelectFieldSchema as zt };

package/dist/{runner-1F8MeY5V.mjs → runner-Bnm1nz0U.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { n as createRunner } from "./cli-CIc_gBNM.mjs";
-import "./src-CkWT1iSu.mjs";
+import { n as createRunner } from "./cli-CMPmuY7W.mjs";
+import "./src-gZm9nyTp.mjs";
 //#region ../../apps/server/src/runner.ts
 let runnerInstance = null;
 function getRunnerInstance() {

package/dist/{runner-Bq1f9B9d.mjs → runner-CmpWwCe1.mjs} RENAMED Viewed

@@ -1,2 +1,2 @@
-import { n as initRunner, t as getRunnerInstance } from "./runner-1F8MeY5V.mjs";
+import { n as initRunner, t as getRunnerInstance } from "./runner-Bnm1nz0U.mjs";
 export { getRunnerInstance, initRunner };

package/dist/src-gZm9nyTp.mjs ADDED Viewed

@@ -0,0 +1,3 @@
+import "./runOrchestration-CAyVXPFz.mjs";
+import "./cli-CMPmuY7W.mjs";
+export {};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.29.0",
+  "version": "0.31.0",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"

package/skills/agent-eval/SKILL.md CHANGED Viewed

@@ -341,12 +341,15 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
   tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
   override `attributes.<field>` for non-default primitive span shapes, configure
   model-keyed `pricing` to derive USD costs from token counts, with nested
-  `providers` entries for provider-specific rates, add `derivedAttributes` to
-  persist computed values back onto matching LLM spans before trace consumers
-  run, and add entries to `metrics` to surface arbitrary user metrics
+  `providers` entries for provider-specific rates, add `costCurrencies` to show
+  converted cost columns in the expanded breakdown table only, add
+  `derivedAttributes` to persist computed values back onto matching LLM spans
+  before trace consumers run, and add entries to `metrics` to surface arbitrary user metrics
   (`format: 'string' | 'number' | 'duration' | 'json' | 'boolean'`,
-  `placements: ['header' | 'body']`). `derivedAttributes` keys are dot-paths
-  under `span.attributes`; return `undefined` to skip one span. For saved runs,
+  `placements: ['header' | 'body']`). `derivedAttributes` can be a keyed map
+  for one-off fields or one callback that returns multiple path/value pairs.
+  Derived keys are dot-paths under `span.attributes`; return `undefined` to
+  skip one span or one returned key. For saved runs,
   the case drawer more menu can recalculate configured LLM/API derived
   attributes for one case and persist the updated trace artifacts without
   re-running the eval.
@@ -374,9 +377,10 @@ cacheCreationInputTokens` so cache details are not double-counted.
   and `'fetch'` spans with `method`, `url`, `statusCode`, `request`,
   `response`, `requestBody`, `responseBody`, `headers`, `durationMs`, and
   `error` read from conventional attribute paths. Override `kinds` or
-  `attributes.<field>` for external tracers, add `derivedAttributes` for
-  computed persisted API span attributes, and add `metrics` with the same
-  formats and placements as LLM-call metrics.
+  `attributes.<field>` for external tracers, add `derivedAttributes` as a
+  keyed map or object-returning callback for computed persisted API span
+  attributes, and add `metrics` with the same formats and placements as
+  LLM-call metrics.
 - `runLogs` (in `agent-evals.config.ts`) controls case log capture. Use
   `runLogs: { captureConsole: false }` to keep console output in the terminal
   without persisting console calls to case details. Manual `evalLog(...)` calls