@ls-stack/agent-eval 0.29.0 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as defaultConfigKeySchema, $n as incrementEvalOutput, $t as cacheEntryWithDebugKeySchema, A as createRunRequestSchema, An as buildTraceTree, At as runLogPhaseSchema, B as getEvalDisplayStatus, Bn as repoFile, Bt as manualInputTextFieldSchema, Cn as columnKindSchema, Ct as evalStatAggregateSchema, Dn as repoFileRefSchema, Dt as runLogEntrySchema, En as numberDisplayOptionsSchema, Et as evalSummarySchema, F as extractApiCalls, Fn as hashCacheKeySync, Ft as manualInputJsonFieldSchema, G as runSummarySchema, Gn as advanceEvalTime, Gt as evalChartConfigSchema, H as deriveStatusFromCaseRows, Hn as readManualInputFile, Ht as evalChartAxisSchema, I as extractLlmCalls, In as deserializeCacheRecording, It as manualInputMultilineFieldSchema, J as agentEvalsConfigSchema, Jn as evalAssert, Jt as evalChartTypeSchema, K as DEFAULT_API_CALLS_CONFIG, Kn as appendToEvalOutput, Kt as evalChartMetricSchema, L as applyDerivedCallAttributes, Ln as deserializeCacheValue, Lt as manualInputNumberFieldSchema, M as sseEnvelopeSchema, Mn as evalSpan, Mt as manualInputBooleanFieldSchema, N as extractCacheEntries, Nn as evalTracer, Nt as manualInputDescriptorSchema, O as configReloadStateSchema, On as runArtifactRefSchema, Ot as runLogLevelSchema, P as extractCacheHits, Pn as hashCacheKey, Pt as manualInputFieldDescriptorSchema, Q as apiCallsConfigSchema, Qn as getEvalStartTime, Qt as cacheEntrySchema, R as getNestedAttribute, Rn as serializeCacheRecording, Rt as manualInputSelectFieldSchema, Sn as columnFormatSchema, St as evalFreshnessStatusSchema, Tn as jsonCellSchema, Tt as evalStatsConfigSchema, U as deriveStatusFromChildStatuses, Un as evalExpect, Ut as evalChartBuiltinMetricSchema, V as deriveScopedSummaryFromCases, Vn as manualInputFileValueSchema, Vt as evalChartAggregateSchema, W as runManifestSchema, Wn as EvalAssertionError, Wt as evalChartColorSchema, X as apiCallMetricPlacementSchema, Xn as getCurrentScope, Xt as cacheDebugKeyEntrySchema, Y as 
apiCallMetricFormatSchema, Yn as evalLog, Yt as evalChartsConfigSchema, Z as apiCallMetricSchema, Zn as getEvalCaseInput, Zt as cacheDebugKeyFileSchema, _n as traceSpanKindSchema, _t as getCaseRowEvalKey, an as cacheRecordingSchema, ar as runInExistingEvalScope, at as llmCallMetricSchema, bn as cellValueSchema, bt as caseRowSchema, cn as spanCacheOptionsSchema, cr as startEvalBackgroundJob, ct as llmCallsConfigSchema, dn as traceAttributeDisplayInputSchema, dt as resolveLlmCallsConfig, en as cacheFileSchema, er as isInEvalScope, et as evalColumnOverrideSchema, fn as traceAttributeDisplayPlacementSchema, ft as runLogsConfigSchema, gn as traceSpanErrorSchema, gt as getCaseRowCaseKey, hn as traceDisplayInputConfigSchema, ht as buildEvalKey, in as cacheRecordingOpSchema, ir as runInEvalScope, it as llmCallMetricPlacementSchema, j as updateManualScoreRequestSchema, jn as captureEvalSpanError, jt as scoreTraceSchema, k as configReloadStatusSchema, kn as z, kt as runLogLocationSchema, ln as traceCacheRefSchema, lr as defineEval, lt as removeDefaultConfigSchema, mn as traceDisplayConfigSchema, mt as buildCaseKey, nn as cacheModeSchema, nr as nextEvalId, nt as evalDeriveConfigSchema, on as cacheStatusSchema, or as setEvalOutput, ot as llmCallPricingRateSchema, pn as traceAttributeDisplaySchema, pt as trialSelectionModeSchema, q as DEFAULT_LLM_CALLS_CONFIG, qt as evalChartTooltipExtraSchema, rn as cacheOperationTypeSchema, rr as runInEvalRuntimeScope, rt as llmCallMetricFormatSchema, sn as serializedCacheSpanSchema, sr as setScopeCacheContext, st as llmCallPricingSchema, tn as cacheListItemSchema, tr as mergeEvalOutput, tt as evalColumnsSchema, un as traceAttributeDisplayFormatSchema, ur as getEvalRegistry, ut as resolveApiCallsConfig, vn as traceSpanSchema, vt as assertionFailureSchema, wn as fileRefSchema, wt as evalStatItemSchema, xn as columnDefSchema, xt as discoveryIssueSchema, yn as traceSpanWarningSchema, yt as caseDetailSchema, z as getEvalTitle, zn as 
serializeCacheValue, zt as manualInputSelectOptionSchema } from "./runOrchestration-CIARrLs6.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CIc_gBNM.mjs";
3
- import "./src-CkWT1iSu.mjs";
4
- export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, 
manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
1
+ import { $ as defaultConfigKeySchema, $n as getEvalStartTime, $t as cacheEntrySchema, A as createRunRequestSchema, An as z, At as runLogLocationSchema, B as getEvalDisplayStatus, Bn as serializeCacheValue, Bt as manualInputSelectOptionSchema, Cn as columnFormatSchema, Ct as evalFreshnessStatusSchema, Dn as numberDisplayOptionsSchema, Dt as evalSummarySchema, En as jsonCellSchema, Et as evalStatsConfigSchema, F as extractApiCalls, Fn as hashCacheKey, Ft as manualInputFieldDescriptorSchema, G as runSummarySchema, Gn as EvalAssertionError, Gt as evalChartColorSchema, H as deriveStatusFromCaseRows, Hn as manualInputFileValueSchema, Ht as evalChartAggregateSchema, I as extractLlmCalls, In as hashCacheKeySync, It as manualInputJsonFieldSchema, J as agentEvalsConfigSchema, Jt as evalChartTooltipExtraSchema, K as DEFAULT_API_CALLS_CONFIG, Kn as advanceEvalTime, Kt as evalChartConfigSchema, L as applyDerivedCallAttributes, Ln as deserializeCacheRecording, Lt as manualInputMultilineFieldSchema, M as sseEnvelopeSchema, Mn as captureEvalSpanError, Mt as scoreTraceSchema, N as extractCacheEntries, Nn as evalSpan, Nt as manualInputBooleanFieldSchema, O as configReloadStateSchema, On as repoFileRefSchema, Ot as runLogEntrySchema, P as extractCacheHits, Pn as evalTracer, Pt as manualInputDescriptorSchema, Q as apiCallsConfigSchema, Qn as getEvalCaseInput, Qt as cacheDebugKeyFileSchema, R as getNestedAttribute, Rn as deserializeCacheValue, Rt as manualInputNumberFieldSchema, Sn as columnDefSchema, St as discoveryIssueSchema, Tn as fileRefSchema, Tt as evalStatItemSchema, U as deriveStatusFromChildStatuses, Un as readManualInputFile, Ut as evalChartAxisSchema, V as deriveScopedSummaryFromCases, Vn as repoFile, Vt as manualInputTextFieldSchema, W as runManifestSchema, Wn as evalExpect, Wt as evalChartBuiltinMetricSchema, X as apiCallMetricPlacementSchema, Xn as evalLog, Xt as evalChartsConfigSchema, Y as apiCallMetricFormatSchema, Yn as evalAssert, Yt as evalChartTypeSchema, Z as 
apiCallMetricSchema, Zn as getCurrentScope, Zt as cacheDebugKeyEntrySchema, _n as traceSpanErrorSchema, _t as getCaseRowCaseKey, an as cacheRecordingOpSchema, ar as runInEvalScope, at as llmCallMetricPlacementSchema, bn as traceSpanWarningSchema, bt as caseDetailSchema, cn as serializedCacheSpanSchema, cr as setScopeCacheContext, ct as llmCallPricingSchema, dn as traceAttributeDisplayFormatSchema, dr as getEvalRegistry, dt as resolveApiCallsConfig, en as cacheEntryWithDebugKeySchema, er as incrementEvalOutput, et as evalColumnOverrideSchema, fn as traceAttributeDisplayInputSchema, ft as resolveLlmCallsConfig, gn as traceDisplayInputConfigSchema, gt as buildEvalKey, hn as traceDisplayConfigSchema, ht as buildCaseKey, in as cacheOperationTypeSchema, ir as runInEvalRuntimeScope, it as llmCallMetricFormatSchema, j as updateManualScoreRequestSchema, jn as buildTraceTree, jt as runLogPhaseSchema, k as configReloadStatusSchema, kn as runArtifactRefSchema, kt as runLogLevelSchema, ln as spanCacheOptionsSchema, lr as startEvalBackgroundJob, lt as llmCallsConfigSchema, mn as traceAttributeDisplaySchema, mt as trialSelectionModeSchema, nn as cacheListItemSchema, nr as mergeEvalOutput, nt as evalDeriveConfigSchema, on as cacheRecordingSchema, or as runInExistingEvalScope, ot as llmCallMetricSchema, pn as traceAttributeDisplayPlacementSchema, pt as runLogsConfigSchema, q as DEFAULT_LLM_CALLS_CONFIG, qn as appendToEvalOutput, qt as evalChartMetricSchema, rn as cacheModeSchema, rr as nextEvalId, rt as llmCallCostCurrencySchema, sn as cacheStatusSchema, sr as setEvalOutput, st as llmCallPricingRateSchema, tn as cacheFileSchema, tr as isInEvalScope, tt as evalColumnsSchema, un as traceCacheRefSchema, ur as defineEval, ut as removeDefaultConfigSchema, vn as traceSpanKindSchema, vt as getCaseRowEvalKey, wn as columnKindSchema, wt as evalStatAggregateSchema, xn as cellValueSchema, xt as caseRowSchema, yn as traceSpanSchema, yt as assertionFailureSchema, z as getEvalTitle, zn as 
serializeCacheRecording, zt as manualInputSelectFieldSchema } from "./runOrchestration-CAyVXPFz.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CMPmuY7W.mjs";
3
+ import "./src-gZm9nyTp.mjs";
4
+ export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, 
llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, G as runSummarySchema, Nt as manualInputDescriptorSchema, Tt as evalStatsConfigSchema, W as runManifestSchema, Yt as evalChartsConfigSchema, ht as buildEvalKey, qn as configureEvalRunLogs, r as getTargetEvals$1, t as executeRun, x as parseEvalDiscovery, xn as columnDefSchema } from "./runOrchestration-CIARrLs6.mjs";
1
+ import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, Et as evalStatsConfigSchema, G as runSummarySchema, Jn as configureEvalRunLogs, Pt as manualInputDescriptorSchema, Sn as columnDefSchema, W as runManifestSchema, Xt as evalChartsConfigSchema, gt as buildEvalKey, r as getTargetEvals$1, t as executeRun, x as parseEvalDiscovery } from "./runOrchestration-CAyVXPFz.mjs";
2
2
  import { createHash } from "node:crypto";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -3219,6 +3219,8 @@ const llmCallMetricPlacementSchema = z.enum(["header", "body"]);
3219
3219
  /** Where an API-call metric is rendered inside the API calls tab. */
3220
3220
  const apiCallMetricPlacementSchema = llmCallMetricPlacementSchema;
3221
3221
  const callDerivedAttributeSchema = z.custom((value) => typeof value === "function", { message: "Expected a derived attribute function" });
3222
+ const callDerivedAttributesFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a derived attributes function" });
3223
+ const callDerivedAttributesConfigSchema = z.union([z.record(z.string().min(1), callDerivedAttributeSchema), callDerivedAttributesFnSchema]);
3222
3224
  /**
3223
3225
  * Schema for a single user-defined metric attached to LLM call rows.
3224
3226
  *
@@ -3314,6 +3316,20 @@ const llmCallPricingSchema = llmCallPricingRateSchema.extend({
3314
3316
  */
3315
3317
  providers: z.record(z.string().min(1), llmCallPricingRateSchema).optional()
3316
3318
  });
3319
+ /**
3320
+ * Schema for extra currencies displayed in the LLM calls breakdown table.
3321
+ * Costs are still derived in USD, then multiplied by `usdToCurrencyRate`.
3322
+ */
3323
+ const llmCallCostCurrencySchema = z.object({
3324
+ /** Currency code or short display token, such as `BRL` or `EUR`. */
3325
+ code: z.string().min(1),
3326
+ /** Optional display label for tooltips and future UI surfaces. */
3327
+ label: z.string().min(1).optional(),
3328
+ /** Multiplier used to convert one USD to this currency. */
3329
+ usdToCurrencyRate: z.number().nonnegative(),
3330
+ /** Number presentation options for the converted value. */
3331
+ numberFormat: numberDisplayOptionsSchema.optional()
3332
+ });
3317
3333
  /** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
3318
3334
  const llmCallsConfigSchema = z.object({
3319
3335
  /** Span kinds treated as LLM calls. Defaults to `['llm']`. */
@@ -3345,15 +3361,21 @@ const llmCallsConfigSchema = z.object({
3345
3361
  /**
3346
3362
  * Derived attributes persisted onto every matching LLM span before
3347
3363
  * `deriveFromTracing`, default outputs, trace display, and call metrics read
3348
- * the trace. Keys are dot-paths under `span.attributes`; return `undefined`
3349
- * to skip writing the attribute for one span.
3364
+ * the trace. Use a keyed map for one-off fields, or one callback returning a
3365
+ * path/value object for multiple fields. Keys are dot-paths under
3366
+ * `span.attributes`; return `undefined` to skip one span or one returned key.
3350
3367
  */
3351
- derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
3368
+ derivedAttributes: callDerivedAttributesConfigSchema.optional(),
3352
3369
  /**
3353
3370
  * Model-keyed pricing registry used to calculate LLM-call costs from token
3354
3371
  * counts. Built-in LLM cost fields are only derived from this registry.
3355
3372
  */
3356
3373
  pricing: z.record(z.string().min(1), llmCallPricingSchema).optional(),
3374
+ /**
3375
+ * Additional currencies shown as columns in the LLM calls breakdown table.
3376
+ * These do not change persisted `costUsd` outputs, stats, or charts.
3377
+ */
3378
+ costCurrencies: z.array(llmCallCostCurrencySchema).optional(),
3357
3379
  /** Custom user-defined metrics surfaced on each LLM call. */
3358
3380
  metrics: z.array(llmCallMetricSchema).optional()
3359
3381
  });
@@ -3380,11 +3402,12 @@ const apiCallsConfigSchema = z.object({
3380
3402
  }).optional(),
3381
3403
  /**
3382
3404
  * Derived attributes persisted onto every matching API span before trace
3383
- * display and call metrics read the trace. Keys are dot-paths under
3384
- * `span.attributes`; return `undefined` to skip writing the attribute for
3385
- * one span.
3405
+ * display and call metrics read the trace. Use a keyed map for one-off
3406
+ * fields, or one callback returning a path/value object for multiple fields.
3407
+ * Keys are dot-paths under `span.attributes`; return `undefined` to skip one
3408
+ * span or one returned key.
3386
3409
  */
3387
- derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
3410
+ derivedAttributes: callDerivedAttributesConfigSchema.optional(),
3388
3411
  /** Custom user-defined metrics surfaced on each API call. */
3389
3412
  metrics: z.array(apiCallMetricSchema).optional()
3390
3413
  });
@@ -3418,7 +3441,8 @@ const DEFAULT_LLM_CALLS_CONFIG = {
3418
3441
  },
3419
3442
  derivedAttributes: [],
3420
3443
  metrics: [],
3421
- pricing: []
3444
+ pricing: [],
3445
+ costCurrencies: []
3422
3446
  };
3423
3447
  /** Default API-calls config the UI uses before the workspace fetch resolves. */
3424
3448
  const DEFAULT_API_CALLS_CONFIG = {
@@ -3444,7 +3468,9 @@ const DEFAULT_API_CALLS_CONFIG = {
3444
3468
  metrics: []
3445
3469
  };
3446
3470
  function resolveDerivedAttributes(input) {
3447
- return Object.entries(input ?? {}).map(([path, compute]) => ({
3471
+ if (input === void 0) return [];
3472
+ if (typeof input === "function") return [{ computeMany: input }];
3473
+ return Object.entries(input).map(([path, compute]) => ({
3448
3474
  path,
3449
3475
  compute
3450
3476
  }));
@@ -3496,6 +3522,14 @@ function resolveLlmCallPricingEntries(model, pricing) {
3496
3522
  });
3497
3523
  return entries;
3498
3524
  }
3525
+ function resolveLlmCallCostCurrency(currency) {
3526
+ return {
3527
+ code: currency.code,
3528
+ label: currency.label,
3529
+ usdToCurrencyRate: currency.usdToCurrencyRate,
3530
+ numberFormat: currency.numberFormat
3531
+ };
3532
+ }
3499
3533
  /**
3500
3534
  * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
3501
3535
  * by the UI to derive the LLM calls tab.
@@ -3507,6 +3541,8 @@ function resolveLlmCallPricingEntries(model, pricing) {
3507
3541
  * - Missing `metrics[].placements` defaults to `['body']`.
3508
3542
  * - Missing `pricing` defaults to an empty registry; built-in costs are only
3509
3543
  * derived from configured model-keyed pricing and token counts.
3544
+ * - Missing `costCurrencies` defaults to an empty list; extra currencies only
3545
+ * affect the expanded LLM calls breakdown table.
3510
3546
  */
3511
3547
  function resolveLlmCallsConfig(input) {
3512
3548
  return {
@@ -3517,7 +3553,8 @@ function resolveLlmCallsConfig(input) {
3517
3553
  },
3518
3554
  derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
3519
3555
  metrics: (input?.metrics ?? []).map(resolveLlmCallMetric),
3520
- pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing))
3556
+ pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing)),
3557
+ costCurrencies: (input?.costCurrencies ?? []).map(resolveLlmCallCostCurrency)
3521
3558
  };
3522
3559
  }
3523
3560
  /**
@@ -3809,11 +3846,31 @@ function mergeNestedAttribute$1(value, path, attributeValue) {
3809
3846
  function applyDerivedAttributesForKind(params) {
3810
3847
  let attributes = params.span.attributes;
3811
3848
  for (const derivedAttribute of params.derivedAttributes) {
3812
- if (derivedAttribute.compute === void 0) continue;
3813
3849
  const span = {
3814
3850
  ...params.span,
3815
3851
  attributes
3816
3852
  };
3853
+ if (derivedAttribute.computeMany !== void 0) {
3854
+ const values = (() => {
3855
+ try {
3856
+ return derivedAttribute.computeMany({
3857
+ attributes,
3858
+ span,
3859
+ get: (path) => getNestedAttribute(attributes, path)
3860
+ });
3861
+ } catch {
3862
+ return;
3863
+ }
3864
+ })();
3865
+ if (!isRecord$3(values)) continue;
3866
+ for (const [path, value] of Object.entries(values)) {
3867
+ if (value === void 0) continue;
3868
+ attributes = mergeNestedAttribute$1(attributes, path, value);
3869
+ }
3870
+ continue;
3871
+ }
3872
+ if (derivedAttribute.path === void 0) continue;
3873
+ if (derivedAttribute.compute === void 0) continue;
3817
3874
  const value = (() => {
3818
3875
  try {
3819
3876
  return derivedAttribute.compute({
@@ -7161,4 +7218,4 @@ function toLastRunStatus(status) {
7161
7218
  return status === "pending" ? null : status;
7162
7219
  }
7163
7220
  //#endregion
7164
- export { defaultConfigKeySchema as $, incrementEvalOutput as $n, cacheEntryWithDebugKeySchema as $t, createRunRequestSchema as A, buildTraceTree as An, runLogPhaseSchema as At, getEvalDisplayStatus as B, repoFile as Bn, manualInputTextFieldSchema as Bt, loadConfig as C, columnKindSchema as Cn, evalStatAggregateSchema as Ct, createFsCacheStore as D, repoFileRefSchema as Dn, runLogEntrySchema as Dt, validateCharts as E, numberDisplayOptionsSchema as En, evalSummarySchema as Et, extractApiCalls as F, hashCacheKeySync as Fn, manualInputJsonFieldSchema as Ft, runSummarySchema as G, advanceEvalTime as Gn, evalChartConfigSchema as Gt, deriveStatusFromCaseRows as H, readManualInputFile as Hn, evalChartAxisSchema as Ht, extractLlmCalls as I, deserializeCacheRecording as In, manualInputMultilineFieldSchema as It, agentEvalsConfigSchema as J, evalAssert as Jn, evalChartTypeSchema as Jt, DEFAULT_API_CALLS_CONFIG as K, appendToEvalOutput as Kn, evalChartMetricSchema as Kt, applyDerivedCallAttributes as L, deserializeCacheValue as Ln, manualInputNumberFieldSchema as Lt, sseEnvelopeSchema as M, evalSpan as Mn, manualInputBooleanFieldSchema as Mt, extractCacheEntries as N, evalTracer as Nn, manualInputDescriptorSchema as Nt, configReloadStateSchema as O, runArtifactRefSchema as On, runLogLevelSchema as Ot, extractCacheHits as P, hashCacheKey as Pn, manualInputFieldDescriptorSchema as Pt, apiCallsConfigSchema as Q, getEvalStartTime as Qn, cacheEntrySchema as Qt, getNestedAttribute as R, serializeCacheRecording as Rn, manualInputSelectFieldSchema as Rt, resolveEvalDefaultConfig as S, columnFormatSchema as Sn, evalFreshnessStatusSchema as St, normalizeScoreDef as T, jsonCellSchema as Tn, evalStatsConfigSchema as Tt, deriveStatusFromChildStatuses as U, evalExpect as Un, evalChartBuiltinMetricSchema as Ut, deriveScopedSummaryFromCases as V, manualInputFileValueSchema as Vn, evalChartAggregateSchema as Vt, runManifestSchema as W, EvalAssertionError as Wn, evalChartColorSchema as Wt, 
apiCallMetricPlacementSchema as X, getCurrentScope as Xn, cacheDebugKeyEntrySchema as Xt, apiCallMetricFormatSchema as Y, evalLog as Yn, evalChartsConfigSchema as Yt, apiCallMetricSchema as Z, getEvalCaseInput as Zn, cacheDebugKeyFileSchema as Zt, buildManualInputDescriptor as _, traceSpanKindSchema as _n, getCaseRowEvalKey as _t, getLastRunStatuses as a, cacheRecordingSchema as an, runInExistingEvalScope as ar, llmCallMetricSchema as at, loadEvalModule as b, cellValueSchema as bn, caseRowSchema as bt, loadPersistedRunSnapshots as c, spanCacheOptionsSchema as cn, startEvalBackgroundJob as cr, llmCallsConfigSchema as ct, persistRunState as d, traceAttributeDisplayInputSchema as dn, resolveLlmCallsConfig as dt, cacheFileSchema as en, isInEvalScope as er, evalColumnOverrideSchema as et, recomputeEvalStatusesInRuns as f, traceAttributeDisplayPlacementSchema as fn, runLogsConfigSchema as ft, resolveArtifactPath as g, traceSpanErrorSchema as gn, getCaseRowCaseKey as gt, resolveTracePresentation as h, traceDisplayInputConfigSchema as hn, buildEvalKey as ht, generateRunId as i, cacheRecordingOpSchema as in, runInEvalScope as ir, llmCallMetricPlacementSchema as it, updateManualScoreRequestSchema as j, captureEvalSpanError as jn, scoreTraceSchema as jt, configReloadStatusSchema as k, z$1 as kn, runLogLocationSchema as kt, nextShortIdFromSnapshots as l, traceCacheRefSchema as ln, defineEval as lr, removeDefaultConfigSchema as lt, runTouchesEval as m, traceDisplayConfigSchema as mn, buildCaseKey as mt, getTargetEvalKeys as n, cacheModeSchema as nn, nextEvalId as nr, evalDeriveConfigSchema as nt, getLatestRunInfos as o, cacheStatusSchema as on, setEvalOutput as or, llmCallPricingRateSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplaySchema as pn, trialSelectionModeSchema as pt, DEFAULT_LLM_CALLS_CONFIG as q, configureEvalRunLogs as qn, evalChartTooltipExtraSchema as qt, getTargetEvals as r, cacheOperationTypeSchema as rn, runInEvalRuntimeScope as rr, 
llmCallMetricFormatSchema as rt, loadPersistedRunSnapshot as s, serializedCacheSpanSchema as sn, setScopeCacheContext as sr, llmCallPricingSchema as st, executeRun as t, cacheListItemSchema as tn, mergeEvalOutput as tr, evalColumnsSchema as tt, persistCaseDetail as u, traceAttributeDisplayFormatSchema as un, getEvalRegistry as ur, resolveApiCallsConfig as ut, parseManualInputValues as v, traceSpanSchema as vn, assertionFailureSchema as vt, buildDeclaredColumnDefs as w, fileRefSchema as wn, evalStatItemSchema as wt, parseEvalDiscovery as x, columnDefSchema as xn, discoveryIssueSchema as xt, deriveEvalFreshness as y, traceSpanWarningSchema as yn, caseDetailSchema as yt, getEvalTitle as z, serializeCacheValue as zn, manualInputSelectOptionSchema as zt };
7221
+ export { defaultConfigKeySchema as $, getEvalStartTime as $n, cacheEntrySchema as $t, createRunRequestSchema as A, z$1 as An, runLogLocationSchema as At, getEvalDisplayStatus as B, serializeCacheValue as Bn, manualInputSelectOptionSchema as Bt, loadConfig as C, columnFormatSchema as Cn, evalFreshnessStatusSchema as Ct, createFsCacheStore as D, numberDisplayOptionsSchema as Dn, evalSummarySchema as Dt, validateCharts as E, jsonCellSchema as En, evalStatsConfigSchema as Et, extractApiCalls as F, hashCacheKey as Fn, manualInputFieldDescriptorSchema as Ft, runSummarySchema as G, EvalAssertionError as Gn, evalChartColorSchema as Gt, deriveStatusFromCaseRows as H, manualInputFileValueSchema as Hn, evalChartAggregateSchema as Ht, extractLlmCalls as I, hashCacheKeySync as In, manualInputJsonFieldSchema as It, agentEvalsConfigSchema as J, configureEvalRunLogs as Jn, evalChartTooltipExtraSchema as Jt, DEFAULT_API_CALLS_CONFIG as K, advanceEvalTime as Kn, evalChartConfigSchema as Kt, applyDerivedCallAttributes as L, deserializeCacheRecording as Ln, manualInputMultilineFieldSchema as Lt, sseEnvelopeSchema as M, captureEvalSpanError as Mn, scoreTraceSchema as Mt, extractCacheEntries as N, evalSpan as Nn, manualInputBooleanFieldSchema as Nt, configReloadStateSchema as O, repoFileRefSchema as On, runLogEntrySchema as Ot, extractCacheHits as P, evalTracer as Pn, manualInputDescriptorSchema as Pt, apiCallsConfigSchema as Q, getEvalCaseInput as Qn, cacheDebugKeyFileSchema as Qt, getNestedAttribute as R, deserializeCacheValue as Rn, manualInputNumberFieldSchema as Rt, resolveEvalDefaultConfig as S, columnDefSchema as Sn, discoveryIssueSchema as St, normalizeScoreDef as T, fileRefSchema as Tn, evalStatItemSchema as Tt, deriveStatusFromChildStatuses as U, readManualInputFile as Un, evalChartAxisSchema as Ut, deriveScopedSummaryFromCases as V, repoFile as Vn, manualInputTextFieldSchema as Vt, runManifestSchema as W, evalExpect as Wn, evalChartBuiltinMetricSchema as Wt, 
apiCallMetricPlacementSchema as X, evalLog as Xn, evalChartsConfigSchema as Xt, apiCallMetricFormatSchema as Y, evalAssert as Yn, evalChartTypeSchema as Yt, apiCallMetricSchema as Z, getCurrentScope as Zn, cacheDebugKeyEntrySchema as Zt, buildManualInputDescriptor as _, traceSpanErrorSchema as _n, getCaseRowCaseKey as _t, getLastRunStatuses as a, cacheRecordingOpSchema as an, runInEvalScope as ar, llmCallMetricPlacementSchema as at, loadEvalModule as b, traceSpanWarningSchema as bn, caseDetailSchema as bt, loadPersistedRunSnapshots as c, serializedCacheSpanSchema as cn, setScopeCacheContext as cr, llmCallPricingSchema as ct, persistRunState as d, traceAttributeDisplayFormatSchema as dn, getEvalRegistry as dr, resolveApiCallsConfig as dt, cacheEntryWithDebugKeySchema as en, incrementEvalOutput as er, evalColumnOverrideSchema as et, recomputeEvalStatusesInRuns as f, traceAttributeDisplayInputSchema as fn, resolveLlmCallsConfig as ft, resolveArtifactPath as g, traceDisplayInputConfigSchema as gn, buildEvalKey as gt, resolveTracePresentation as h, traceDisplayConfigSchema as hn, buildCaseKey as ht, generateRunId as i, cacheOperationTypeSchema as in, runInEvalRuntimeScope as ir, llmCallMetricFormatSchema as it, updateManualScoreRequestSchema as j, buildTraceTree as jn, runLogPhaseSchema as jt, configReloadStatusSchema as k, runArtifactRefSchema as kn, runLogLevelSchema as kt, nextShortIdFromSnapshots as l, spanCacheOptionsSchema as ln, startEvalBackgroundJob as lr, llmCallsConfigSchema as lt, runTouchesEval as m, traceAttributeDisplaySchema as mn, trialSelectionModeSchema as mt, getTargetEvalKeys as n, cacheListItemSchema as nn, mergeEvalOutput as nr, evalDeriveConfigSchema as nt, getLatestRunInfos as o, cacheRecordingSchema as on, runInExistingEvalScope as or, llmCallMetricSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplayPlacementSchema as pn, runLogsConfigSchema as pt, DEFAULT_LLM_CALLS_CONFIG as q, appendToEvalOutput as qn, evalChartMetricSchema 
as qt, getTargetEvals as r, cacheModeSchema as rn, nextEvalId as rr, llmCallCostCurrencySchema as rt, loadPersistedRunSnapshot as s, cacheStatusSchema as sn, setEvalOutput as sr, llmCallPricingRateSchema as st, executeRun as t, cacheFileSchema as tn, isInEvalScope as tr, evalColumnsSchema as tt, persistCaseDetail as u, traceCacheRefSchema as un, defineEval as ur, removeDefaultConfigSchema as ut, parseManualInputValues as v, traceSpanKindSchema as vn, getCaseRowEvalKey as vt, buildDeclaredColumnDefs as w, columnKindSchema as wn, evalStatAggregateSchema as wt, parseEvalDiscovery as x, cellValueSchema as xn, caseRowSchema as xt, deriveEvalFreshness as y, traceSpanSchema as yn, assertionFailureSchema as yt, getEvalTitle as z, serializeCacheRecording as zn, manualInputSelectFieldSchema as zt };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-CIc_gBNM.mjs";
2
- import "./src-CkWT1iSu.mjs";
1
+ import { n as createRunner } from "./cli-CMPmuY7W.mjs";
2
+ import "./src-gZm9nyTp.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-1F8MeY5V.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-Bnm1nz0U.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -0,0 +1,3 @@
1
+ import "./runOrchestration-CAyVXPFz.mjs";
2
+ import "./cli-CMPmuY7W.mjs";
3
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.29.0",
3
+ "version": "0.31.0",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -341,12 +341,15 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
341
341
  tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
342
342
  override `attributes.<field>` for non-default primitive span shapes, configure
343
343
  model-keyed `pricing` to derive USD costs from token counts, with nested
344
- `providers` entries for provider-specific rates, add `derivedAttributes` to
345
- persist computed values back onto matching LLM spans before trace consumers
346
- run, and add entries to `metrics` to surface arbitrary user metrics
344
+ `providers` entries for provider-specific rates, add `costCurrencies` to show
345
+ converted cost columns in the expanded breakdown table only, add
346
+ `derivedAttributes` to persist computed values back onto matching LLM spans
347
+ before trace consumers run, and add entries to `metrics` to surface arbitrary user metrics
347
348
  (`format: 'string' | 'number' | 'duration' | 'json' | 'boolean'`,
348
- `placements: ['header' | 'body']`). `derivedAttributes` keys are dot-paths
349
- under `span.attributes`; return `undefined` to skip one span. For saved runs,
349
+ `placements: ['header' | 'body']`). `derivedAttributes` can be a keyed map
350
+ for one-off fields or one callback that returns multiple path/value pairs.
351
+ Derived keys are dot-paths under `span.attributes`; return `undefined` to
352
+ skip one span or one returned key. For saved runs,
350
353
  the case drawer's "more" menu can recalculate configured LLM/API derived
351
354
  attributes for one case and persist the updated trace artifacts without
352
355
  re-running the eval.
@@ -374,9 +377,10 @@ cacheCreationInputTokens` so cache details are not double-counted.
374
377
  and `'fetch'` spans with `method`, `url`, `statusCode`, `request`,
375
378
  `response`, `requestBody`, `responseBody`, `headers`, `durationMs`, and
376
379
  `error` read from conventional attribute paths. Override `kinds` or
377
- `attributes.<field>` for external tracers, add `derivedAttributes` for
378
- computed persisted API span attributes, and add `metrics` with the same
379
- formats and placements as LLM-call metrics.
380
+ `attributes.<field>` for external tracers, add `derivedAttributes` as a
381
+ keyed map or object-returning callback for computed persisted API span
382
+ attributes, and add `metrics` with the same formats and placements as
383
+ LLM-call metrics.
380
384
  - `runLogs` (in `agent-evals.config.ts`) controls case log capture. Use
381
385
  `runLogs: { captureConsole: false }` to keep console output in the terminal
382
386
  without persisting console calls to case details. Manual `evalLog(...)` calls