@ls-stack/agent-eval 0.29.0 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-D6-msfKP.mjs → app-BrSMRTpy.mjs} +4 -4
- package/dist/apps/web/dist/assets/index-CPcVyFRP.js +118 -0
- package/dist/apps/web/dist/assets/index-ClPR-tfN.css +1 -0
- package/dist/apps/web/dist/index.html +2 -2
- package/dist/bin.mjs +1 -1
- package/dist/{cli-CIc_gBNM.mjs → cli-CMPmuY7W.mjs} +3 -3
- package/dist/index.d.mts +182 -113
- package/dist/index.mjs +4 -4
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-CIARrLs6.mjs → runOrchestration-CAyVXPFz.mjs} +69 -12
- package/dist/{runner-1F8MeY5V.mjs → runner-Bnm1nz0U.mjs} +2 -2
- package/dist/{runner-Bq1f9B9d.mjs → runner-CmpWwCe1.mjs} +1 -1
- package/dist/src-gZm9nyTp.mjs +3 -0
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +12 -8
- package/dist/apps/web/dist/assets/index-BCr6J8Uj.js +0 -118
- package/dist/apps/web/dist/assets/index-DjUTm3M-.css +0 -1
- package/dist/src-CkWT1iSu.mjs +0 -3
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as defaultConfigKeySchema, $n as
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-
|
|
3
|
-
import "./src-
|
|
4
|
-
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, llmCallsConfigSchema, 
manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
|
1
|
+
import { $ as defaultConfigKeySchema, $n as getEvalStartTime, $t as cacheEntrySchema, A as createRunRequestSchema, An as z, At as runLogLocationSchema, B as getEvalDisplayStatus, Bn as serializeCacheValue, Bt as manualInputSelectOptionSchema, Cn as columnFormatSchema, Ct as evalFreshnessStatusSchema, Dn as numberDisplayOptionsSchema, Dt as evalSummarySchema, En as jsonCellSchema, Et as evalStatsConfigSchema, F as extractApiCalls, Fn as hashCacheKey, Ft as manualInputFieldDescriptorSchema, G as runSummarySchema, Gn as EvalAssertionError, Gt as evalChartColorSchema, H as deriveStatusFromCaseRows, Hn as manualInputFileValueSchema, Ht as evalChartAggregateSchema, I as extractLlmCalls, In as hashCacheKeySync, It as manualInputJsonFieldSchema, J as agentEvalsConfigSchema, Jt as evalChartTooltipExtraSchema, K as DEFAULT_API_CALLS_CONFIG, Kn as advanceEvalTime, Kt as evalChartConfigSchema, L as applyDerivedCallAttributes, Ln as deserializeCacheRecording, Lt as manualInputMultilineFieldSchema, M as sseEnvelopeSchema, Mn as captureEvalSpanError, Mt as scoreTraceSchema, N as extractCacheEntries, Nn as evalSpan, Nt as manualInputBooleanFieldSchema, O as configReloadStateSchema, On as repoFileRefSchema, Ot as runLogEntrySchema, P as extractCacheHits, Pn as evalTracer, Pt as manualInputDescriptorSchema, Q as apiCallsConfigSchema, Qn as getEvalCaseInput, Qt as cacheDebugKeyFileSchema, R as getNestedAttribute, Rn as deserializeCacheValue, Rt as manualInputNumberFieldSchema, Sn as columnDefSchema, St as discoveryIssueSchema, Tn as fileRefSchema, Tt as evalStatItemSchema, U as deriveStatusFromChildStatuses, Un as readManualInputFile, Ut as evalChartAxisSchema, V as deriveScopedSummaryFromCases, Vn as repoFile, Vt as manualInputTextFieldSchema, W as runManifestSchema, Wn as evalExpect, Wt as evalChartBuiltinMetricSchema, X as apiCallMetricPlacementSchema, Xn as evalLog, Xt as evalChartsConfigSchema, Y as apiCallMetricFormatSchema, Yn as evalAssert, Yt as evalChartTypeSchema, Z as 
apiCallMetricSchema, Zn as getCurrentScope, Zt as cacheDebugKeyEntrySchema, _n as traceSpanErrorSchema, _t as getCaseRowCaseKey, an as cacheRecordingOpSchema, ar as runInEvalScope, at as llmCallMetricPlacementSchema, bn as traceSpanWarningSchema, bt as caseDetailSchema, cn as serializedCacheSpanSchema, cr as setScopeCacheContext, ct as llmCallPricingSchema, dn as traceAttributeDisplayFormatSchema, dr as getEvalRegistry, dt as resolveApiCallsConfig, en as cacheEntryWithDebugKeySchema, er as incrementEvalOutput, et as evalColumnOverrideSchema, fn as traceAttributeDisplayInputSchema, ft as resolveLlmCallsConfig, gn as traceDisplayInputConfigSchema, gt as buildEvalKey, hn as traceDisplayConfigSchema, ht as buildCaseKey, in as cacheOperationTypeSchema, ir as runInEvalRuntimeScope, it as llmCallMetricFormatSchema, j as updateManualScoreRequestSchema, jn as buildTraceTree, jt as runLogPhaseSchema, k as configReloadStatusSchema, kn as runArtifactRefSchema, kt as runLogLevelSchema, ln as spanCacheOptionsSchema, lr as startEvalBackgroundJob, lt as llmCallsConfigSchema, mn as traceAttributeDisplaySchema, mt as trialSelectionModeSchema, nn as cacheListItemSchema, nr as mergeEvalOutput, nt as evalDeriveConfigSchema, on as cacheRecordingSchema, or as runInExistingEvalScope, ot as llmCallMetricSchema, pn as traceAttributeDisplayPlacementSchema, pt as runLogsConfigSchema, q as DEFAULT_LLM_CALLS_CONFIG, qn as appendToEvalOutput, qt as evalChartMetricSchema, rn as cacheModeSchema, rr as nextEvalId, rt as llmCallCostCurrencySchema, sn as cacheStatusSchema, sr as setEvalOutput, st as llmCallPricingRateSchema, tn as cacheFileSchema, tr as isInEvalScope, tt as evalColumnsSchema, un as traceCacheRefSchema, ur as defineEval, ut as removeDefaultConfigSchema, vn as traceSpanKindSchema, vt as getCaseRowEvalKey, wn as columnKindSchema, wt as evalStatAggregateSchema, xn as cellValueSchema, xt as caseRowSchema, yn as traceSpanSchema, yt as assertionFailureSchema, z as getEvalTitle, zn as 
serializeCacheRecording, zt as manualInputSelectFieldSchema } from "./runOrchestration-CAyVXPFz.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-CMPmuY7W.mjs";
|
|
3
|
+
import "./src-gZm9nyTp.mjs";
|
|
4
|
+
export { DEFAULT_API_CALLS_CONFIG, DEFAULT_LLM_CALLS_CONFIG, EvalAssertionError, advanceEvalTime, agentEvalsConfigSchema, apiCallMetricFormatSchema, apiCallMetricPlacementSchema, apiCallMetricSchema, apiCallsConfigSchema, appendToEvalOutput, applyDerivedCallAttributes, assertionFailureSchema, buildCaseKey, buildEvalKey, buildTraceTree, cacheDebugKeyEntrySchema, cacheDebugKeyFileSchema, cacheEntrySchema, cacheEntryWithDebugKeySchema, cacheFileSchema, cacheListItemSchema, cacheModeSchema, cacheOperationTypeSchema, cacheRecordingOpSchema, cacheRecordingSchema, cacheStatusSchema, captureEvalSpanError, caseDetailSchema, caseRowSchema, cellValueSchema, cleanupStagedManualInputFiles, columnDefSchema, columnFormatSchema, columnKindSchema, configReloadStateSchema, configReloadStatusSchema, createRunRequestSchema, createRunner, defaultConfigKeySchema, defineEval, deriveScopedSummaryFromCases, deriveStatusFromCaseRows, deriveStatusFromChildStatuses, deserializeCacheRecording, deserializeCacheValue, discoveryIssueSchema, evalAssert, evalChartAggregateSchema, evalChartAxisSchema, evalChartBuiltinMetricSchema, evalChartColorSchema, evalChartConfigSchema, evalChartMetricSchema, evalChartTooltipExtraSchema, evalChartTypeSchema, evalChartsConfigSchema, evalColumnOverrideSchema, evalColumnsSchema, evalDeriveConfigSchema, evalExpect, evalFreshnessStatusSchema, evalLog, evalSpan, evalStatAggregateSchema, evalStatItemSchema, evalStatsConfigSchema, evalSummarySchema, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, fileRefSchema, getCaseRowCaseKey, getCaseRowEvalKey, getCurrentScope, getEvalCaseInput, getEvalDisplayStatus, getEvalRegistry, getEvalStartTime, getEvalTitle, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, jsonCellSchema, llmCallCostCurrencySchema, llmCallMetricFormatSchema, llmCallMetricPlacementSchema, llmCallMetricSchema, llmCallPricingRateSchema, llmCallPricingSchema, 
llmCallsConfigSchema, manualInputBooleanFieldSchema, manualInputDescriptorSchema, manualInputFieldDescriptorSchema, manualInputFileValueSchema, manualInputJsonFieldSchema, manualInputMultilineFieldSchema, manualInputNumberFieldSchema, manualInputSelectFieldSchema, manualInputSelectOptionSchema, manualInputTextFieldSchema, materializeManualInputFiles, mergeEvalOutput, nextEvalId, numberDisplayOptionsSchema, readManualInputFile, removeDefaultConfigSchema, repoFile, repoFileRefSchema, resolveApiCallsConfig, resolveLlmCallsConfig, runArtifactRefSchema, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, runLogEntrySchema, runLogLevelSchema, runLogLocationSchema, runLogPhaseSchema, runLogsConfigSchema, runManifestSchema, runSummarySchema, scoreTraceSchema, serializeCacheRecording, serializeCacheValue, serializedCacheSpanSchema, setEvalOutput, setScopeCacheContext, spanCacheOptionsSchema, sseEnvelopeSchema, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, traceAttributeDisplayFormatSchema, traceAttributeDisplayInputSchema, traceAttributeDisplayPlacementSchema, traceAttributeDisplaySchema, traceCacheRefSchema, traceDisplayConfigSchema, traceDisplayInputConfigSchema, traceSpanErrorSchema, traceSpanKindSchema, traceSpanSchema, traceSpanWarningSchema, trialSelectionModeSchema, updateManualScoreRequestSchema, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, G as runSummarySchema,
|
|
1
|
+
import { A as createRunRequestSchema, C as loadConfig, D as createFsCacheStore, Et as evalStatsConfigSchema, G as runSummarySchema, Jn as configureEvalRunLogs, Pt as manualInputDescriptorSchema, Sn as columnDefSchema, W as runManifestSchema, Xt as evalChartsConfigSchema, gt as buildEvalKey, r as getTargetEvals$1, t as executeRun, x as parseEvalDiscovery } from "./runOrchestration-CAyVXPFz.mjs";
|
|
2
2
|
import { createHash } from "node:crypto";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|
|
package/dist/{runOrchestration-CIARrLs6.mjs → runOrchestration-CAyVXPFz.mjs}
CHANGED
|
@@ -3219,6 +3219,8 @@ const llmCallMetricPlacementSchema = z.enum(["header", "body"]);
|
|
|
3219
3219
|
/** Where an API-call metric is rendered inside the API calls tab. */
|
|
3220
3220
|
const apiCallMetricPlacementSchema = llmCallMetricPlacementSchema;
|
|
3221
3221
|
const callDerivedAttributeSchema = z.custom((value) => typeof value === "function", { message: "Expected a derived attribute function" });
|
|
3222
|
+
const callDerivedAttributesFnSchema = z.custom((value) => typeof value === "function", { message: "Expected a derived attributes function" });
|
|
3223
|
+
const callDerivedAttributesConfigSchema = z.union([z.record(z.string().min(1), callDerivedAttributeSchema), callDerivedAttributesFnSchema]);
|
|
3222
3224
|
/**
|
|
3223
3225
|
* Schema for a single user-defined metric attached to LLM call rows.
|
|
3224
3226
|
*
|
|
@@ -3314,6 +3316,20 @@ const llmCallPricingSchema = llmCallPricingRateSchema.extend({
|
|
|
3314
3316
|
*/
|
|
3315
3317
|
providers: z.record(z.string().min(1), llmCallPricingRateSchema).optional()
|
|
3316
3318
|
});
|
|
3319
|
+
/**
|
|
3320
|
+
* Schema for extra currencies displayed in the LLM calls breakdown table.
|
|
3321
|
+
* Costs are still derived in USD, then multiplied by `usdToCurrencyRate`.
|
|
3322
|
+
*/
|
|
3323
|
+
const llmCallCostCurrencySchema = z.object({
|
|
3324
|
+
/** Currency code or short display token, such as `BRL` or `EUR`. */
|
|
3325
|
+
code: z.string().min(1),
|
|
3326
|
+
/** Optional display label for tooltips and future UI surfaces. */
|
|
3327
|
+
label: z.string().min(1).optional(),
|
|
3328
|
+
/** Multiplier used to convert one USD to this currency. */
|
|
3329
|
+
usdToCurrencyRate: z.number().nonnegative(),
|
|
3330
|
+
/** Number presentation options for the converted value. */
|
|
3331
|
+
numberFormat: numberDisplayOptionsSchema.optional()
|
|
3332
|
+
});
|
|
3317
3333
|
/** Schema for the global LLM calls config block in `agent-evals.config.ts`. */
|
|
3318
3334
|
const llmCallsConfigSchema = z.object({
|
|
3319
3335
|
/** Span kinds treated as LLM calls. Defaults to `['llm']`. */
|
|
@@ -3345,15 +3361,21 @@ const llmCallsConfigSchema = z.object({
|
|
|
3345
3361
|
/**
|
|
3346
3362
|
* Derived attributes persisted onto every matching LLM span before
|
|
3347
3363
|
* `deriveFromTracing`, default outputs, trace display, and call metrics read
|
|
3348
|
-
* the trace.
|
|
3349
|
-
*
|
|
3364
|
+
* the trace. Use a keyed map for one-off fields, or one callback returning a
|
|
3365
|
+
* path/value object for multiple fields. Keys are dot-paths under
|
|
3366
|
+
* `span.attributes`; return `undefined` to skip one span or one returned key.
|
|
3350
3367
|
*/
|
|
3351
|
-
derivedAttributes:
|
|
3368
|
+
derivedAttributes: callDerivedAttributesConfigSchema.optional(),
|
|
3352
3369
|
/**
|
|
3353
3370
|
* Model-keyed pricing registry used to calculate LLM-call costs from token
|
|
3354
3371
|
* counts. Built-in LLM cost fields are only derived from this registry.
|
|
3355
3372
|
*/
|
|
3356
3373
|
pricing: z.record(z.string().min(1), llmCallPricingSchema).optional(),
|
|
3374
|
+
/**
|
|
3375
|
+
* Additional currencies shown as columns in the LLM calls breakdown table.
|
|
3376
|
+
* These do not change persisted `costUsd` outputs, stats, or charts.
|
|
3377
|
+
*/
|
|
3378
|
+
costCurrencies: z.array(llmCallCostCurrencySchema).optional(),
|
|
3357
3379
|
/** Custom user-defined metrics surfaced on each LLM call. */
|
|
3358
3380
|
metrics: z.array(llmCallMetricSchema).optional()
|
|
3359
3381
|
});
|
|
@@ -3380,11 +3402,12 @@ const apiCallsConfigSchema = z.object({
|
|
|
3380
3402
|
}).optional(),
|
|
3381
3403
|
/**
|
|
3382
3404
|
* Derived attributes persisted onto every matching API span before trace
|
|
3383
|
-
* display and call metrics read the trace.
|
|
3384
|
-
*
|
|
3385
|
-
*
|
|
3405
|
+
* display and call metrics read the trace. Use a keyed map for one-off
|
|
3406
|
+
* fields, or one callback returning a path/value object for multiple fields.
|
|
3407
|
+
* Keys are dot-paths under `span.attributes`; return `undefined` to skip one
|
|
3408
|
+
* span or one returned key.
|
|
3386
3409
|
*/
|
|
3387
|
-
derivedAttributes:
|
|
3410
|
+
derivedAttributes: callDerivedAttributesConfigSchema.optional(),
|
|
3388
3411
|
/** Custom user-defined metrics surfaced on each API call. */
|
|
3389
3412
|
metrics: z.array(apiCallMetricSchema).optional()
|
|
3390
3413
|
});
|
|
@@ -3418,7 +3441,8 @@ const DEFAULT_LLM_CALLS_CONFIG = {
|
|
|
3418
3441
|
},
|
|
3419
3442
|
derivedAttributes: [],
|
|
3420
3443
|
metrics: [],
|
|
3421
|
-
pricing: []
|
|
3444
|
+
pricing: [],
|
|
3445
|
+
costCurrencies: []
|
|
3422
3446
|
};
|
|
3423
3447
|
/** Default API-calls config the UI uses before the workspace fetch resolves. */
|
|
3424
3448
|
const DEFAULT_API_CALLS_CONFIG = {
|
|
@@ -3444,7 +3468,9 @@ const DEFAULT_API_CALLS_CONFIG = {
|
|
|
3444
3468
|
metrics: []
|
|
3445
3469
|
};
|
|
3446
3470
|
function resolveDerivedAttributes(input) {
|
|
3447
|
-
|
|
3471
|
+
if (input === void 0) return [];
|
|
3472
|
+
if (typeof input === "function") return [{ computeMany: input }];
|
|
3473
|
+
return Object.entries(input).map(([path, compute]) => ({
|
|
3448
3474
|
path,
|
|
3449
3475
|
compute
|
|
3450
3476
|
}));
|
|
@@ -3496,6 +3522,14 @@ function resolveLlmCallPricingEntries(model, pricing) {
|
|
|
3496
3522
|
});
|
|
3497
3523
|
return entries;
|
|
3498
3524
|
}
|
|
3525
|
+
function resolveLlmCallCostCurrency(currency) {
|
|
3526
|
+
return {
|
|
3527
|
+
code: currency.code,
|
|
3528
|
+
label: currency.label,
|
|
3529
|
+
usdToCurrencyRate: currency.usdToCurrencyRate,
|
|
3530
|
+
numberFormat: currency.numberFormat
|
|
3531
|
+
};
|
|
3532
|
+
}
|
|
3499
3533
|
/**
|
|
3500
3534
|
* Resolve the user-authored LLM-calls config to a fully-defaulted shape used
|
|
3501
3535
|
* by the UI to derive the LLM calls tab.
|
|
@@ -3507,6 +3541,8 @@ function resolveLlmCallPricingEntries(model, pricing) {
|
|
|
3507
3541
|
* - Missing `metrics[].placements` defaults to `['body']`.
|
|
3508
3542
|
* - Missing `pricing` defaults to an empty registry; built-in costs are only
|
|
3509
3543
|
* derived from configured model-keyed pricing and token counts.
|
|
3544
|
+
* - Missing `costCurrencies` defaults to an empty list; extra currencies only
|
|
3545
|
+
* affect the expanded LLM calls breakdown table.
|
|
3510
3546
|
*/
|
|
3511
3547
|
function resolveLlmCallsConfig(input) {
|
|
3512
3548
|
return {
|
|
@@ -3517,7 +3553,8 @@ function resolveLlmCallsConfig(input) {
|
|
|
3517
3553
|
},
|
|
3518
3554
|
derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
|
|
3519
3555
|
metrics: (input?.metrics ?? []).map(resolveLlmCallMetric),
|
|
3520
|
-
pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing))
|
|
3556
|
+
pricing: Object.entries(input?.pricing ?? {}).flatMap(([model, pricing]) => resolveLlmCallPricingEntries(model, pricing)),
|
|
3557
|
+
costCurrencies: (input?.costCurrencies ?? []).map(resolveLlmCallCostCurrency)
|
|
3521
3558
|
};
|
|
3522
3559
|
}
|
|
3523
3560
|
/**
|
|
@@ -3809,11 +3846,31 @@ function mergeNestedAttribute$1(value, path, attributeValue) {
|
|
|
3809
3846
|
function applyDerivedAttributesForKind(params) {
|
|
3810
3847
|
let attributes = params.span.attributes;
|
|
3811
3848
|
for (const derivedAttribute of params.derivedAttributes) {
|
|
3812
|
-
if (derivedAttribute.compute === void 0) continue;
|
|
3813
3849
|
const span = {
|
|
3814
3850
|
...params.span,
|
|
3815
3851
|
attributes
|
|
3816
3852
|
};
|
|
3853
|
+
if (derivedAttribute.computeMany !== void 0) {
|
|
3854
|
+
const values = (() => {
|
|
3855
|
+
try {
|
|
3856
|
+
return derivedAttribute.computeMany({
|
|
3857
|
+
attributes,
|
|
3858
|
+
span,
|
|
3859
|
+
get: (path) => getNestedAttribute(attributes, path)
|
|
3860
|
+
});
|
|
3861
|
+
} catch {
|
|
3862
|
+
return;
|
|
3863
|
+
}
|
|
3864
|
+
})();
|
|
3865
|
+
if (!isRecord$3(values)) continue;
|
|
3866
|
+
for (const [path, value] of Object.entries(values)) {
|
|
3867
|
+
if (value === void 0) continue;
|
|
3868
|
+
attributes = mergeNestedAttribute$1(attributes, path, value);
|
|
3869
|
+
}
|
|
3870
|
+
continue;
|
|
3871
|
+
}
|
|
3872
|
+
if (derivedAttribute.path === void 0) continue;
|
|
3873
|
+
if (derivedAttribute.compute === void 0) continue;
|
|
3817
3874
|
const value = (() => {
|
|
3818
3875
|
try {
|
|
3819
3876
|
return derivedAttribute.compute({
|
|
@@ -7161,4 +7218,4 @@ function toLastRunStatus(status) {
|
|
|
7161
7218
|
return status === "pending" ? null : status;
|
|
7162
7219
|
}
|
|
7163
7220
|
//#endregion
|
|
7164
|
-
export { defaultConfigKeySchema as $,
|
|
7221
|
+
export { defaultConfigKeySchema as $, getEvalStartTime as $n, cacheEntrySchema as $t, createRunRequestSchema as A, z$1 as An, runLogLocationSchema as At, getEvalDisplayStatus as B, serializeCacheValue as Bn, manualInputSelectOptionSchema as Bt, loadConfig as C, columnFormatSchema as Cn, evalFreshnessStatusSchema as Ct, createFsCacheStore as D, numberDisplayOptionsSchema as Dn, evalSummarySchema as Dt, validateCharts as E, jsonCellSchema as En, evalStatsConfigSchema as Et, extractApiCalls as F, hashCacheKey as Fn, manualInputFieldDescriptorSchema as Ft, runSummarySchema as G, EvalAssertionError as Gn, evalChartColorSchema as Gt, deriveStatusFromCaseRows as H, manualInputFileValueSchema as Hn, evalChartAggregateSchema as Ht, extractLlmCalls as I, hashCacheKeySync as In, manualInputJsonFieldSchema as It, agentEvalsConfigSchema as J, configureEvalRunLogs as Jn, evalChartTooltipExtraSchema as Jt, DEFAULT_API_CALLS_CONFIG as K, advanceEvalTime as Kn, evalChartConfigSchema as Kt, applyDerivedCallAttributes as L, deserializeCacheRecording as Ln, manualInputMultilineFieldSchema as Lt, sseEnvelopeSchema as M, captureEvalSpanError as Mn, scoreTraceSchema as Mt, extractCacheEntries as N, evalSpan as Nn, manualInputBooleanFieldSchema as Nt, configReloadStateSchema as O, repoFileRefSchema as On, runLogEntrySchema as Ot, extractCacheHits as P, evalTracer as Pn, manualInputDescriptorSchema as Pt, apiCallsConfigSchema as Q, getEvalCaseInput as Qn, cacheDebugKeyFileSchema as Qt, getNestedAttribute as R, deserializeCacheValue as Rn, manualInputNumberFieldSchema as Rt, resolveEvalDefaultConfig as S, columnDefSchema as Sn, discoveryIssueSchema as St, normalizeScoreDef as T, fileRefSchema as Tn, evalStatItemSchema as Tt, deriveStatusFromChildStatuses as U, readManualInputFile as Un, evalChartAxisSchema as Ut, deriveScopedSummaryFromCases as V, repoFile as Vn, manualInputTextFieldSchema as Vt, runManifestSchema as W, evalExpect as Wn, evalChartBuiltinMetricSchema as Wt, 
apiCallMetricPlacementSchema as X, evalLog as Xn, evalChartsConfigSchema as Xt, apiCallMetricFormatSchema as Y, evalAssert as Yn, evalChartTypeSchema as Yt, apiCallMetricSchema as Z, getCurrentScope as Zn, cacheDebugKeyEntrySchema as Zt, buildManualInputDescriptor as _, traceSpanErrorSchema as _n, getCaseRowCaseKey as _t, getLastRunStatuses as a, cacheRecordingOpSchema as an, runInEvalScope as ar, llmCallMetricPlacementSchema as at, loadEvalModule as b, traceSpanWarningSchema as bn, caseDetailSchema as bt, loadPersistedRunSnapshots as c, serializedCacheSpanSchema as cn, setScopeCacheContext as cr, llmCallPricingSchema as ct, persistRunState as d, traceAttributeDisplayFormatSchema as dn, getEvalRegistry as dr, resolveApiCallsConfig as dt, cacheEntryWithDebugKeySchema as en, incrementEvalOutput as er, evalColumnOverrideSchema as et, recomputeEvalStatusesInRuns as f, traceAttributeDisplayInputSchema as fn, resolveLlmCallsConfig as ft, resolveArtifactPath as g, traceDisplayInputConfigSchema as gn, buildEvalKey as gt, resolveTracePresentation as h, traceDisplayConfigSchema as hn, buildCaseKey as ht, generateRunId as i, cacheOperationTypeSchema as in, runInEvalRuntimeScope as ir, llmCallMetricFormatSchema as it, updateManualScoreRequestSchema as j, buildTraceTree as jn, runLogPhaseSchema as jt, configReloadStatusSchema as k, runArtifactRefSchema as kn, runLogLevelSchema as kt, nextShortIdFromSnapshots as l, spanCacheOptionsSchema as ln, startEvalBackgroundJob as lr, llmCallsConfigSchema as lt, runTouchesEval as m, traceAttributeDisplaySchema as mn, trialSelectionModeSchema as mt, getTargetEvalKeys as n, cacheListItemSchema as nn, mergeEvalOutput as nr, evalDeriveConfigSchema as nt, getLatestRunInfos as o, cacheRecordingSchema as on, runInExistingEvalScope as or, llmCallMetricSchema as ot, recomputePersistedCaseStatus as p, traceAttributeDisplayPlacementSchema as pn, runLogsConfigSchema as pt, DEFAULT_LLM_CALLS_CONFIG as q, appendToEvalOutput as qn, evalChartMetricSchema 
as qt, getTargetEvals as r, cacheModeSchema as rn, nextEvalId as rr, llmCallCostCurrencySchema as rt, loadPersistedRunSnapshot as s, cacheStatusSchema as sn, setEvalOutput as sr, llmCallPricingRateSchema as st, executeRun as t, cacheFileSchema as tn, isInEvalScope as tr, evalColumnsSchema as tt, persistCaseDetail as u, traceCacheRefSchema as un, defineEval as ur, removeDefaultConfigSchema as ut, parseManualInputValues as v, traceSpanKindSchema as vn, getCaseRowEvalKey as vt, buildDeclaredColumnDefs as w, columnKindSchema as wn, evalStatAggregateSchema as wt, parseEvalDiscovery as x, cellValueSchema as xn, caseRowSchema as xt, deriveEvalFreshness as y, traceSpanSchema as yn, assertionFailureSchema as yt, getEvalTitle as z, serializeCacheRecording as zn, manualInputSelectFieldSchema as zt };
|
|
package/dist/{runner-1F8MeY5V.mjs → runner-Bnm1nz0U.mjs}
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-
|
|
2
|
-
import "./src-
|
|
1
|
+
import { n as createRunner } from "./cli-CMPmuY7W.mjs";
|
|
2
|
+
import "./src-gZm9nyTp.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
|
package/dist/{runner-Bq1f9B9d.mjs → runner-CmpWwCe1.mjs}
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-Bnm1nz0U.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
package/skills/agent-eval/SKILL.md
CHANGED
|
@@ -341,12 +341,15 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
|
|
|
341
341
|
tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
|
|
342
342
|
override `attributes.<field>` for non-default primitive span shapes, configure
|
|
343
343
|
model-keyed `pricing` to derive USD costs from token counts, with nested
|
|
344
|
-
`providers` entries for provider-specific rates, add `
|
|
345
|
-
|
|
346
|
-
|
|
344
|
+
`providers` entries for provider-specific rates, add `costCurrencies` to show
|
|
345
|
+
converted cost columns in the expanded breakdown table only, add
|
|
346
|
+
`derivedAttributes` to persist computed values back onto matching LLM spans
|
|
347
|
+
before trace consumers run, and add entries to `metrics` to surface arbitrary user metrics
|
|
347
348
|
(`format: 'string' | 'number' | 'duration' | 'json' | 'boolean'`,
|
|
348
|
-
`placements: ['header' | 'body']`). `derivedAttributes`
|
|
349
|
-
|
|
349
|
+
`placements: ['header' | 'body']`). `derivedAttributes` can be a keyed map
|
|
350
|
+
for one-off fields or one callback that returns multiple path/value pairs.
|
|
351
|
+
Derived keys are dot-paths under `span.attributes`; return `undefined` to
|
|
352
|
+
skip one span or one returned key. For saved runs,
|
|
350
353
|
the case drawer more menu can recalculate configured LLM/API derived
|
|
351
354
|
attributes for one case and persist the updated trace artifacts without
|
|
352
355
|
re-running the eval.
|
|
@@ -374,9 +377,10 @@ cacheCreationInputTokens` so cache details are not double-counted.
|
|
|
374
377
|
and `'fetch'` spans with `method`, `url`, `statusCode`, `request`,
|
|
375
378
|
`response`, `requestBody`, `responseBody`, `headers`, `durationMs`, and
|
|
376
379
|
`error` read from conventional attribute paths. Override `kinds` or
|
|
377
|
-
`attributes.<field>` for external tracers, add `derivedAttributes`
|
|
378
|
-
|
|
379
|
-
formats and placements as
|
|
380
|
+
`attributes.<field>` for external tracers, add `derivedAttributes` as a
|
|
381
|
+
keyed map or object-returning callback for computed persisted API span
|
|
382
|
+
attributes, and add `metrics` with the same formats and placements as
|
|
383
|
+
LLM-call metrics.
|
|
380
384
|
- `runLogs` (in `agent-evals.config.ts`) controls case log capture. Use
|
|
381
385
|
`runLogs: { captureConsole: false }` to keep console output in the terminal
|
|
382
386
|
without persisting console calls to case details. Manual `evalLog(...)` calls
|