@ls-stack/agent-eval 0.42.0 → 0.42.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{app-1vE5Ryry.mjs → app-mOYjX9zq.mjs} +4 -4
- package/dist/apps/web/dist/assets/{index-5CB9eJZy.js → index-CANDLTsq.js} +2 -2
- package/dist/apps/web/dist/index.html +1 -1
- package/dist/bin.mjs +1 -1
- package/dist/{cli-Bk5g-bat.mjs → cli-DbVfkr9T.mjs} +3 -3
- package/dist/index.d.mts +108 -107
- package/dist/index.mjs +3 -3
- package/dist/runChild.mjs +1 -1
- package/dist/{runOrchestration-DhTiT4V0.mjs → runOrchestration-SPaHx-SC.mjs} +6 -7
- package/dist/{runner-B1Cyevvr.mjs → runner-BYOdLBle.mjs} +1 -1
- package/dist/{runner-BG0L4yId.mjs → runner-DiCQ57JQ.mjs} +2 -2
- package/dist/{src-t6OVp1li.mjs → src-CANi3gpd.mjs} +2 -2
- package/package.json +1 -1
- package/skills/agent-eval/SKILL.md +1 -1
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, Rt as getEvalRegistry, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-DhTiT4V0.mjs";
|
|
2
|
-
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Bk5g-bat.mjs";
|
|
3
|
-
import { n as matchesEvalTags, t as defineEval } from "./src-t6OVp1li.mjs";
|
|
1
|
+
import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, Rt as getEvalRegistry, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-SPaHx-SC.mjs";
|
|
2
|
+
import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-DbVfkr9T.mjs";
|
|
3
|
+
import { n as matchesEvalTags, t as defineEval } from "./src-CANi3gpd.mjs";
|
|
4
4
|
export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
|
package/dist/runChild.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as evalChartsConfigSchema, It as columnDefSchema, Nt as evalStatsConfigSchema, Pt as manualInputDescriptorSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-DhTiT4V0.mjs";
|
|
1
|
+
import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as evalChartsConfigSchema, It as columnDefSchema, Nt as evalStatsConfigSchema, Pt as manualInputDescriptorSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-SPaHx-SC.mjs";
|
|
2
2
|
import { z } from "zod/v4";
|
|
3
3
|
import { readFile } from "node:fs/promises";
|
|
4
4
|
import { relative } from "node:path";
|
|
@@ -2056,12 +2056,11 @@ function computeTotalTokens({ input, output }) {
|
|
|
2056
2056
|
if (input === null && output === null) return null;
|
|
2057
2057
|
return (input ?? 0) + (output ?? 0);
|
|
2058
2058
|
}
|
|
2059
|
-
function computeTokensPerSecond({ outputTokens, durationMs, latencyMs }) {
|
|
2059
|
+
function computeTokensPerSecond({ outputTokens, durationMs }) {
|
|
2060
2060
|
if (outputTokens === null || durationMs === null) return null;
|
|
2061
2061
|
if (outputTokens === 0) return 0;
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
return outputTokens / (generationMs / 1e3);
|
|
2062
|
+
if (durationMs <= 0) return null;
|
|
2063
|
+
return outputTokens / (durationMs / 1e3);
|
|
2065
2064
|
}
|
|
2066
2065
|
function readSteps(attributes, path) {
|
|
2067
2066
|
const raw = getNestedAttribute(attributes, path);
|
|
@@ -2094,7 +2093,8 @@ function pickError$1(span) {
|
|
|
2094
2093
|
* `getNestedAttribute` from the configured paths, with safe coercion to
|
|
2095
2094
|
* `string | null` / `number | null`. `latencyMs` is an explicit
|
|
2096
2095
|
* time-to-first-token attribute; full span elapsed time is reported separately
|
|
2097
|
-
* as `durationMs`. Built-in USD costs are derived only from configured model
|
|
2096
|
+
* as `durationMs`. `tokensPerSecond` is output tokens divided by that full
|
|
2097
|
+
* elapsed duration. Built-in USD costs are derived only from configured model
|
|
2098
2098
|
* pricing and token counts. `totalTokens` is always derived from input +
|
|
2099
2099
|
* output tokens. Cached input and cache creation tokens are reported
|
|
2100
2100
|
* separately because they are subsets of input/output usage. The main cache
|
|
@@ -2189,8 +2189,7 @@ function extractLlmCalls(spans, config) {
|
|
|
2189
2189
|
latencyMs,
|
|
2190
2190
|
tokensPerSecond: computeTokensPerSecond({
|
|
2191
2191
|
outputTokens,
|
|
2192
|
-
durationMs,
|
|
2193
|
-
latencyMs
|
|
2192
|
+
durationMs
|
|
2194
2193
|
}),
|
|
2195
2194
|
costUsd,
|
|
2196
2195
|
inputCostUsd,
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
import { n as initRunner, t as getRunnerInstance } from "./runner-BG0L4yId.mjs";
|
|
1
|
+
import { n as initRunner, t as getRunnerInstance } from "./runner-DiCQ57JQ.mjs";
|
|
2
2
|
export { getRunnerInstance, initRunner };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { n as createRunner } from "./cli-Bk5g-bat.mjs";
|
|
2
|
-
import "./src-t6OVp1li.mjs";
|
|
1
|
+
import { n as createRunner } from "./cli-DbVfkr9T.mjs";
|
|
2
|
+
import "./src-CANi3gpd.mjs";
|
|
3
3
|
//#region ../../apps/server/src/runner.ts
|
|
4
4
|
let runnerInstance = null;
|
|
5
5
|
function getRunnerInstance() {
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { Lt as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-DhTiT4V0.mjs";
|
|
2
|
-
import "./cli-Bk5g-bat.mjs";
|
|
1
|
+
import { Lt as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-SPaHx-SC.mjs";
|
|
2
|
+
import "./cli-DbVfkr9T.mjs";
|
|
3
3
|
//#region src/index.ts
|
|
4
4
|
/** Register an eval definition with typed tag support. */
|
|
5
5
|
function defineEval(definition) {
|
package/package.json
CHANGED
|
@@ -371,7 +371,7 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
|
|
|
371
371
|
summarized for review. Defaults to `kind: 'llm'` spans with `model`,
|
|
372
372
|
`usage.*`, `latencyMs`, `input`, `output`, etc. read from conventional
|
|
373
373
|
attribute paths. `latencyMs` is time to first token; duration, total tokens,
|
|
374
|
-
tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
|
|
374
|
+
output tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
|
|
375
375
|
override `attributes.<field>` for non-default primitive span shapes, configure
|
|
376
376
|
model-keyed `pricing` to derive USD costs from token counts, with nested
|
|
377
377
|
`providers` entries for provider-specific rates, add `costCurrencies` to show
|