@ls-stack/agent-eval 0.42.0 → 0.42.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, Rt as getEvalRegistry, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-DhTiT4V0.mjs";
2
- import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Bk5g-bat.mjs";
3
- import { n as matchesEvalTags, t as defineEval } from "./src-t6OVp1li.mjs";
1
+ import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, Rt as getEvalRegistry, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-SPaHx-SC.mjs";
2
+ import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-DbVfkr9T.mjs";
3
+ import { n as matchesEvalTags, t as defineEval } from "./src-CANi3gpd.mjs";
4
4
  export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };
package/dist/runChild.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as evalChartsConfigSchema, It as columnDefSchema, Nt as evalStatsConfigSchema, Pt as manualInputDescriptorSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-DhTiT4V0.mjs";
1
+ import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as evalChartsConfigSchema, It as columnDefSchema, Nt as evalStatsConfigSchema, Pt as manualInputDescriptorSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-SPaHx-SC.mjs";
2
2
  import { z } from "zod/v4";
3
3
  import { readFile } from "node:fs/promises";
4
4
  import { relative } from "node:path";
@@ -2056,12 +2056,11 @@ function computeTotalTokens({ input, output }) {
2056
2056
  if (input === null && output === null) return null;
2057
2057
  return (input ?? 0) + (output ?? 0);
2058
2058
  }
2059
- function computeTokensPerSecond({ outputTokens, durationMs, latencyMs }) {
2059
+ function computeTokensPerSecond({ outputTokens, durationMs }) {
2060
2060
  if (outputTokens === null || durationMs === null) return null;
2061
2061
  if (outputTokens === 0) return 0;
2062
- const generationMs = latencyMs === null ? durationMs : durationMs - latencyMs;
2063
- if (generationMs <= 0) return null;
2064
- return outputTokens / (generationMs / 1e3);
2062
+ if (durationMs <= 0) return null;
2063
+ return outputTokens / (durationMs / 1e3);
2065
2064
  }
2066
2065
  function readSteps(attributes, path) {
2067
2066
  const raw = getNestedAttribute(attributes, path);
@@ -2094,7 +2093,8 @@ function pickError$1(span) {
2094
2093
  * `getNestedAttribute` from the configured paths, with safe coercion to
2095
2094
  * `string | null` / `number | null`. `latencyMs` is an explicit
2096
2095
  * time-to-first-token attribute; full span elapsed time is reported separately
2097
- * as `durationMs`. Built-in USD costs are derived only from configured model
2096
+ * as `durationMs`. `tokensPerSecond` is output tokens divided by that full
2097
+ * elapsed duration. Built-in USD costs are derived only from configured model
2098
2098
  * pricing and token counts. `totalTokens` is always derived from input +
2099
2099
  * output tokens. Cached input and cache creation tokens are reported
2100
2100
  * separately because they are subsets of input/output usage. The main cache
@@ -2189,8 +2189,7 @@ function extractLlmCalls(spans, config) {
2189
2189
  latencyMs,
2190
2190
  tokensPerSecond: computeTokensPerSecond({
2191
2191
  outputTokens,
2192
- durationMs,
2193
- latencyMs
2192
+ durationMs
2194
2193
  }),
2195
2194
  costUsd,
2196
2195
  inputCostUsd,
@@ -1,2 +1,2 @@
1
- import { n as initRunner, t as getRunnerInstance } from "./runner-BG0L4yId.mjs";
1
+ import { n as initRunner, t as getRunnerInstance } from "./runner-DiCQ57JQ.mjs";
2
2
  export { getRunnerInstance, initRunner };
@@ -1,5 +1,5 @@
1
- import { n as createRunner } from "./cli-Bk5g-bat.mjs";
2
- import "./src-t6OVp1li.mjs";
1
+ import { n as createRunner } from "./cli-DbVfkr9T.mjs";
2
+ import "./src-CANi3gpd.mjs";
3
3
  //#region ../../apps/server/src/runner.ts
4
4
  let runnerInstance = null;
5
5
  function getRunnerInstance() {
@@ -1,5 +1,5 @@
1
- import { Lt as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-DhTiT4V0.mjs";
2
- import "./cli-Bk5g-bat.mjs";
1
+ import { Lt as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-SPaHx-SC.mjs";
2
+ import "./cli-DbVfkr9T.mjs";
3
3
  //#region src/index.ts
4
4
  /** Register an eval definition with typed tag support. */
5
5
  function defineEval(definition) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ls-stack/agent-eval",
3
- "version": "0.42.0",
3
+ "version": "0.42.1",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "agent-evals": "./dist/bin.mjs"
@@ -371,7 +371,7 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
371
371
  summarized for review. Defaults to `kind: 'llm'` spans with `model`,
372
372
  `usage.*`, `latencyMs`, `input`, `output`, etc. read from conventional
373
373
  attribute paths. `latencyMs` is time to first token; duration, total tokens,
374
- tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
374
+ output tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
375
375
  override `attributes.<field>` for non-default primitive span shapes, configure
376
376
  model-keyed `pricing` to derive USD costs from token counts, with nested
377
377
  `providers` entries for provider-specific rates, add `costCurrencies` to show