npm - @ls-stack/agent-eval - Versions diffs - 0.42.0 → 0.42.1 - Mend

@ls-stack/agent-eval 0.42.0 → 0.42.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (14)

package/dist/{app-1vE5Ryry.mjs → app-mOYjX9zq.mjs} +4 -4
package/dist/apps/web/dist/assets/{index-5CB9eJZy.js → index-CANDLTsq.js} +2 -2
package/dist/apps/web/dist/index.html +1 -1
package/dist/bin.mjs +1 -1
package/dist/{cli-Bk5g-bat.mjs → cli-DbVfkr9T.mjs} +3 -3
package/dist/index.d.mts +108 -107
package/dist/index.mjs +3 -3
package/dist/runChild.mjs +1 -1
package/dist/{runOrchestration-DhTiT4V0.mjs → runOrchestration-SPaHx-SC.mjs} +6 -7
package/dist/{runner-B1Cyevvr.mjs → runner-BYOdLBle.mjs} +1 -1
package/dist/{runner-BG0L4yId.mjs → runner-DiCQ57JQ.mjs} +2 -2
package/dist/{src-t6OVp1li.mjs → src-CANi3gpd.mjs} +2 -2
package/package.json +1 -1
package/skills/agent-eval/SKILL.md +1 -1

package/dist/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, Rt as getEvalRegistry, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-DhTiT4V0.mjs";
-import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Bk5g-bat.mjs";
-import { n as matchesEvalTags, t as defineEval } from "./src-t6OVp1li.mjs";
+import { $ as getCurrentScope, B as deserializeCacheValue, F as evalSpan, G as readManualInputFile, H as serializeCacheValue, I as evalTracer, J as appendToEvalOutput, K as evalExpect, L as hashCacheKey, M as z, N as buildTraceTree, P as captureEvalSpanError, Q as evalTime, R as hashCacheKeySync, Rt as getEvalRegistry, U as repoFile, V as serializeCacheRecording, W as manualInputFileValueSchema, X as evalAssert, Z as evalLog, _t as extractLlmCalls, at as nextEvalId, ct as runInExistingEvalScope, dt as startEvalBackgroundJob, et as getEvalCaseInput, gt as extractApiCalls, ht as extractCacheHits, it as mergeEvalOutput, lt as setEvalOutput, mt as extractCacheEntries, nt as isInEvalScope, ot as runInEvalRuntimeScope, q as EvalAssertionError, st as runInEvalScope, tt as incrementEvalOutput, ut as setScopeCacheContext, vt as simulateLlmCallCost, xt as getNestedAttribute, yt as simulateTokenAllocation, z as deserializeCacheRecording } from "./runOrchestration-SPaHx-SC.mjs";
+import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-DbVfkr9T.mjs";
+import { n as matchesEvalTags, t as defineEval } from "./src-CANi3gpd.mjs";
 export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };

package/dist/runChild.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as evalChartsConfigSchema, It as columnDefSchema, Nt as evalStatsConfigSchema, Pt as manualInputDescriptorSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-DhTiT4V0.mjs";
+import { At as buildEvalKey, D as loadConfig, Dt as runSummarySchema, Et as runManifestSchema, Ft as evalChartsConfigSchema, It as columnDefSchema, Nt as evalStatsConfigSchema, Pt as manualInputDescriptorSchema, T as parseEvalDiscovery, Y as configureEvalRunLogs, ft as createRunRequestSchema, h as persistRunState, j as createFsCacheStore, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-SPaHx-SC.mjs";
 import { z } from "zod/v4";
 import { readFile } from "node:fs/promises";
 import { relative } from "node:path";

package/dist/{runOrchestration-DhTiT4V0.mjs → runOrchestration-SPaHx-SC.mjs} RENAMED Viewed

@@ -2056,12 +2056,11 @@ function computeTotalTokens({ input, output }) {
 	if (input === null && output === null) return null;
 	return (input ?? 0) + (output ?? 0);
 }
-function computeTokensPerSecond({ outputTokens, durationMs, latencyMs }) {
+function computeTokensPerSecond({ outputTokens, durationMs }) {
 	if (outputTokens === null || durationMs === null) return null;
 	if (outputTokens === 0) return 0;
-	const generationMs = latencyMs === null ? durationMs : durationMs - latencyMs;
-	if (generationMs <= 0) return null;
-	return outputTokens / (generationMs / 1e3);
+	if (durationMs <= 0) return null;
+	return outputTokens / (durationMs / 1e3);
 }
 function readSteps(attributes, path) {
 	const raw = getNestedAttribute(attributes, path);
@@ -2094,7 +2093,8 @@ function pickError$1(span) {
 * `getNestedAttribute` from the configured paths, with safe coercion to
 * `string | null` / `number | null`. `latencyMs` is an explicit
 * time-to-first-token attribute; full span elapsed time is reported separately
-* as `durationMs`. Built-in USD costs are derived only from configured model
+* as `durationMs`. `tokensPerSecond` is output tokens divided by that full
+* elapsed duration. Built-in USD costs are derived only from configured model
 * pricing and token counts. `totalTokens` is always derived from input +
 * output tokens. Cached input and cache creation tokens are reported
 * separately because they are subsets of input/output usage. The main cache
@@ -2189,8 +2189,7 @@ function extractLlmCalls(spans, config) {
 			latencyMs,
 			tokensPerSecond: computeTokensPerSecond({
 				outputTokens,
-				durationMs,
-				latencyMs
+				durationMs
 			}),
 			costUsd,
 			inputCostUsd,

package/dist/{runner-B1Cyevvr.mjs → runner-BYOdLBle.mjs} RENAMED Viewed

@@ -1,2 +1,2 @@
-import { n as initRunner, t as getRunnerInstance } from "./runner-BG0L4yId.mjs";
+import { n as initRunner, t as getRunnerInstance } from "./runner-DiCQ57JQ.mjs";
 export { getRunnerInstance, initRunner };

package/dist/{runner-BG0L4yId.mjs → runner-DiCQ57JQ.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { n as createRunner } from "./cli-Bk5g-bat.mjs";
-import "./src-t6OVp1li.mjs";
+import { n as createRunner } from "./cli-DbVfkr9T.mjs";
+import "./src-CANi3gpd.mjs";
 //#region ../../apps/server/src/runner.ts
 let runnerInstance = null;
 function getRunnerInstance() {

package/dist/{src-t6OVp1li.mjs → src-CANi3gpd.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { Lt as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-DhTiT4V0.mjs";
-import "./cli-Bk5g-bat.mjs";
+import { Lt as defineEval$1, rt as matchesEvalTags$1 } from "./runOrchestration-SPaHx-SC.mjs";
+import "./cli-DbVfkr9T.mjs";
 //#region src/index.ts
 /** Register an eval definition with typed tag support. */
 function defineEval(definition) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.42.0",
+  "version": "0.42.1",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"

package/skills/agent-eval/SKILL.md CHANGED Viewed

@@ -371,7 +371,7 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
   summarized for review. Defaults to `kind: 'llm'` spans with `model`,
   `usage.*`, `latencyMs`, `input`, `output`, etc. read from conventional
   attribute paths. `latencyMs` is time to first token; duration, total tokens,
-  tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
+  output tokens/sec, and USD costs are derived. Override `kinds` to broaden the filter,
   override `attributes.<field>` for non-default primitive span shapes, configure
   model-keyed `pricing` to derive USD costs from token counts, with nested
   `providers` entries for provider-specific rates, add `costCurrencies` to show