npm - @ls-stack/agent-eval - Versions diffs - 0.56.0 → 0.57.0 - Mend

@ls-stack/agent-eval 0.56.0 → 0.57.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/dist/{app-Bpe6Monh.mjs → app-Db_x-Rit.mjs} +4 -4
package/dist/apps/web/dist/assets/{index-CfSiAVmi.js → index-o4o2EktS.js} +76 -76
package/dist/apps/web/dist/index.html +1 -1
package/dist/bin.mjs +1 -1
package/dist/caseChild.mjs +1 -1
package/dist/{cli-DQO2Fpt2.mjs → cli-Ck0mqxd-.mjs} +4 -4
package/dist/index.d.mts +110 -110
package/dist/index.mjs +3 -3
package/dist/runChild.mjs +2 -2
package/dist/{runExecution-6lrtj48K.mjs → runExecution-BH7DlMXl.mjs} +5 -1
package/dist/{runOrchestration-BYaN2mzS.mjs → runOrchestration-C1Ex9QI-.mjs} +1 -1
package/dist/{runner-DYlwuAT3.mjs → runner-B3hEOT_I.mjs} +2 -2
package/dist/{runner-C3CiS2o7.mjs → runner-DbVYcapC.mjs} +1 -1
package/dist/{src-DCGrFAmO.mjs → src-B3iq-tuv.mjs} +2 -2
package/package.json +1 -1
package/skills/agent-eval/SKILL.md +5 -1

package/dist/index.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-6lrtj48K.mjs";
-import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-DQO2Fpt2.mjs";
-import { n as matchesEvalTags, t as defineEval } from "./src-DCGrFAmO.mjs";
+import { $ as startEvalBackgroundJob, A as repoFile, B as getCurrentScope, C as evalTracer, D as deserializeCacheValue, E as deserializeCacheRecording, F as appendToEvalOutput, G as mergeEvalOutput, H as incrementEvalOutput, J as runInEvalScope, K as nextEvalId, L as evalAssert, M as readManualInputFile, N as evalExpect, Nt as getEvalRegistry, O as serializeCacheRecording, P as EvalAssertionError, Q as setScopeCacheContext, R as evalLog, S as evalSpan, T as hashCacheKeySync, U as isInEvalScope, V as getEvalCaseInput, Y as runInExistingEvalScope, Z as setEvalOutput, at as extractLlmCalls, b as buildTraceTree, it as extractApiCalls, j as manualInputFileValueSchema, k as serializeCacheValue, lt as getNestedAttribute, nt as extractCacheEntries, ot as simulateLlmCallCost, q as runInEvalRuntimeScope, rt as extractCacheHits, st as simulateTokenAllocation, w as hashCacheKey, x as captureEvalSpanError, y as z, z as evalTime } from "./runExecution-BH7DlMXl.mjs";
+import { a as materializeManualInputFiles, i as isManualInputFileValue, n as createRunner, o as stageManualInputFile, r as cleanupStagedManualInputFiles, s as stageManualInputFileFromPath, t as runCli } from "./cli-Ck0mqxd-.mjs";
+import { n as matchesEvalTags, t as defineEval } from "./src-B3iq-tuv.mjs";
 export { EvalAssertionError, appendToEvalOutput, buildTraceTree, captureEvalSpanError, cleanupStagedManualInputFiles, createRunner, defineEval, deserializeCacheRecording, deserializeCacheValue, evalAssert, evalExpect, evalLog, evalSpan, evalTime, evalTracer, extractApiCalls, extractCacheEntries, extractCacheHits, extractLlmCalls, getCurrentScope, getEvalCaseInput, getEvalRegistry, getNestedAttribute, hashCacheKey, hashCacheKeySync, incrementEvalOutput, isInEvalScope, isManualInputFileValue, manualInputFileValueSchema, matchesEvalTags, materializeManualInputFiles, mergeEvalOutput, nextEvalId, readManualInputFile, repoFile, runCli, runInEvalRuntimeScope, runInEvalScope, runInExistingEvalScope, serializeCacheRecording, serializeCacheValue, setEvalOutput, setScopeCacheContext, simulateLlmCallCost, simulateTokenAllocation, stageManualInputFile, stageManualInputFileFromPath, startEvalBackgroundJob, z };

package/dist/runChild.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
-import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-6lrtj48K.mjs";
-import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-BYaN2mzS.mjs";
+import { At as evalChartsConfigSchema, Ct as buildEvalKey, Dt as evalStatAggregateSchema, I as configureEvalRunLogs, Ot as evalStatsConfigSchema, bt as runSummarySchema, et as createRunRequestSchema, jt as columnDefSchema, kt as manualInputDescriptorSchema, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, v as createFsCacheStore, yt as runManifestSchema } from "./runExecution-BH7DlMXl.mjs";
+import { S as parseEvalDiscovery, m as persistRunState, r as getTargetEvals$1, t as executeRun } from "./runOrchestration-C1Ex9QI-.mjs";
 import { z } from "zod/v4";
 import { readFile } from "node:fs/promises";
 import { relative } from "node:path";

package/dist/{runExecution-6lrtj48K.mjs → runExecution-BH7DlMXl.mjs} RENAMED Viewed

@@ -6181,6 +6181,10 @@ function sumNullable(values) {
 	}
 	return hasValue ? total : void 0;
 }
+function getMaxLlmTurns(calls) {
+	if (calls.length === 0) return void 0;
+	return Math.max(...calls.map((call) => Math.max(call.stepCount ?? 1, 1)));
+}
 function assignIfMissing(params) {
 	if (!params.activeKeys.has(params.key)) return;
 	if (params.key in params.outputs) return;
@@ -6202,7 +6206,7 @@ function addDefaultOutputs(params) {
 	assignIfMissing({
 		outputs: params.outputs,
 		key: "llmTurns",
-		value: calls.length,
+		value: getMaxLlmTurns(calls),
 		activeKeys
 	});
 	assignIfMissing({

package/dist/{runOrchestration-BYaN2mzS.mjs → runOrchestration-C1Ex9QI-.mjs} RENAMED Viewed

@@ -1,4 +1,4 @@
-import { Et as caseRowSchema, Pt as runWithEvalRegistry, Tt as caseDetailSchema, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, g as commitPendingCacheWrites, gt as matchesTagsFilter, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, wt as getCaseRowCaseKey, yt as runManifestSchema } from "./runExecution-6lrtj48K.mjs";
+import { Et as caseRowSchema, Pt as runWithEvalRegistry, Tt as caseDetailSchema, X as runWithEvalClock, _t as validateEvalTagName, bt as runSummarySchema, d as loadEvalModule, f as resolveEvalDefaultConfig, ft as deriveScopedSummaryFromCases, g as commitPendingCacheWrites, gt as matchesTagsFilter, ht as dedupeEvalTags, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromChildStatuses, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveStatusFromCaseRows, q as runInEvalRuntimeScope, t as filterEvalCases, u as runWithModuleIsolation, vt as validateTagsFilterExpression, wt as getCaseRowCaseKey, yt as runManifestSchema } from "./runExecution-BH7DlMXl.mjs";
 import { readFile, readdir, rm, writeFile } from "node:fs/promises";
 import { dirname, join } from "node:path";
 import { existsSync } from "node:fs";

package/dist/{runner-DYlwuAT3.mjs → runner-B3hEOT_I.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { n as createRunner } from "./cli-DQO2Fpt2.mjs";
-import "./src-DCGrFAmO.mjs";
+import { n as createRunner } from "./cli-Ck0mqxd-.mjs";
+import "./src-B3iq-tuv.mjs";
 //#region ../../apps/server/src/runner.ts
 let runnerInstance = null;
 function getRunnerInstance() {

package/dist/{runner-C3CiS2o7.mjs → runner-DbVYcapC.mjs} RENAMED Viewed

@@ -1,2 +1,2 @@
-import { n as initRunner, t as getRunnerInstance } from "./runner-DYlwuAT3.mjs";
+import { n as initRunner, t as getRunnerInstance } from "./runner-B3hEOT_I.mjs";
 export { getRunnerInstance, initRunner };

package/dist/{src-DCGrFAmO.mjs → src-B3iq-tuv.mjs} RENAMED Viewed

@@ -1,5 +1,5 @@
-import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-6lrtj48K.mjs";
-import "./cli-DQO2Fpt2.mjs";
+import { Mt as defineEval$1, W as matchesEvalTags$1 } from "./runExecution-BH7DlMXl.mjs";
+import "./cli-Ck0mqxd-.mjs";
 //#region src/index.ts
 /** Register an eval definition with typed tag support. */
 function defineEval(definition) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.56.0",
+  "version": "0.57.0",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"

package/skills/agent-eval/SKILL.md CHANGED Viewed

@@ -393,7 +393,9 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
   tokens, and output tokens separately and use `dedupeConsecutiveValues: true`
   to skip repeated adjacent chart values. `totalTokens` is input + output only;
   cache read/write tokens stay separate and affect `costUsd` at their own
-  rates.
+  rates. `llmTurns` is the maximum per-call turn count in the case run, using
+  configured steps when available and otherwise one turn per matched LLM call
+  span.
   Derived base input cost uses `inputTokens - cachedInputTokens -
 cacheCreationInputTokens` so cache details are not double-counted.
   `cacheCreationInputTokens` is the total cache-write count; optional
@@ -568,6 +570,8 @@ Targeted evals in `run.json` are recorded by exact `evalKeys`
 unambiguous in saved history.
 Temporary runs use the same directory layout, but are removed before the next
 run of any kind starts.
+When a saved case needs to be handed to another agent, the app can copy the
+saved case detail path or the saved run folder path directly.
 Use `agent-evals show-runs` when you need stable file
 paths before reading saved output: