npm - @ls-stack/agent-eval - Versions diffs - 0.58.3 → 0.58.5 - Mend

@ls-stack/agent-eval 0.58.3 → 0.58.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/dist/{app-ROCEce9X.mjs → app-DLNmRUqH.mjs} +64 -12
package/dist/apps/web/dist/assets/index-BD6FXk5p.js +377 -0
package/dist/apps/web/dist/assets/index-C2fbGEsB.css +1 -0
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/caseChild.mjs +4 -3
package/dist/{cli-SP4kEtYL.mjs → cli-ClAkjTvo.mjs} +8 -7
package/dist/index.d.mts +91 -79
package/dist/index.mjs +3 -3
package/dist/runChild.mjs +5 -4
package/dist/{runExecution-CFw0MQFs.mjs → runExecution-BMnJXWhN.mjs} +139 -14
package/dist/{runOrchestration-CxjiQmof.mjs → runOrchestration-CvbTAoEb.mjs} +2 -2
package/dist/{runner-CY3bgsjU.mjs → runner-BfHgVhGS.mjs} +2 -2
package/dist/{runner-BlFQyvN2.mjs → runner-DJJekv9f.mjs} +1 -1
package/dist/{src-7GbQj1sb.mjs → src-DfzidkYr.mjs} +2 -2
package/package.json +2 -2
package/skills/agent-eval/SKILL.md +15 -7
package/dist/apps/web/dist/assets/index-CHH7m5Cv.css +0 -1
package/dist/apps/web/dist/assets/index-PTikBbhf.js +0 -377

package/dist/{runExecution-CFw0MQFs.mjs → runExecution-BMnJXWhN.mjs} RENAMED

@@ -2,6 +2,7 @@ import { createRequire, registerHooks } from "node:module";
 import { AsyncLocalStorage } from "node:async_hooks";
 import { formatWithOptions, isDeepStrictEqual, stripVTControlCharacters } from "node:util";
 import { z, z as z$1 } from "zod/v4";
+import { resultify } from "t-result";
 import dayjs from "dayjs";
 import { Blob as Blob$1, Buffer as Buffer$1, File as File$1 } from "node:buffer";
 import { mkdir, readFile, readdir, rename, rm, writeFile } from "node:fs/promises";
@@ -10,7 +11,6 @@ import { createHash, randomUUID } from "node:crypto";
 import { getCompositeKey } from "@ls-stack/utils/getCompositeKey";
 import { existsSync } from "node:fs";
 import { brotliCompressSync, brotliDecompressSync } from "node:zlib";
-import { resultify } from "t-result";
 import { fileURLToPath, pathToFileURL } from "node:url";
 //#region ../sdk/src/defineEval.ts
 const evalRegistry = /* @__PURE__ */ new Map();
@@ -1401,6 +1401,10 @@ function resolveApiCallsConfig(input) {
 		metrics: (input?.metrics ?? []).map(resolveApiCallMetric)
 	};
 }
+const cacheMaxEntriesSchema = z.union([z.number(), z.object({
+	default: z.number().optional(),
+	namespaces: z.record(z.string(), z.number()).optional()
+})]).optional();
 /** Zod schema for validating `agent-evals.config.ts` input. */
 const agentEvalsConfigSchema = z.object({
 	workspaceRoot: z.string().optional(),
@@ -1424,11 +1428,26 @@ const agentEvalsConfigSchema = z.object({
 	cache: z.object({
 		enabled: z.boolean().optional(),
 		dir: z.string().optional(),
-		maxEntriesPerNamespace: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional()),
+		maxEntries: cacheMaxEntriesSchema,
+		maxEntriesPerNamespace: z.number().optional(),
 		maxEntriesByNamespace: z.record(z.string(), z.number()).optional(),
 		pruneIdleDelayMs: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional()),
 		lastAccessedAtUpdateIntervalMs: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional()),
-		maxEntriesPerEval: z.preprocess((value) => typeof value === "number" && Number.isFinite(value) ? value : void 0, z.number().optional())
+		maxEntriesPerEval: z.number().optional()
+	}).transform(({ maxEntries, maxEntriesByNamespace, maxEntriesPerEval, maxEntriesPerNamespace, ...cache }) => {
+		const defaultMaxEntries = maxEntriesPerNamespace ?? maxEntriesPerEval;
+		if (maxEntries !== void 0) return {
+			...cache,
+			maxEntries
+		};
+		if (defaultMaxEntries !== void 0 || maxEntriesByNamespace !== void 0) return {
+			...cache,
+			maxEntries: {
+				default: defaultMaxEntries,
+				namespaces: maxEntriesByNamespace
+			}
+		};
+		return cache;
 	}).optional()
 });
 //#endregion
@@ -1884,7 +1903,7 @@ function getEvalTitle(evalLike) {
 }
 //#endregion
 //#region ../shared/src/utils/getNestedAttribute.ts
-function isRecord$5(value) {
+function isRecord$6(value) {
 	return typeof value === "object" && value !== null;
 }
 /**
@@ -1899,14 +1918,14 @@ function getNestedAttribute(value, path) {
 	const parts = path.split(".");
 	let current = value;
 	for (const part of parts) {
-		if (!isRecord$5(current) || !(part in current)) return;
+		if (!isRecord$6(current) || !(part in current)) return;
 		current = current[part];
 	}
 	return current;
 }
 //#endregion
 //#region ../shared/src/utils/deriveCallAttributes.ts
-function isRecord$4(value) {
+function isRecord$5(value) {
 	return typeof value === "object" && value !== null;
 }
 function mergeNestedAttribute$1(value, path, attributeValue) {
@@ -1919,7 +1938,7 @@ function mergeNestedAttribute$1(value, path, attributeValue) {
 			continue;
 		}
 		const nextValue = current[part];
-		const nextRecord = isRecord$4(nextValue) ? { ...nextValue } : {};
+		const nextRecord = isRecord$5(nextValue) ? { ...nextValue } : {};
 		current[part] = nextRecord;
 		current = nextRecord;
 	}
@@ -1944,7 +1963,7 @@ function applyDerivedAttributesForKind(params) {
 					return;
 				}
 			})();
-			if (!isRecord$4(values)) continue;
+			if (!isRecord$5(values)) continue;
 			for (const [path, value] of Object.entries(values)) {
 				if (value === void 0) continue;
 				attributes = mergeNestedAttribute$1(attributes, path, value);
@@ -2006,6 +2025,13 @@ function readString$2(attributes, path) {
 	const raw = getNestedAttribute(attributes, path);
 	return typeof raw === "string" && raw.length > 0 ? raw : null;
 }
+function isRecord$4(value) {
+	return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+function readRecordValue(value, key) {
+	if (!isRecord$4(value)) return void 0;
+	return value[key];
+}
 function computeTokenCost(tokens, usdPerMillion) {
 	if (tokens === null) return null;
 	if (tokens === 0) return 0;
@@ -2216,6 +2242,76 @@ function buildModelStepsByParent(spans) {
 	}
 	return stepsByParent;
 }
+function buildChildrenByParent(spans) {
+	const childrenByParent = /* @__PURE__ */ new Map();
+	for (const span of spans) {
+		if (span.parentId === null) continue;
+		const current = childrenByParent.get(span.parentId);
+		if (current === void 0) {
+			childrenByParent.set(span.parentId, [span]);
+			continue;
+		}
+		current.push(span);
+	}
+	return childrenByParent;
+}
+function appendToolCallValues(out, value) {
+	if (Array.isArray(value)) {
+		out.push(...value);
+		return value.length > 0;
+	}
+	if (value === void 0 || value === null) return false;
+	out.push(value);
+	return true;
+}
+function parseJsonRecord(value) {
+	if (typeof value !== "string") return null;
+	const parsed = resultify(() => JSON.parse(value));
+	if (parsed.error || !isRecord$4(parsed.value)) return null;
+	return parsed.value;
+}
+function readMastraModelStepOutput(step) {
+	return parseJsonRecord(readRecordValue(readRecordValue(readRecordValue(step, "attributes"), "genAI"), "mastra.model_step.output"));
+}
+function isTraceSpan(value) {
+	return isRecord$4(value) && typeof value.id === "string" && typeof value.kind === "string" && typeof value.name === "string";
+}
+function toolCallSpanToEntry(span) {
+	const attrs = span.attributes;
+	const genAI = readRecordValue(attrs, "genAI");
+	return {
+		id: span.id,
+		name: span.name,
+		kind: span.kind,
+		status: span.status,
+		input: getNestedAttribute(attrs, "input"),
+		output: getNestedAttribute(attrs, "output"),
+		arguments: readRecordValue(genAI, "gen_ai.tool.call.arguments"),
+		result: readRecordValue(genAI, "gen_ai.tool.call.result")
+	};
+}
+function appendToolCallsFromStep({ out, step, childrenByParent }) {
+	let foundStepCalls = false;
+	foundStepCalls = appendToolCallValues(out, getNestedAttribute(step, "toolCalls")) || foundStepCalls;
+	foundStepCalls = appendToolCallValues(out, getNestedAttribute(step, "output.toolCalls")) || foundStepCalls;
+	foundStepCalls = appendToolCallValues(out, getNestedAttribute(step, "attributes.output.toolCalls")) || foundStepCalls;
+	const mastraOutput = readMastraModelStepOutput(step);
+	if (!foundStepCalls && mastraOutput !== null) foundStepCalls = appendToolCallValues(out, mastraOutput.toolCalls) || foundStepCalls;
+	if (!isTraceSpan(step)) return;
+	const childToolSpans = childrenByParent.get(step.id)?.filter((child) => child.kind === "tool_call") ?? [];
+	if (childToolSpans.length === 0) return;
+	out.push(...childToolSpans.map((child) => toolCallSpanToEntry(child)));
+}
+function readToolCalls({ attributes, path, stepDetails, childrenByParent }) {
+	const out = [];
+	appendToolCallValues(out, getNestedAttribute(attributes, path));
+	if (stepDetails !== null) for (const step of stepDetails) appendToolCallsFromStep({
+		out,
+		step,
+		childrenByParent
+	});
+	return out.length > 0 ? out : void 0;
+}
 function collectWarnings$1(span) {
 	const out = [];
 	if (span.warning) out.push(span.warning);
@@ -2259,6 +2355,7 @@ function pickError$1(span) {
 function extractLlmCalls(spans, config) {
 	const kindSet = new Set(config.kinds);
 	const modelStepsByParent = buildModelStepsByParent(spans);
+	const childrenByParent = buildChildrenByParent(spans);
 	const result = [];
 	for (const span of spans) {
 		if (!kindSet.has(span.kind)) continue;
@@ -2317,6 +2414,8 @@ function extractLlmCalls(spans, config) {
 				placements: metric.placements
 			});
 		}
+		const childModelSteps = modelStepsByParent.get(span.id) ?? [];
+		const stepInfo = readSteps(attrs, config.attributes.steps, childModelSteps);
 		result.push({
 			id: span.id,
 			name: span.name,
@@ -2344,13 +2443,18 @@ function extractLlmCalls(spans, config) {
 			cachedInputCostUsd,
 			cacheCreationInputCostUsd,
 			reasoningCostUsd,
-			...readSteps(attrs, config.attributes.steps, modelStepsByParent.get(span.id) ?? []),
+			...stepInfo,
 			finishReason: readString$2(attrs, config.attributes.finishReason),
 			durationMs,
 			input: getNestedAttribute(attrs, config.attributes.input),
 			output: getNestedAttribute(attrs, config.attributes.output),
 			reasoning: getNestedAttribute(attrs, config.attributes.reasoning),
-			toolCalls: getNestedAttribute(attrs, config.attributes.toolCalls),
+			toolCalls: readToolCalls({
+				attributes: attrs,
+				path: config.attributes.toolCalls,
+				stepDetails: stepInfo.stepDetails,
+				childrenByParent
+			}),
 			metrics,
 			warnings: collectWarnings$1(span),
 			error: pickError$1(span)
@@ -5038,6 +5142,9 @@ function buildTraceTree(spans, checkpoints) {
 		visit(null);
 		return result;
 	};
+	const isToolCallSpan = (span) => {
+		return span.kind === "tool" || span.kind === "tool_call";
+	};
 	const filterSpanNames = (sourceSpans, kind) => {
 		return sourceSpans.filter((span) => kind === void 0 || span.kind === kind).map((span) => span.name);
 	};
@@ -5057,13 +5164,18 @@ function buildTraceTree(spans, checkpoints) {
 			return spans.filter((s) => s.kind === kind);
 		},
 		findToolCallSpans() {
-			return spans.filter((s) => s.kind === "tool");
+			return spans.filter(isToolCallSpan);
 		},
 		listToolCallSpanNames() {
-			return filterSpanNames(spans, "tool");
+			return spans.filter(isToolCallSpan).map((span) => span.name);
 		},
 		hasToolCallSpan(name) {
-			return spans.some((s) => s.kind === "tool" && s.name === name);
+			return spans.some((s) => isToolCallSpan(s) && s.name === name);
+		},
+		hasNToolCallSpans(toolName, expectedCalls) {
+			return spans.filter((span) => {
+				return isToolCallSpan(span) && span.name === toolName;
+			}).length === expectedCalls;
 		},
 		listSpanNames(kind) {
 			return filterSpanNames(spans, kind);
@@ -5078,6 +5190,19 @@ function buildTraceTree(spans, checkpoints) {
 	};
 }
 //#endregion
+//#region ../runner/src/cacheConfig.ts
+function getCacheRetentionOptions(cacheConfig) {
+	const maxEntries = cacheConfig?.maxEntries;
+	if (typeof maxEntries === "number") return {
+		maxEntriesPerNamespace: maxEntries,
+		maxEntriesByNamespace: void 0
+	};
+	return {
+		maxEntriesPerNamespace: maxEntries?.default,
+		maxEntriesByNamespace: maxEntries?.namespaces
+	};
+}
+//#endregion
 //#region ../runner/src/cacheAccessTime.ts
 const defaultLastAccessedAtUpdateIntervalMs = 14400 * 1e3;
 function normalizeLastAccessedAtUpdateIntervalMs(value) {
@@ -7026,4 +7151,4 @@ function recordAssertionFailure(scope, failure) {
 	});
 }
 //#endregion
-export { setScopeCacheContext as $, repoFile as A, evalStatsConfigSchema as At, evalTime as B, evalTracer as C, resolveLlmCallsConfig as Ct, deserializeCacheValue as D, caseDetailSchema as Dt, deserializeCacheRecording as E, getCaseRowCaseKey as Et, EvalRuntimeUsageError as F, getEvalRegistry as Ft, matchesEvalTags as G, getEvalCaseInput as H, appendToEvalOutput as I, runWithEvalRegistry as It, runInEvalRuntimeScope as J, mergeEvalOutput as K, configureEvalRunLogs as L, readManualInputFile as M, evalChartsConfigSchema as Mt, evalExpect as N, columnDefSchema as Nt, serializeCacheRecording as O, caseRowSchema as Ot, EvalAssertionError as P, defineEval as Pt, setEvalOutput as Q, evalAssert as R, evalSpan as S, resolveApiCallsConfig as St, hashCacheKeySync as T, buildEvalKey as Tt, incrementEvalOutput as U, getCurrentScope as V, isInEvalScope as W, runInExistingEvalScope as X, runInEvalScope as Y, runWithEvalClock as Z, createBufferedCacheStore as _, matchesTagsFilter as _t, isCaseChildParentMessage as a, extractApiCalls as at, buildTraceTree as b, runManifestSchema as bt, resolveArtifactPath as c, simulateTokenAllocation as ct, loadEvalModule as d, getEvalTitle as dt, startEvalBackgroundJob as et, resolveEvalDefaultConfig as f, getEvalDisplayStatus as ft, commitPendingCacheWrites as g, dedupeEvalTags as gt, normalizeScoreDef as h, deriveStatusFromChildStatuses as ht, isCaseChildMessage as i, extractCacheHits as it, manualInputFileValueSchema as j, manualInputDescriptorSchema as jt, serializeCacheValue as k, evalStatAggregateSchema as kt, registerAgentEvalsPackageResolutionHooks as l, applyDerivedCallAttributes as lt, buildDeclaredColumnDefs as m, deriveStatusFromCaseRows as mt, resolveRunnableEvalCases as n, updateManualScoreRequestSchema as nt, stripTerminalControlCodes as o, extractLlmCalls as ot, loadConfig as p, deriveScopedSummaryFromCases as pt, nextEvalId as q, runCase as r, extractCacheEntries as rt, resolveTracePresentation as s, simulateLlmCallCost as st, 
filterEvalCases as t, createRunRequestSchema as tt, runWithModuleIsolation as u, getNestedAttribute as ut, createFsCacheStore as v, validateEvalTagName as vt, hashCacheKey as w, buildCaseKey as wt, captureEvalSpanError as x, runSummarySchema as xt, z$1 as y, validateTagsFilterExpression as yt, evalLog as z };
+export { setEvalOutput as $, serializeCacheValue as A, evalStatAggregateSchema as At, evalLog as B, evalSpan as C, resolveApiCallsConfig as Ct, deserializeCacheRecording as D, getCaseRowCaseKey as Dt, hashCacheKeySync as E, buildEvalKey as Et, EvalAssertionError as F, defineEval as Ft, isInEvalScope as G, getCurrentScope as H, EvalRuntimeUsageError as I, getEvalRegistry as It, nextEvalId as J, matchesEvalTags as K, appendToEvalOutput as L, runWithEvalRegistry as Lt, manualInputFileValueSchema as M, manualInputDescriptorSchema as Mt, readManualInputFile as N, evalChartsConfigSchema as Nt, deserializeCacheValue as O, caseDetailSchema as Ot, evalExpect as P, columnDefSchema as Pt, runWithEvalClock as Q, configureEvalRunLogs as R, captureEvalSpanError as S, runSummarySchema as St, hashCacheKey as T, buildCaseKey as Tt, getEvalCaseInput as U, evalTime as V, incrementEvalOutput as W, runInEvalScope as X, runInEvalRuntimeScope as Y, runInExistingEvalScope as Z, createBufferedCacheStore as _, dedupeEvalTags as _t, isCaseChildParentMessage as a, extractCacheHits as at, z$1 as b, validateTagsFilterExpression as bt, resolveArtifactPath as c, simulateLlmCallCost as ct, loadEvalModule as d, getNestedAttribute as dt, setScopeCacheContext as et, resolveEvalDefaultConfig as f, getEvalTitle as ft, commitPendingCacheWrites as g, deriveStatusFromChildStatuses as gt, normalizeScoreDef as h, deriveStatusFromCaseRows as ht, isCaseChildMessage as i, extractCacheEntries as it, repoFile as j, evalStatsConfigSchema as jt, serializeCacheRecording as k, caseRowSchema as kt, registerAgentEvalsPackageResolutionHooks as l, simulateTokenAllocation as lt, buildDeclaredColumnDefs as m, deriveScopedSummaryFromCases as mt, resolveRunnableEvalCases as n, createRunRequestSchema as nt, stripTerminalControlCodes as o, extractApiCalls as ot, loadConfig as p, getEvalDisplayStatus as pt, mergeEvalOutput as q, runCase as r, updateManualScoreRequestSchema as rt, resolveTracePresentation as s, extractLlmCalls 
as st, filterEvalCases as t, startEvalBackgroundJob as tt, runWithModuleIsolation as u, applyDerivedCallAttributes as ut, createFsCacheStore as v, matchesTagsFilter as vt, evalTracer as w, resolveLlmCallsConfig as wt, buildTraceTree as x, runManifestSchema as xt, getCacheRetentionOptions as y, validateEvalTagName as yt, evalAssert as z };

package/dist/{runOrchestration-CxjiQmof.mjs → runOrchestration-CvbTAoEb.mjs} RENAMED

@@ -1,8 +1,8 @@
-import { Dt as caseDetailSchema, Et as getCaseRowCaseKey, It as runWithEvalRegistry, J as runInEvalRuntimeScope, Ot as caseRowSchema, Z as runWithEvalClock, _t as matchesTagsFilter, bt as runManifestSchema, d as loadEvalModule, f as resolveEvalDefaultConfig, g as commitPendingCacheWrites, gt as dedupeEvalTags, ht as deriveStatusFromChildStatuses, i as isCaseChildMessage, m as buildDeclaredColumnDefs, mt as deriveStatusFromCaseRows, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, pt as deriveScopedSummaryFromCases, t as filterEvalCases, u as runWithModuleIsolation, vt as validateEvalTagName, wt as buildCaseKey, xt as runSummarySchema, yt as validateTagsFilterExpression } from "./runExecution-CFw0MQFs.mjs";
+import { Dt as getCaseRowCaseKey, Lt as runWithEvalRegistry, Ot as caseDetailSchema, Q as runWithEvalClock, St as runSummarySchema, Tt as buildCaseKey, Y as runInEvalRuntimeScope, _t as dedupeEvalTags, bt as validateTagsFilterExpression, d as loadEvalModule, f as resolveEvalDefaultConfig, g as commitPendingCacheWrites, gt as deriveStatusFromChildStatuses, ht as deriveStatusFromCaseRows, i as isCaseChildMessage, kt as caseRowSchema, m as buildDeclaredColumnDefs, mt as deriveScopedSummaryFromCases, n as resolveRunnableEvalCases, o as stripTerminalControlCodes, t as filterEvalCases, u as runWithModuleIsolation, vt as matchesTagsFilter, xt as runManifestSchema, yt as validateEvalTagName } from "./runExecution-BMnJXWhN.mjs";
+import { Result, resultify } from "t-result";
 import { readFile, readdir, rm, writeFile } from "node:fs/promises";
 import { dirname, join } from "node:path";
 import { existsSync } from "node:fs";
-import { Result, resultify } from "t-result";
 import { fileURLToPath } from "node:url";
 import { spawn } from "node:child_process";
 //#region ../runner/src/chartValidation.ts

package/dist/{runner-CY3bgsjU.mjs → runner-BfHgVhGS.mjs} RENAMED

@@ -1,5 +1,5 @@
-import { n as createRunner } from "./cli-SP4kEtYL.mjs";
-import "./src-7GbQj1sb.mjs";
+import { n as createRunner } from "./cli-ClAkjTvo.mjs";
+import "./src-DfzidkYr.mjs";
 //#region ../../apps/server/src/runner.ts
 let runnerInstance = null;
 function getRunnerInstance() {

package/dist/{runner-BlFQyvN2.mjs → runner-DJJekv9f.mjs} RENAMED

@@ -1,2 +1,2 @@
-import { n as initRunner, t as getRunnerInstance } from "./runner-CY3bgsjU.mjs";
+import { n as initRunner, t as getRunnerInstance } from "./runner-BfHgVhGS.mjs";
 export { getRunnerInstance, initRunner };

package/dist/{src-7GbQj1sb.mjs → src-DfzidkYr.mjs} RENAMED

@@ -1,5 +1,5 @@
-import { G as matchesEvalTags$1, Pt as defineEval$1 } from "./runExecution-CFw0MQFs.mjs";
-import "./cli-SP4kEtYL.mjs";
+import { Ft as defineEval$1, K as matchesEvalTags$1 } from "./runExecution-BMnJXWhN.mjs";
+import "./cli-ClAkjTvo.mjs";
 //#region src/index.ts
 /** Register an eval definition with typed tag support. */
 function defineEval(definition) {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.58.3",
+  "version": "0.58.5",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"
@@ -32,8 +32,8 @@
   "devDependencies": {
     "@types/node": "^24.7.2",
     "typescript": "^5.9.2",
-    "@agent-evals/runner": "0.0.1",
     "@agent-evals/sdk": "0.0.1",
+    "@agent-evals/runner": "0.0.1",
     "@agent-evals/shared": "0.0.1"
   },
   "scripts": {

package/skills/agent-eval/SKILL.md CHANGED

@@ -374,8 +374,12 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
   Useful trace helpers include `trace.findSpan(name)`, `trace.findSpans(name)`,
   `trace.hasSpan(name)`, `trace.findSpansByKind(kind)`,
   `trace.findToolCallSpans()`, `trace.listToolCallSpanNames()`,
-  `trace.hasToolCallSpan(name)`, `trace.listSpanNames(kind?)`,
-  `trace.listSpanNamesDfs(kind?)`, and `trace.flattenDfs()`.
+  `trace.hasToolCallSpan(name)`,
+  `trace.hasNToolCallSpans(toolName, expectedCalls)`,
+  `trace.listSpanNames(kind?)`, `trace.listSpanNamesDfs(kind?)`, and
+  `trace.flattenDfs()`.
+  The tool-call helpers include both `kind: 'tool'` spans and imported
+  execution spans recorded as `kind: 'tool_call'`.
 - `traceDisplay` promotes selected span attributes into the trace tree and
   detail pane; it supports aggregation across subtrees (`scope`, `mode`) and
   user-defined `transform(...)` for derived views (e.g. currency conversion).
@@ -385,9 +389,13 @@ See `EvalScoreDef` / `EvalManualScoreDef` in the types for the full shape
   `usage.*`, `latencyMs`, `input`, `output`, etc. read from conventional
   attribute paths. The default `steps` path reads an array from
   `span.attributes.steps`; if it is missing, direct child `model_step` spans are
-  shown as that call's steps. `latencyMs` is time to first token; duration,
-  total tokens, output tokens/sec, and USD costs are derived. Override `kinds`
-  to broaden the filter,
+  shown as that call's steps. Tool calls are aggregated from the configured
+  `toolCalls` path plus step-level `toolCalls` on authored step arrays or
+  direct `model_step` child spans, including Mastra's serialized
+  `mastra.model_step.output` format, and child `tool_call` execution spans
+  under each model step. `latencyMs` is time to first token; duration, total
+  tokens, output tokens/sec, and USD costs are derived. Override `kinds` to
+  broaden the filter,
   override `attributes.<field>` for non-default primitive span shapes, configure
   model-keyed `pricing` to derive USD costs from token counts, with nested
   `providers` entries for provider-specific rates, add `costCurrencies` to show
@@ -546,8 +554,8 @@ Mental model:
   JSON blob refs. Each namespace is capped at 100 entries by default. The runner
   prunes least recently accessed indexed entries after a run finishes and the
   runner stays idle for `cache.pruneIdleDelayMs ?? 5000` milliseconds. Configure
-  `cache.maxEntriesPerNamespace` for the default cap and
-  `cache.maxEntriesByNamespace` for exact namespace-specific caps.
+  `cache.maxEntries` as a number for the default cap, or as
+  `{ default, namespaces }` for exact namespace-specific caps.
 - Unindexed legacy cache files are ignored by normal lookup/listing. Use
   `agent-evals cache repair` to remove unindexed cache files, stale index rows,
   debug sidecars, and unreferenced blob files.