npm - @ls-stack/agent-eval - Versions diffs - 0.23.0 → 0.25.0 - Mend

@ls-stack/agent-eval 0.23.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/dist/{app-Cw79dJDr.mjs → app-DS3j_AyX.mjs} +6 -3
package/dist/apps/web/dist/assets/index-DNsZjOms.css +1 -0
package/dist/apps/web/dist/assets/index-DqR1YaMG.js +118 -0
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/{cli-D3QNOcPN.mjs → cli-ETfZ15RB.mjs} +151 -42
package/dist/index.d.mts +111 -12
package/dist/index.mjs +4 -4
package/dist/runChild.mjs +28 -18
package/dist/{runOrchestration-CimthgI7.mjs → runOrchestration-B31SV_Bq.mjs} +364 -117
package/dist/{runner-B-SYzW8w.mjs → runner-B2f2TEjp.mjs} +1 -1
package/dist/{runner-4yNYRvmF.mjs → runner-cj1TkR-H.mjs} +2 -2
package/dist/src-CyNb2ycA.mjs +3 -0
package/package.json +1 -1
package/skills/agent-eval/SKILL.md +14 -6
package/dist/apps/web/dist/assets/index-AUDD3rNB.js +0 -118
package/dist/apps/web/dist/assets/index-r0dVFK0B.css +0 -1
package/dist/src-CcXfWT4M.mjs +0 -3

package/dist/{runOrchestration-CimthgI7.mjs → runOrchestration-B31SV_Bq.mjs} RENAMED Viewed

@@ -1330,7 +1330,7 @@ const errorCoreFields = new Set([
 	"stack",
 	"capturedAt"
 ]);
-function isRecord$4(value) {
+function isRecord$5(value) {
 	return typeof value === "object" && value !== null && !Array.isArray(value);
 }
 function formatUnknownErrorMessage(error) {
@@ -1358,7 +1358,7 @@ function normalizeTraceError(error, capturedAt = void 0) {
 		stack: error.stack,
 		capturedAt
 	};
-	if (isRecord$4(error)) {
+	if (isRecord$5(error)) {
 		const extraFields = getErrorExtraFields(error);
 		const name = typeof error.name === "string" ? error.name : void 0;
 		const stack = typeof error.stack === "string" ? error.stack : void 0;
@@ -1383,7 +1383,7 @@ function normalizeTraceWarnings(warningOrWarnings, additionalWarnings, capturedA
 	return (additionalWarnings.length > 0 ? [warningOrWarnings, ...additionalWarnings] : Array.isArray(warningOrWarnings) ? warningOrWarnings : [warningOrWarnings]).map((warning) => normalizeTraceError(warning, capturedAt));
 }
 function isCaptureEvalSpanErrorOptions(value) {
-	if (!isRecord$4(value)) return false;
+	if (!isRecord$5(value)) return false;
 	const keys = Object.keys(value);
 	if (keys.length === 0) return false;
 	if (!keys.every((key) => key === "level")) return false;
@@ -1506,7 +1506,7 @@ function createTraceCache(generateSpanId) {
 				namespace,
 				operationType: "value",
 				operationName: info.name,
-				storedAt: (/* @__PURE__ */ new Date()).toISOString(),
+				storedAt: new Date(getRealDateNowMs()).toISOString(),
 				codeFingerprint: cacheCtx.codeFingerprint,
 				recording: await serializeCacheRecording(recording)
 			}, {
@@ -1940,7 +1940,7 @@ async function traceSpanInternal(info, fn) {
 					operationName: info.name,
 					spanName: info.name,
 					spanKind: info.kind,
-					storedAt: (/* @__PURE__ */ new Date()).toISOString(),
+					storedAt: new Date(getRealDateNowMs()).toISOString(),
 					codeFingerprint: ctx.codeFingerprint,
 					recording: await serializeCacheRecording(recording)
 				};
@@ -2541,6 +2541,12 @@ const evalStatItemSchema = z.discriminatedUnion("kind", [
 const evalStatsConfigSchema = z.array(evalStatItemSchema);
 /** Schema summarizing a discovered eval for list and overview screens. */
 const evalSummarySchema = z.object({
+	/**
+	* Stable eval identity derived from the workspace-relative file path and
+	* authored eval id. Older clients should display `id`; callers that need an
+	* exact eval must use `key`.
+	*/
+	key: z.string().default(""),
 	id: z.string(),
 	title: z.string().optional(),
 	/** Eval file path relative to the active workspace root. */
@@ -2580,6 +2586,16 @@ const evalSummarySchema = z.object({
 });
 /** Schema for one case row in an eval run result table. */
 const caseRowSchema = z.object({
+	/**
+	* Stable eval identity for this case row. Legacy rows may omit it and fall
+	* back to `evalId`.
+	*/
+	evalKey: z.string().optional(),
+	/**
+	* Stable case identity derived from file path, eval id, and case id. Legacy
+	* rows may omit it and fall back to `caseId`.
+	*/
+	caseKey: z.string().optional(),
 	caseId: z.string(),
 	evalId: z.string(),
 	status: z.enum([
@@ -2657,6 +2673,10 @@ const scoreTraceSchema = z.object({
 });
 /** Schema for the detailed payload shown when opening a specific case. */
 const caseDetailSchema = z.object({
+	/** Stable eval identity for this case detail. */
+	evalKey: z.string().optional(),
+	/** Stable case identity for this case detail. */
+	caseKey: z.string().optional(),
 	caseId: z.string(),
 	evalId: z.string(),
 	status: z.enum([
@@ -2694,6 +2714,36 @@ const caseDetailSchema = z.object({
 	*/
 	cacheRefs: z.array(traceCacheRefSchema).default([])
 });
+/** Schema for discovery problems that should be shown before running evals. Every field is required; `duplicate-eval-id` and `error` are currently the only accepted `type` and `severity` values. */
+const discoveryIssueSchema = z.object({
+	type: z.enum(["duplicate-eval-id"]),
+	severity: z.enum(["error"]),
+	filePath: z.string(),
+	evalId: z.string(),
+	message: z.string()
+});
+//#endregion
+//#region ../shared/src/evalIdentity.ts
+/** Build the stable identity for one eval inside a workspace: `<filePath>#<evalId>` with both parts passed through `encodeURIComponent`, so a literal `#` in either part cannot be confused with the separator. */
+function buildEvalKey(params) {
+	return `${encodeURIComponent(params.filePath)}#${encodeURIComponent(params.evalId)}`;
+}
+/** Build the stable identity for one eval case inside a workspace: URI-encoded `filePath`, `evalId` and `caseId` joined with `#`. */
+function buildCaseKey(params) {
+	return [
+		encodeURIComponent(params.filePath),
+		encodeURIComponent(params.evalId),
+		encodeURIComponent(params.caseId)
+	].join("#");
+}
+/** Return the collision-safe eval key stored on a row, falling back for legacy data: `row.evalId` is used only when `row.evalKey` is null or undefined, so an empty-string key is returned as-is. */
+function getCaseRowEvalKey(row) {
+	return row.evalKey ?? row.evalId;
+}
+/** Return the collision-safe case key stored on a row, falling back for legacy data: `row.caseId` is used only when `row.caseKey` is null or undefined, so an empty-string key is returned as-is. */
+function getCaseRowCaseKey(row) {
+	return row.caseKey ?? row.caseId;
+}
 //#endregion
 //#region ../shared/src/schemas/config.ts
 /** Strategy used to collapse repeated trials into one stored case result. */
@@ -2727,13 +2777,16 @@ const apiCallMetricFormatSchema = llmCallMetricFormatSchema;
 const llmCallMetricPlacementSchema = z.enum(["header", "body"]);
 /** Where an API-call metric is rendered inside the API calls tab. */
 const apiCallMetricPlacementSchema = llmCallMetricPlacementSchema;
+const callDerivedAttributeSchema = z.custom((value) => typeof value === "function", { message: "Expected a derived attribute function" }); // accepts any function; its arity and return value are not validated here
 /**
 * Schema for a single user-defined metric attached to LLM call rows.
 *
 * Each metric reads `path` from the span's `attributes` and renders the value
-* with the configured `format` and `numberFormat`. `placements` controls
-* whether the metric appears as a chip on the collapsed row header, as a row
-* inside the expanded body, or both. Defaults to `['body']` when omitted.
+* with the configured `format` and `numberFormat`. Use
+* `llmCalls.derivedAttributes` when a metric should read a value computed from
+* other attributes. `placements` controls whether the metric appears as a chip
+* on the collapsed row header, as a row inside the expanded body, or both.
+* Defaults to `['body']` when omitted.
 */
 const llmCallMetricSchema = z.object({
 	/** Display label for the metric row or header chip. */
@@ -2760,9 +2813,11 @@ const llmCallMetricSchema = z.object({
 * Schema for a single user-defined metric attached to API call rows.
 *
 * Each metric reads `path` from the span's `attributes` and renders the value
-* with the configured `format` and `numberFormat`. `placements` controls
-* whether the metric appears as a chip on the collapsed row header, as a row
-* inside the expanded body, or both. Defaults to `['body']` when omitted.
+* with the configured `format` and `numberFormat`. Use
+* `apiCalls.derivedAttributes` when a metric should read a value computed from
+* other attributes. `placements` controls whether the metric appears as a chip
+* on the collapsed row header, as a row inside the expanded body, or both.
+* Defaults to `['body']` when omitted.
 */
 const apiCallMetricSchema = z.object({
 	/** Display label for the metric row or header chip. */
@@ -2839,6 +2894,13 @@ const llmCallsConfigSchema = z.object({
 		toolCalls: z.string().optional()
 	}).optional(),
 	/**
+	* Derived attributes persisted onto every matching LLM span before
+	* `deriveFromTracing`, default outputs, trace display, and call metrics read
+	* the trace. Keys are dot-paths under `span.attributes`; return `undefined`
+	* to skip writing the attribute for one span.
+	*/
+	derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
+	/**
 	* Model/provider pricing registry used to calculate LLM-call costs from
 	* token counts. Built-in LLM cost fields are only derived from this registry.
 	*/
@@ -2867,6 +2929,13 @@ const apiCallsConfigSchema = z.object({
 		durationMs: z.string().optional(),
 		error: z.string().optional()
 	}).optional(),
+	/**
+	* Derived attributes persisted onto every matching API span before trace
+	* display and call metrics read the trace. Keys are dot-paths under
+	* `span.attributes`; return `undefined` to skip writing the attribute for
+	* one span.
+	*/
+	derivedAttributes: z.record(z.string().min(1), callDerivedAttributeSchema).optional(),
 	/** Custom user-defined metrics surfaced on each API call. */
 	metrics: z.array(apiCallMetricSchema).optional()
 });
@@ -2898,6 +2967,7 @@ const DEFAULT_LLM_CALLS_CONFIG = {
 		reasoning: "reasoning",
 		toolCalls: "toolCalls"
 	},
+	derivedAttributes: [],
 	metrics: [],
 	pricing: []
 };
@@ -2921,8 +2991,35 @@ const DEFAULT_API_CALLS_CONFIG = {
 		durationMs: "durationMs",
 		error: "error"
 	},
+	derivedAttributes: [],
 	metrics: []
 };
+function resolveDerivedAttributes(input) { // turn the authored { [path]: compute } record into a list of { path, compute } entries
+	return Object.entries(input ?? {}).map(([path, compute]) => ({ // null/undefined input yields an empty list
+		path,
+		compute
+	}));
+}
+function resolveLlmCallMetric(metric) { // normalize one authored LLM-call metric into its defaulted shape
+	return {
+		label: metric.label,
+		tooltip: metric.tooltip,
+		path: metric.path,
+		format: metric.format ?? "string", // default applies only when format is null/undefined
+		numberFormat: metric.numberFormat,
+		placements: metric.placements ? [...metric.placements] : ["body"] // copied so the result does not alias the authored array; an empty array is kept as-is
+	};
+}
+function resolveApiCallMetric(metric) { // normalize one authored API-call metric into its defaulted shape
+	return {
+		label: metric.label,
+		tooltip: metric.tooltip,
+		path: metric.path,
+		format: metric.format ?? "string", // default applies only when format is null/undefined
+		numberFormat: metric.numberFormat,
+		placements: metric.placements ? [...metric.placements] : ["body"] // copied so the result does not alias the authored array; an empty array is kept as-is
+	};
+}
 /**
 * Resolve the user-authored LLM-calls config to a fully-defaulted shape used
 * by the UI to derive the LLM calls tab.
@@ -2942,14 +3039,8 @@ function resolveLlmCallsConfig(input) {
 			...DEFAULT_LLM_CALLS_CONFIG.attributes,
 			...input?.attributes
 		},
-		metrics: (input?.metrics ?? []).map((m) => ({
-			label: m.label,
-			tooltip: m.tooltip,
-			path: m.path,
-			format: m.format ?? "string",
-			numberFormat: m.numberFormat,
-			placements: m.placements ? [...m.placements] : ["body"]
-		})),
+		derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
+		metrics: (input?.metrics ?? []).map(resolveLlmCallMetric),
 		pricing: (input?.pricing ?? []).map((p) => ({
 			model: p.model,
 			provider: p.provider,
@@ -2979,14 +3070,8 @@ function resolveApiCallsConfig(input) {
 			...DEFAULT_API_CALLS_CONFIG.attributes,
 			...input?.attributes
 		},
-		metrics: (input?.metrics ?? []).map((m) => ({
-			label: m.label,
-			tooltip: m.tooltip,
-			path: m.path,
-			format: m.format ?? "string",
-			numberFormat: m.numberFormat,
-			placements: m.placements ? [...m.placements] : ["body"]
-		}))
+		derivedAttributes: resolveDerivedAttributes(input?.derivedAttributes),
+		metrics: (input?.metrics ?? []).map(resolveApiCallMetric)
 	};
 }
 /** Zod schema for validating `agent-evals.config.ts` input. */
@@ -3037,8 +3122,8 @@ const runManifestSchema = z.object({
 	*/
 	commitSha: z.string().nullable().optional().default(null),
 	/**
-	* Eval-file fingerprints captured for this run, keyed by eval id. Older
-	* persisted runs may not include this field.
+	* Eval-file fingerprints captured for this run, keyed by exact eval key.
+	* Older persisted runs may use authored eval ids or omit this field.
 	*/
 	evalSourceFingerprints: z.record(z.string(), z.string()).optional().default({}),
 	target: z.object({
@@ -3047,6 +3132,10 @@ const runManifestSchema = z.object({
 			"evalIds",
 			"caseIds"
 		]),
+		/** Exact stable eval identities (`filePath + evalId`) selected by UI/API callers. */
+		evalKeys: z.array(z.string()).optional(),
+		/** Workspace-relative file paths or glob patterns used to filter selected evals. */
+		files: z.array(z.string()).optional(),
 		evalIds: z.array(z.string()).optional(),
 		caseIds: z.array(z.string()).optional()
 	}),
@@ -3206,7 +3295,7 @@ function getEvalTitle(evalLike) {
 }
 //#endregion
 //#region ../shared/src/utils/getNestedAttribute.ts
-function isRecord$3(value) {
+function isRecord$4(value) {
 	return typeof value === "object" && value !== null;
 }
 /**
@@ -3221,12 +3310,84 @@ function getNestedAttribute(value, path) {
 	const parts = path.split(".");
 	let current = value;
 	for (const part of parts) {
-		if (!isRecord$3(current) || !(part in current)) return;
+		if (!isRecord$4(current) || !(part in current)) return;
 		current = current[part];
 	}
 	return current;
 }
 //#endregion
+//#region ../shared/src/utils/deriveCallAttributes.ts
+function isRecord$3(value) { // true for any non-null object; arrays are accepted too
+	return typeof value === "object" && value !== null;
+}
+function mergeNestedAttribute$1(value, path, attributeValue) { // write attributeValue at dot-path `path` and return a new root; `value` itself is not mutated
+	const root = value === void 0 ? {} : { ...value };
+	const parts = path.split(".");
+	let current = root;
+	for (const [index, part] of parts.entries()) {
+		if (index === parts.length - 1) { // last segment: assign the value here
+			current[part] = attributeValue;
+			continue;
+		}
+		const nextValue = current[part];
+		const nextRecord = isRecord$3(nextValue) ? { ...nextValue } : {}; // objects on the path are shallow-copied; anything else (primitive, null, missing) is replaced by {}. NOTE(review): an array here is spread into a plain object - confirm that is acceptable
+		current[part] = nextRecord;
+		current = nextRecord;
+	}
+	return root;
+}
+function applyDerivedAttributesForKind(params) { // run each derived attribute's compute() for one span and merge the results into its attributes
+	let attributes = params.span.attributes;
+	for (const derivedAttribute of params.derivedAttributes) {
+		if (derivedAttribute.compute === void 0) continue; // entries without a compute function are skipped
+		const span = { // view of the span that includes attributes written by earlier entries in this loop
+			...params.span,
+			attributes
+		};
+		const value = (() => {
+			try {
+				return derivedAttribute.compute({
+					attributes,
+					span,
+					get: (path) => getNestedAttribute(attributes, path)
+				});
+			} catch { // a throwing compute is treated as "no value"; the error is discarded without being reported
+				return;
+			}
+		})();
+		if (value === void 0) continue; // undefined means: do not write this attribute
+		attributes = mergeNestedAttribute$1(attributes, derivedAttribute.path, value);
+	}
+	if (attributes === params.span.attributes) return params.span; // nothing was written: hand back the original span object
+	return {
+		...params.span,
+		attributes
+	};
+}
+/**
+* Persist configured derived attributes onto matching LLM/API spans.
+*
+* These derived attributes are applied before trace consumers run, so
+* `deriveFromTracing`, default usage extraction, trace display, and call
+* metrics can all read them by normal dot-path lookup.
+*/
+function applyDerivedCallAttributes(params) {
+	const llmKinds = new Set(params.llmCallsConfig.kinds);
+	const apiKinds = new Set(params.apiCallsConfig.kinds);
+	return params.spans.map((span) => { // returns a new array; spans matching neither kind set are passed through as-is
+		let nextSpan = span;
+		if (llmKinds.has(span.kind)) nextSpan = applyDerivedAttributesForKind({
+			span: nextSpan,
+			derivedAttributes: params.llmCallsConfig.derivedAttributes
+		});
+		if (apiKinds.has(span.kind)) nextSpan = applyDerivedAttributesForKind({ // a kind listed in both configs gets the LLM attributes first, then the API ones on top
+			span: nextSpan,
+			derivedAttributes: params.apiCallsConfig.derivedAttributes
+		});
+		return nextSpan;
+	});
+}
+//#endregion
 //#region ../shared/src/utils/extractLlmCalls.ts
 function readNumber$2(attributes, path) {
 	const raw = getNestedAttribute(attributes, path);
@@ -3701,6 +3862,10 @@ const createRunRequestSchema = z.object({
 			"evalIds",
 			"caseIds"
 		]),
+		/** Exact stable eval identities (`filePath + evalId`) selected by UI/API callers. */
+		evalKeys: z.array(z.string()).optional(),
+		/** Workspace-relative file paths or glob patterns used to filter selected evals. */
+		files: z.array(z.string()).optional(),
 		evalIds: z.array(z.string()).optional(),
 		caseIds: z.array(z.string()).optional()
 	}),
@@ -4671,7 +4836,8 @@ function addDefaultOutputs(params) {
 //#region ../runner/src/discovery.ts
 const evalIdMatchRegex = /\bid\s*:\s*['"]([^'"]+)['"]/;
 const evalTitleMatchRegex = /\btitle\s*:\s*['"]([^'"]+)['"]/;
-function parseEvalMetas(filePath, content) {
+/** Parse static eval metadata and discovery issues from one eval file. */
+function parseEvalDiscovery(filePath, content) {
 	const metas = [];
 	let searchIndex = 0;
 	while (searchIndex < content.length) {
@@ -4694,7 +4860,20 @@ function parseEvalMetas(filePath, content) {
 		}
 		searchIndex = extracted.nextIndex;
 	}
-	return metas;
+	const countsById = /* @__PURE__ */ new Map();
+	for (const meta of metas) countsById.set(meta.id, (countsById.get(meta.id) ?? 0) + 1);
+	const duplicateIds = new Set([...countsById].filter(([, count]) => count > 1).map(([id]) => id));
+	const issues = [...duplicateIds].map((evalId) => ({
+		type: "duplicate-eval-id",
+		severity: "error",
+		filePath,
+		evalId,
+		message: `Duplicate eval id "${evalId}" in ${filePath}. Eval ids must be unique within one file.`
+	}));
+	return {
+		metas: metas.filter((meta) => !duplicateIds.has(meta.id)),
+		issues
+	};
 }
 function extractDefineEvalObject(content, defineEvalIndex) {
 	const openParenIndex = content.indexOf("(", defineEvalIndex);
@@ -4813,40 +4992,6 @@ function getRunFreshnessTimestamp(manifest) {
 	return manifest.endedAt ?? manifest.startedAt;
 }
 //#endregion
-//#region ../runner/src/evalSummaries.ts
-/** Build the API/UI summary payload for one discovered eval. */
-function buildEvalSummary(params) {
-	const { meta, config, gitState, latestRun, lastRunStatus } = params;
-	const { sourceFingerprint, ...summaryMeta } = meta;
-	const freshness = deriveEvalFreshness({
-		latestRun,
-		gitState,
-		currentEvalSourceFingerprint: sourceFingerprint,
-		staleAfterDays: config.staleAfterDays ?? 14
-	});
-	return {
-		...summaryMeta,
-		stale: freshness.stale,
-		outdated: freshness.outdated,
-		freshnessStatus: freshness.freshnessStatus,
-		latestRunAt: latestRun?.startedAt ?? null,
-		latestRunCommitSha: latestRun?.commitSha ?? null,
-		currentCommitSha: gitState.commitSha,
-		lastRunStatus
-	};
-}
-/** Resolve which eval ids a run request should mark as the latest run. */
-function getTargetEvalIds(params) {
-	const { request, sortedEvalIds, knownEvalIds } = params;
-	if (request.target.evalIds && request.target.evalIds.length > 0) return request.target.evalIds.filter((evalId) => knownEvalIds.has(evalId));
-	return sortedEvalIds;
-}
-/** Write one latest-run snapshot to each targeted eval id. */
-function setLatestRunInfoMap(params) {
-	const { latestRunInfoMap, evalIds, info } = params;
-	for (const evalId of evalIds) latestRunInfoMap.set(evalId, info);
-}
-//#endregion
 //#region ../runner/src/outputArtifacts.ts
 const mimeTypeExtensionMap = {
 	"application/json": ".json",
@@ -4957,9 +5102,9 @@ function recomputePersistedCaseStatus(caseRow, caseDetail, scoreThresholds) {
 	return caseRow.status === "error" ? "error" : "pass";
 }
 function runTouchesEval(params) {
-	if (params.caseRows.some((caseRow) => caseRow.evalId === params.evalId)) return true;
+	if (params.caseRows.some((caseRow) => getCaseRowEvalKey(caseRow) === params.evalKey || caseRow.evalKey === void 0 && caseRow.evalId === params.evalId)) return true;
 	if (params.target.mode === "all") return params.evalExists;
-	if (params.target.mode === "evalIds") return params.target.evalIds?.includes(params.evalId) ?? false;
+	if (params.target.mode === "evalIds") return params.target.evalKeys?.includes(params.evalKey) ?? params.target.evalIds?.includes(params.evalId ?? params.evalKey) ?? false;
 	return false;
 }
 async function recomputeEvalStatusesInRuns(params) {
@@ -4968,14 +5113,15 @@ async function recomputeEvalStatusesInRuns(params) {
 		if (!runTouchesEval({
 			target: run.manifest.target,
 			caseRows: run.cases,
+			evalKey: params.evalKey,
 			evalId: params.evalId,
 			evalExists: params.evalExists
 		})) continue;
 		if (run.manifest.status === "running") continue;
 		let changed = false;
 		for (const caseRow of run.cases) {
-			if (caseRow.evalId !== params.evalId) continue;
-			const caseDetail = run.caseDetails.get(caseRow.caseId);
+			if (getCaseRowEvalKey(caseRow) !== params.evalKey && !(caseRow.evalKey === void 0 && caseRow.evalId === params.evalId)) continue;
+			const caseDetail = run.caseDetails.get(getCaseRowCaseKey(caseRow));
 			const nextStatus = recomputePersistedCaseStatus(caseRow, caseDetail, params.scoreThresholds);
 			if (caseRow.status === nextStatus) continue;
 			caseRow.status = nextStatus;
@@ -5043,8 +5189,8 @@ async function loadPersistedRunSnapshots(localStateDir) {
 	}
 	return snapshots;
 }
-async function persistCaseDetail(runDir, caseDetail) {
-	await writeFile(join(runDir, "case-details", `${encodeCaseDetailFileName(caseDetail.caseId)}.json`), JSON.stringify(caseDetail, null, 2));
+async function persistCaseDetail(runDir, caseDetail, fileId = caseDetail.caseId) {
+	await writeFile(join(runDir, "case-details", `${encodeCaseDetailFileName(fileId)}.json`), JSON.stringify(caseDetail, null, 2));
 }
 function getLastRunStatuses(params) {
 	const latestRunInfos = getLatestRunInfos(params);
@@ -5057,14 +5203,15 @@ function getLastRunStatuses(params) {
 function getLatestRunInfos(params) {
 	const { runs, knownEvals } = params;
 	const knownEvalMetas = [...knownEvals];
-	const manualScoreKeysByEval = new Map(knownEvalMetas.map((evalMeta) => [evalMeta.id, evalMeta.columnDefs.filter((columnDef) => columnDef.isManualScore === true).map((columnDef) => columnDef.key)]));
+	const evalIdByKey = new Map(knownEvalMetas.map((evalMeta) => [evalMeta.key, evalMeta.id]));
+	const manualScoreKeysByEval = new Map(knownEvalMetas.map((evalMeta) => [evalMeta.key, evalMeta.columnDefs.filter((columnDef) => columnDef.isManualScore === true).map((columnDef) => columnDef.key)]));
 	const orderedRuns = [...runs].toSorted((a, b) => new Date(getRunFreshnessTimestamp(a.manifest)).getTime() - new Date(getRunFreshnessTimestamp(b.manifest)).getTime());
 	const latestRunInfos = /* @__PURE__ */ new Map();
-	for (const run of orderedRuns) for (const evalId of getRunEvalIds(run, knownEvalMetas.map((evalMeta) => evalMeta.id))) latestRunInfos.set(evalId, {
-		status: getEvalStatusForRun(run, evalId, manualScoreKeysByEval.get(evalId) ?? []),
+	for (const run of orderedRuns) for (const evalKey of getRunEvalKeys(run, knownEvalMetas)) latestRunInfos.set(evalKey, {
+		status: getEvalStatusForRun(run, evalKey, evalIdByKey.get(evalKey), manualScoreKeysByEval.get(evalKey) ?? []),
 		startedAt: getRunFreshnessTimestamp(run.manifest),
 		commitSha: run.manifest.commitSha ?? null,
-		evalSourceFingerprint: run.manifest.evalSourceFingerprints[evalId] ?? null
+		evalSourceFingerprint: run.manifest.evalSourceFingerprints[evalKey] ?? run.manifest.evalSourceFingerprints[evalIdByKey.get(evalKey) ?? ""] ?? null
 	});
 	return latestRunInfos;
 }
@@ -5117,18 +5264,25 @@ async function readCaseDetails(runDir) {
 		if (!entry.isFile() || !entry.name.endsWith(".json")) continue;
 		const detail = await readParsedJsonFile(join(detailsDir, entry.name), { safeParse: caseDetailSchema.safeParse.bind(caseDetailSchema) });
 		if (!detail) continue;
-		caseDetails.set(detail.caseId, detail);
+		caseDetails.set(detail.caseKey ?? detail.caseId, detail);
 	}
 	return caseDetails;
 }
-function getRunEvalIds(run, knownEvalIds) {
-	const evalIds = new Set(run.cases.map((caseRow) => caseRow.evalId));
-	if (run.manifest.target.mode === "evalIds") for (const evalId of run.manifest.target.evalIds ?? []) evalIds.add(evalId);
-	else if (run.manifest.target.mode === "all" && evalIds.size === 0) for (const evalId of knownEvalIds) evalIds.add(evalId);
-	return [...evalIds];
-}
-function getEvalStatusForRun(run, evalId, manualScoreKeys) {
-	const evalCases = run.cases.filter((caseRow) => caseRow.evalId === evalId);
+function getRunEvalKeys(run, knownEvals) { // collect every eval key a persisted run refers to, from its case rows and its manifest target
+	const knownEvalMetas = [...knownEvals];
+	const evalKeys = new Set(run.cases.map(getCaseRowEvalKey)); // rows without evalKey contribute their bare evalId here
+	for (const caseRow of run.cases) {
+		if (caseRow.evalKey !== void 0) continue;
+		for (const evalMeta of knownEvalMetas) if (evalMeta.id === caseRow.evalId) evalKeys.add(evalMeta.key); // legacy row: also add the key of every known eval sharing that id
+	}
+	if (run.manifest.target.mode === "evalIds") {
+		for (const evalKey of run.manifest.target.evalKeys ?? []) evalKeys.add(evalKey);
+		for (const evalId of run.manifest.target.evalIds ?? []) for (const evalMeta of knownEvalMetas) if (evalMeta.id === evalId) evalKeys.add(evalMeta.key); // targeted ids are resolved to keys through the known evals; unknown ids add nothing
+	} else if (run.manifest.target.mode === "all" && evalKeys.size === 0) for (const evalMeta of knownEvalMetas) evalKeys.add(evalMeta.key); // "all" run with no case rows: attribute it to every known eval
+	return [...evalKeys];
+}
+function getEvalStatusForRun(run, evalKey, evalId, manualScoreKeys) {
+	const evalCases = run.cases.filter((caseRow) => getCaseRowEvalKey(caseRow) === evalKey || caseRow.evalKey === void 0 && caseRow.evalId === evalId);
 	if (evalCases.length > 0) {
 		if (hasPendingManualScores(evalCases, manualScoreKeys)) return "unscored";
 		return toLastRunStatus$1(deriveStatusFromCaseRows({ caseRows: evalCases }));
@@ -5299,8 +5453,7 @@ function resolveTracePresentation(spans, globalTraceDisplay, evalTraceDisplay) {
 }
 //#endregion
 //#region ../runner/src/runExecution.ts
-function filterEvalCases(cases, evalIds, caseIds, evalId) {
-	if (evalIds && evalIds.length > 0 && !evalIds.includes(evalId)) return [];
+function filterEvalCases(cases, caseIds) {
 	if (!caseIds || caseIds.length === 0) return cases;
 	const selectedCaseIds = new Set(caseIds);
 	return cases.filter((evalCase) => selectedCaseIds.has(evalCase.id));
@@ -5329,13 +5482,18 @@ async function callWithUnknownResult(fn, args) {
 	return await Reflect.apply(fn, void 0, args);
 }
 async function runCase(params) {
-	const { evalDef, evalId, evalCase, globalTraceDisplay, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, codeFingerprint, moduleIsolation, evalFilePath, workspaceRoot, artifactDir, runId } = params;
+	const { evalDef, evalId, evalKey = evalId, evalCase, globalTraceDisplay, llmCallsConfig = resolveLlmCallsConfig(void 0), apiCallsConfig = resolveApiCallsConfig(void 0), globalRemoveDefaultConfig, trial, startTime, cacheAdapter, cacheMode, codeFingerprint, moduleIsolation, evalFilePath, evalFileRelativePath = evalFilePath, workspaceRoot, artifactDir, runId } = params;
 	const scopedIdPrefix = buildScopedEvalIdPrefix({
 		evalId,
 		evalFilePath,
 		caseId: evalCase.id,
 		workspaceRoot
 	});
+	const caseKey = buildCaseKey({
+		filePath: evalFileRelativePath,
+		evalId,
+		caseId: evalCase.id
+	});
 	const { scope, error: executeError } = await runInEvalScope(evalCase.id, async () => {
 		const execute = async () => {
 			await Reflect.apply(evalDef.execute, evalDef, [{
@@ -5361,7 +5519,12 @@ async function runCase(params) {
 		startTime: evalDef.startTime,
 		freezeTime: evalDef.freezeTime
 	});
-	const traceTree = buildTraceTree(scope.spans, scope.checkpoints);
+	const spansWithDerivedAttributes = applyDerivedCallAttributes({
+		spans: scope.spans,
+		llmCallsConfig,
+		apiCallsConfig
+	});
+	const traceTree = buildTraceTree(spansWithDerivedAttributes, scope.checkpoints);
 	const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
 	if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
 	if (!nonAssertError && evalDef.deriveFromTracing) {
@@ -5383,7 +5546,7 @@ async function runCase(params) {
 	}
 	if (!nonAssertError) addDefaultOutputs({
 		outputs: scope.outputs,
-		spans: scope.spans,
+		spans: spansWithDerivedAttributes,
 		llmCallsConfig,
 		apiCallsConfig,
 		globalRemove: globalRemoveDefaultConfig,
@@ -5471,7 +5634,7 @@ async function runCase(params) {
 		}
 	}
 	const status = nonAssertError ? "error" : passed ? "pass" : "fail";
-	const { trace: displayTrace, traceDisplay } = resolveTracePresentation(scope.spans, globalTraceDisplay, evalDef.traceDisplay);
+	const { trace: displayTrace, traceDisplay } = resolveTracePresentation(spansWithDerivedAttributes, globalTraceDisplay, evalDef.traceDisplay);
 	const columns = {};
 	const columnOverrides = mergeDefaultColumns({
 		columns: evalDef.columns,
@@ -5496,6 +5659,8 @@ async function runCase(params) {
 		stack: nonAssertError.stack
 	} : null;
 	const caseDetail = {
+		evalKey,
+		caseKey,
 		caseId: evalCase.id,
 		evalId,
 		status,
@@ -5581,6 +5746,56 @@ async function executeQueuedCase(params) {
 	await queuedCase.onComplete(result);
 }
 //#endregion
+//#region ../runner/src/targeting.ts
+function escapeRegex(value) { // backslash-escape regex metacharacters so the text matches literally
+	return value.replace(/[|\\{}()[\]^$+?.]/g, "\\$&"); // "*" is not in this class; globToRegex handles it before calling here
+}
+function globToRegex(pattern) { // translate a glob pattern into a RegExp anchored at both ends
+	const normalized = pattern.replaceAll("\\", "/"); // backslashes in the pattern are treated as "/" separators
+	let regex = "^";
+	for (let i = 0; i < normalized.length; i++) {
+		const char = normalized[i];
+		const next = normalized[i + 1];
+		if (char === "*" && next === "*") { // NOTE(review): "**/x" becomes ".*/x", which needs a "/" and so does not match "x" at the root - confirm intended
+			regex += ".*"; // "**" matches any characters, "/" included
+			i++; // skip the second "*"
+		} else if (char === "*") regex += "[^/]*"; // single "*" stays within one path segment
+		else if (char === "?") regex += "[^/]"; // "?" is exactly one non-"/" character
+		else regex += escapeRegex(char ?? ""); // everything else is matched literally
+	}
+	regex += "$";
+	return new RegExp(regex);
+}
+function fileMatches(pattern, filePath) { // exact string match first, then glob match
+	const normalizedPattern = pattern.replaceAll("\\", "/"); // only the pattern is separator-normalized; filePath is compared as given
+	if (normalizedPattern === filePath) return true;
+	return globToRegex(normalizedPattern).test(filePath);
+}
+function matchesFiles(evalMeta, files) {
+	if (files === void 0 || files.length === 0) return true; // an absent or empty filter matches every eval
+	return files.some((file) => fileMatches(file, evalMeta.filePath)); // any one matching path or pattern is enough
+}
+function matchesEvalIds(evalMeta, evalIds) {
+	if (evalIds === void 0 || evalIds.length === 0) return true; // an absent or empty filter matches every eval
+	return evalIds.includes(evalMeta.id); // compares the authored id, so evals sharing an id in different files all match
+}
+function matchesEvalKeys(evalMeta, evalKeys) {
+	if (evalKeys === void 0 || evalKeys.length === 0) return true; // an absent or empty filter matches every eval
+	return evalKeys.includes(evalMeta.key); // exact comparison against the eval's key
+}
+/** Return the discovered evals selected by a run target. The `evalKeys`, `evalIds` and `files` filters are combined with AND (an absent or empty filter matches everything), and the result is a new array sorted by `filePath`. */
+function getTargetEvals(params) {
+	const { target } = params.request;
+	return [...params.evals].filter((evalMeta) => matchesEvalKeys(evalMeta, target.evalKeys)).filter((evalMeta) => matchesEvalIds(evalMeta, target.evalIds)).filter((evalMeta) => matchesFiles(evalMeta, target.files)).toSorted((a, b) => a.filePath.localeCompare(b.filePath));
+}
+/** Resolve which exact eval keys a run request can affect: filters `params.sortedEvals` through `getTargetEvals` and returns each selected eval's `key`. */
+function getTargetEvalKeys(params) {
+	return getTargetEvals({
+		evals: params.sortedEvals,
+		request: params.request
+	}).map((evalMeta) => evalMeta.key);
+}
+//#endregion
 //#region ../runner/src/runOrchestration.ts
 /**
 * Ranks case statuses from worst to best. Used to order trial attempts so the
@@ -5631,6 +5846,20 @@ function formatUnknownErrorDetails(error) {
 	if (typeof error === "string") return error;
 	return String(error);
 }
+function findDuplicateCaseIds(cases) { // return the case ids that occur more than once
+	const counts = /* @__PURE__ */ new Map();
+	for (const evalCase of cases) counts.set(evalCase.id, (counts.get(evalCase.id) ?? 0) + 1);
+	return [...counts].filter(([, count]) => count > 1).map(([caseId]) => caseId).toSorted(); // toSorted() with no comparator: default sort order
+}
+function findAmbiguousTargetCaseIds(preparedEvals) { // describe every case id that belongs to more than one prepared eval
+	const ownersByCaseId = /* @__PURE__ */ new Map();
+	for (const preparedEval of preparedEvals) for (const preparedCase of preparedEval.preparedCases) {
+		const owners = ownersByCaseId.get(preparedCase.caseId) ?? /* @__PURE__ */ new Set();
+		owners.add(`${preparedEval.evalMeta.filePath}#${preparedEval.evalMeta.id}`); // owner label is built from the raw filePath and id, without the URI-encoding buildEvalKey applies
+		ownersByCaseId.set(preparedCase.caseId, owners);
+	}
+	return [...ownersByCaseId].filter(([, owners]) => owners.size > 1).map(([caseId, owners]) => `${caseId} (${[...owners].join(", ")})`); // one string per ambiguous id, listing its owners in parentheses
+}
 function buildRunErrorMessage(errors) {
 	return errors.map((entry) => {
 		const [firstLine, ...detailLines] = entry.details.split("\n");
@@ -5650,14 +5879,15 @@ async function finalizePreparedCase(params) {
 		scoreKeys: preparedEval.scoreKeys
 	});
 	if (winningTrial.bufferedCacheStore !== null) await winningTrial.bufferedCacheStore.commit();
+	const artifactFileId = getCaseArtifactFileId(runState, winningTrial.caseRow);
 	runState.cases.push(winningTrial.caseRow);
-	runState.caseDetails.set(preparedCase.caseId, winningTrial.caseDetail);
+	runState.caseDetails.set(getCaseRowCaseKey(winningTrial.caseRow), winningTrial.caseDetail);
 	preparedEval.mergeColumns(winningTrial.caseDetail.columns);
 	if (winningTrial.caseRow.status === "pass") runState.summary.passedCases++;
 	else if (winningTrial.caseRow.status === "error") runState.summary.errorCases++;
 	else runState.summary.failedCases++;
-	await writeFile(join(runDir, "traces", `${preparedCase.caseId}.json`), JSON.stringify(winningTrial.caseDetail.trace, null, 2));
-	await persistCaseDetail(runDir, winningTrial.caseDetail);
+	await writeFile(join(runDir, "traces", `${encodeURIComponent(artifactFileId)}.json`), JSON.stringify(winningTrial.caseDetail.trace, null, 2));
+	await persistCaseDetail(runDir, winningTrial.caseDetail, artifactFileId);
 	onCaseFinished?.(winningTrial.caseDetail, winningTrial.caseRow);
 	emitEvent(runState, {
 		type: "case.finished",
@@ -5668,20 +5898,24 @@ async function finalizePreparedCase(params) {
 	preparedEval.evalCaseRows.push(winningTrial.caseRow);
 }
 function getPreparedCaseOrderKey(caseRow) { // Build the ordering lookup key for a case row: eval identifier and case id joined by a NUL character.
-	return `${caseRow.evalId}\u0000${caseRow.caseId}`;
+	return `${caseRow.evalKey ?? caseRow.evalId}\u0000${caseRow.caseId}`; // falls back to evalId when the row has no evalKey (null or undefined)
+}
+function getCaseArtifactFileId(runState, caseRow) { // Choose the id under which the artifact files of this case row are stored.
+	const caseKey = getCaseRowCaseKey(caseRow);
+	return runState.cases.some((existing) => existing.caseId === caseRow.caseId && getCaseRowCaseKey(existing) !== caseKey) ? caseKey : caseRow.caseId; // the case key is used only when a row already in runState.cases shares this caseId under a different case key; otherwise the plain caseId
 }
 function sortCaseRowsByPreparedOrder(caseRows, preparedEvals) { // Sort caseRows in place so they follow the order in which cases appear across preparedEvals.
 	const orderByCase = /* @__PURE__ */ new Map(); // ordering key -> position of the prepared case
 	let order = 0;
 	for (const preparedEval of preparedEvals) for (const preparedCase of preparedEval.preparedCases) {
-		orderByCase.set(`${preparedEval.evalMeta.id}\u0000${preparedCase.caseId}`, order);
+		orderByCase.set(`${preparedEval.evalMeta.key}\u0000${preparedCase.caseId}`, order); // same eval-then-NUL-then-caseId layout that getPreparedCaseOrderKey produces
 		order++;
 	}
 	caseRows.sort((left, right) => { // rows without a recorded position rank as Number.MAX_SAFE_INTEGER, so they end up after every known row
 		return (orderByCase.get(getPreparedCaseOrderKey(left)) ?? Number.MAX_SAFE_INTEGER) - (orderByCase.get(getPreparedCaseOrderKey(right)) ?? Number.MAX_SAFE_INTEGER);
 	});
 }
-async function executeRun({ runState, request, runDir, config, evals, cacheStore, lastRunStatusMap, latestRunInfoMap, emitEvent, emitDiscoveryEvent, workspaceRoot, getSourceFingerprint, getConfiguredConcurrency, getSortedEvalMetas, getTargetEvals, onCaseFinished }) {
+async function executeRun({ runState, request, runDir, config, cacheStore, lastRunStatusMap, latestRunInfoMap, emitEvent, emitDiscoveryEvent, workspaceRoot, getSourceFingerprint, getConfiguredConcurrency, getSortedEvalMetas, getTargetEvals, onCaseFinished }) {
 	try {
 		const targetEvals = getTargetEvals(request);
 		emitEvent(runState, {
@@ -5710,10 +5944,10 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
 				codeFingerprint = "";
 			}
 			if (codeFingerprint.length > 0) {
-				runState.manifest.evalSourceFingerprints[evalMeta.id] = codeFingerprint;
+				runState.manifest.evalSourceFingerprints[evalMeta.key] = codeFingerprint;
 				evalMeta.sourceFingerprint = codeFingerprint;
 			} else {
-				delete runState.manifest.evalSourceFingerprints[evalMeta.id];
+				delete runState.manifest.evalSourceFingerprints[evalMeta.key];
 				evalMeta.sourceFingerprint = null;
 			}
 			try {
@@ -5734,10 +5968,13 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
 				await runWithModuleIsolation(moduleIsolation, async () => {
 					await runInEvalRuntimeScope("cases", async () => {
 						await entry.use(async (evalDef) => {
-							const cases = filterEvalCases(resolveRunnableEvalCases({
+							const runnableCases = resolveRunnableEvalCases({
 								cases: await runWithEvalClock(evalDef.startTime, async () => typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [], { freezeTime: evalDef.freezeTime }),
 								evalId: evalMeta.id
-							}), request.target.evalIds, request.target.caseIds, evalMeta.id);
+							});
+							const duplicateCaseIds = findDuplicateCaseIds(runnableCases);
+							if (duplicateCaseIds.length > 0) throw new Error(`Duplicate case id${duplicateCaseIds.length === 1 ? "" : "s"} in ${evalMeta.filePath}#${evalMeta.id}: ${duplicateCaseIds.join(", ")}`);
+							const cases = filterEvalCases(runnableCases, request.target.caseIds);
 							runState.summary.totalCases += cases.length;
 							const defaultConfig = resolveEvalDefaultConfig({
 								evalDef,
@@ -5783,6 +6020,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
 											const { caseDetail, caseRowUpdate } = await runCase({
 												evalDef,
 												evalId: evalMeta.id,
+												evalKey: evalMeta.key,
 												evalCase,
 												globalTraceDisplay,
 												llmCallsConfig,
@@ -5795,6 +6033,7 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
 												codeFingerprint,
 												moduleIsolation,
 												evalFilePath,
+												evalFileRelativePath: evalMeta.filePath,
 												workspaceRoot,
 												artifactDir: join(runDir, "artifacts"),
 												runId: runState.manifest.id
@@ -5804,6 +6043,8 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
 												caseRow: {
 													caseId: evalCase.id,
 													evalId: evalMeta.id,
+													evalKey: evalMeta.key,
+													caseKey: caseDetail.caseKey,
 													status: caseRowUpdate.status ?? "pending",
 													durationMs: caseRowUpdate.durationMs ?? null,
 													columns: caseRowUpdate.columns ?? {},
@@ -5839,16 +6080,23 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
 					evalId: evalMeta.id,
 					details: formatUnknownErrorDetails(error)
 				});
-				lastRunStatusMap.set(evalMeta.id, "error");
-				latestRunInfoMap.set(evalMeta.id, {
+				lastRunStatusMap.set(evalMeta.key, "error");
+				latestRunInfoMap.set(evalMeta.key, {
 					status: "error",
 					startedAt: runState.manifest.endedAt ?? runState.manifest.startedAt,
 					commitSha: runState.manifest.commitSha ?? null,
-					evalSourceFingerprint: runState.manifest.evalSourceFingerprints[evalMeta.id] ?? null
+					evalSourceFingerprint: runState.manifest.evalSourceFingerprints[evalMeta.key] ?? null
 				});
 			}
 		}
-		await executeQueuedCases({
+		const ambiguousCaseTargets = request.target.caseIds && request.target.caseIds.length > 0 ? findAmbiguousTargetCaseIds(preparedEvals) : [];
+		if (ambiguousCaseTargets.length > 0) {
+			queuedCases.length = 0;
+			evalErrors.push({
+				evalId: "target",
+				details: `Ambiguous --case target. Narrow it with --file and/or --eval: ${ambiguousCaseTargets.join("; ")}`
+			});
+		} else await executeQueuedCases({
 			queuedCases,
 			concurrency: getConfiguredConcurrency(),
 			globalTraceDisplay: config.traceDisplay
@@ -5863,13 +6111,13 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
 				emitEvent
 			});
 			preparedEval.evalMeta.columnDefs = [...preparedEval.accumulatedColumns.values()];
-			lastRunStatusMap.set(preparedEval.evalMeta.id, toLastRunStatus(deriveStatusFromCaseRows({ caseRows: preparedEval.evalCaseRows })));
-			const latestStatus = lastRunStatusMap.get(preparedEval.evalMeta.id) ?? null;
-			latestRunInfoMap.set(preparedEval.evalMeta.id, {
+			lastRunStatusMap.set(preparedEval.evalMeta.key, toLastRunStatus(deriveStatusFromCaseRows({ caseRows: preparedEval.evalCaseRows })));
+			const latestStatus = lastRunStatusMap.get(preparedEval.evalMeta.key) ?? null;
+			latestRunInfoMap.set(preparedEval.evalMeta.key, {
 				status: latestStatus,
 				startedAt: runState.manifest.endedAt ?? runState.manifest.startedAt,
 				commitSha: runState.manifest.commitSha ?? null,
-				evalSourceFingerprint: runState.manifest.evalSourceFingerprints[preparedEval.evalMeta.id] ?? null
+				evalSourceFingerprint: runState.manifest.evalSourceFingerprints[preparedEval.evalMeta.key] ?? null
 			});
 		}
 		sortCaseRowsByPreparedOrder(runState.cases, preparedEvals);
@@ -5882,20 +6130,19 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
 		const completedRunAt = endTime.toISOString();
 		runState.manifest.endedAt = completedRunAt;
 		runState.summary.errorMessage = evalErrors.length > 0 ? buildRunErrorMessage(evalErrors) : null;
-		for (const evalId of getTargetEvalIds({
+		for (const evalKey of getTargetEvalKeys({
 			request,
-			sortedEvalIds: getSortedEvalMetas().map((meta) => meta.id),
-			knownEvalIds: new Set(evals.keys())
+			sortedEvals: getSortedEvalMetas()
 		})) {
-			const latestStatus = lastRunStatusMap.get(evalId) ?? toLastRunStatus(deriveStatusFromCaseRows({
+			const latestStatus = lastRunStatusMap.get(evalKey) ?? toLastRunStatus(deriveStatusFromCaseRows({
 				caseRows: [],
 				lifecycleStatus: runState.manifest.status
 			}));
-			latestRunInfoMap.set(evalId, {
+			latestRunInfoMap.set(evalKey, {
 				status: latestStatus,
 				startedAt: completedRunAt,
 				commitSha: runState.manifest.commitSha ?? null,
-				evalSourceFingerprint: runState.manifest.evalSourceFingerprints[evalId] ?? null
+				evalSourceFingerprint: runState.manifest.evalSourceFingerprints[evalKey] ?? null
 			});
 		}
 		await persistRunState(runState);
@@ -5938,4 +6185,4 @@ function toLastRunStatus(status) {
 	return status === "pending" ? null : status;
 }
 //#endregion
-export { removeDefaultConfigSchema as $, columnKindSchema as $t, extractApiCalls as A, runInEvalScope as An, cacheFileSchema as At, DEFAULT_API_CALLS_CONFIG as B, traceAttributeDisplayFormatSchema as Bt, validateCharts as C, getEvalCaseInput as Cn, evalChartTooltipExtraSchema as Ct, sseEnvelopeSchema as D, mergeEvalOutput as Dn, cacheDebugKeyFileSchema as Dt, updateManualScoreRequestSchema as E, isInEvalScope as En, cacheDebugKeyEntrySchema as Et, deriveScopedSummaryFromCases as F, repoFile as Fn, cacheRecordingSchema as Ft, apiCallMetricSchema as G, traceDisplayInputConfigSchema as Gt, agentEvalsConfigSchema as H, traceAttributeDisplayPlacementSchema as Ht, deriveStatusFromCaseRows as I, defineEval as In, cacheStatusSchema as It, llmCallMetricFormatSchema as J, traceSpanSchema as Jt, apiCallsConfigSchema as K, traceSpanErrorSchema as Kt, deriveStatusFromChildStatuses as L, getEvalRegistry as Ln, serializedCacheSpanSchema as Lt, getNestedAttribute as M, setEvalOutput as Mn, cacheModeSchema as Mt, getEvalTitle as N, setScopeCacheContext as Nn, cacheOperationTypeSchema as Nt, extractCacheEntries as O, nextEvalId as On, cacheEntrySchema as Ot, getEvalDisplayStatus as P, startEvalBackgroundJob as Pn, cacheRecordingOpSchema as Pt, llmCallsConfigSchema as Q, columnFormatSchema as Qt, runManifestSchema as R, spanCacheOptionsSchema as Rt, normalizeScoreDef as S, getCurrentScope as Sn, evalChartMetricSchema as St, createRunRequestSchema as T, incrementEvalOutput as Tn, evalChartsConfigSchema as Tt, apiCallMetricFormatSchema as U, traceAttributeDisplaySchema as Ut, DEFAULT_LLM_CALLS_CONFIG as V, traceAttributeDisplayInputSchema as Vt, apiCallMetricPlacementSchema as W, traceDisplayConfigSchema as Wt, llmCallMetricSchema as X, cellValueSchema as Xt, llmCallMetricPlacementSchema as Y, traceSpanWarningSchema as Yt, llmCallPricingSchema as Z, columnDefSchema as Zt, loadEvalModule as _, advanceEvalTime as _n, evalChartAggregateSchema as _t, loadPersistedRunSnapshot as a, z$1 as 
an, caseDetailSchema as at, loadConfig as b, evalAssert as bn, evalChartColorSchema as bt, persistCaseDetail as c, evalSpan as cn, evalStatAggregateSchema as ct, recomputePersistedCaseStatus as d, hashCacheKeySync as dn, evalSummarySchema as dt, fileRefSchema as en, resolveApiCallsConfig as et, runTouchesEval as f, deserializeCacheRecording as fn, runLogEntrySchema as ft, setLatestRunInfoMap as g, EvalAssertionError as gn, scoreTraceSchema as gt, getTargetEvalIds as h, serializeCacheValue as hn, runLogPhaseSchema as ht, getLatestRunInfos as i, runArtifactRefSchema as in, assertionFailureSchema as it, extractLlmCalls as j, runInExistingEvalScope as jn, cacheListItemSchema as jt, extractCacheHits as k, runInEvalRuntimeScope as kn, cacheEntryWithDebugKeySchema as kt, persistRunState as l, evalTracer as ln, evalStatItemSchema as lt, buildEvalSummary as m, serializeCacheRecording as mn, runLogLocationSchema as mt, generateRunId as n, numberDisplayOptionsSchema as nn, runLogsConfigSchema as nt, loadPersistedRunSnapshots as o, buildTraceTree as on, caseRowSchema as ot, resolveArtifactPath as p, deserializeCacheValue as pn, runLogLevelSchema as pt, defaultConfigKeySchema as q, traceSpanKindSchema as qt, getLastRunStatuses as r, repoFileRefSchema as rn, trialSelectionModeSchema as rt, nextShortIdFromSnapshots as s, captureEvalSpanError as sn, evalFreshnessStatusSchema as st, executeRun as t, jsonCellSchema as tn, resolveLlmCallsConfig as tt, recomputeEvalStatusesInRuns as u, hashCacheKey as un, evalStatsConfigSchema as ut, parseEvalMetas as v, appendToEvalOutput as vn, evalChartAxisSchema as vt, createFsCacheStore as w, getEvalStartTime as wn, evalChartTypeSchema as wt, buildDeclaredColumnDefs as x, evalLog as xn, evalChartConfigSchema as xt, resolveEvalDefaultConfig as y, configureEvalRunLogs as yn, evalChartBuiltinMetricSchema as yt, runSummarySchema as z, traceCacheRefSchema as zt };
+export { llmCallsConfigSchema as $, traceSpanKindSchema as $t, extractApiCalls as A, getEvalStartTime as An, evalChartTypeSchema as At, runSummarySchema as B, startEvalBackgroundJob as Bn, cacheRecordingOpSchema as Bt, validateCharts as C, advanceEvalTime as Cn, evalChartAggregateSchema as Ct, sseEnvelopeSchema as D, evalLog as Dn, evalChartConfigSchema as Dt, updateManualScoreRequestSchema as E, evalAssert as En, evalChartColorSchema as Et, getEvalDisplayStatus as F, runInEvalRuntimeScope as Fn, cacheEntryWithDebugKeySchema as Ft, apiCallMetricPlacementSchema as G, traceCacheRefSchema as Gt, DEFAULT_LLM_CALLS_CONFIG as H, defineEval as Hn, cacheStatusSchema as Ht, deriveScopedSummaryFromCases as I, runInEvalScope as In, cacheFileSchema as It, defaultConfigKeySchema as J, traceAttributeDisplayPlacementSchema as Jt, apiCallMetricSchema as K, traceAttributeDisplayFormatSchema as Kt, deriveStatusFromCaseRows as L, runInExistingEvalScope as Ln, cacheListItemSchema as Lt, applyDerivedCallAttributes as M, isInEvalScope as Mn, cacheDebugKeyEntrySchema as Mt, getNestedAttribute as N, mergeEvalOutput as Nn, cacheDebugKeyFileSchema as Nt, extractCacheEntries as O, getCurrentScope as On, evalChartMetricSchema as Ot, getEvalTitle as P, nextEvalId as Pn, cacheEntrySchema as Pt, llmCallPricingSchema as Q, traceSpanErrorSchema as Qt, deriveStatusFromChildStatuses as R, setEvalOutput as Rn, cacheModeSchema as Rt, normalizeScoreDef as S, EvalAssertionError as Sn, scoreTraceSchema as St, createRunRequestSchema as T, configureEvalRunLogs as Tn, evalChartBuiltinMetricSchema as Tt, agentEvalsConfigSchema as U, getEvalRegistry as Un, serializedCacheSpanSchema as Ut, DEFAULT_API_CALLS_CONFIG as V, repoFile as Vn, cacheRecordingSchema as Vt, apiCallMetricFormatSchema as W, spanCacheOptionsSchema as Wt, llmCallMetricPlacementSchema as X, traceDisplayConfigSchema as Xt, llmCallMetricFormatSchema as Y, traceAttributeDisplaySchema as Yt, llmCallMetricSchema as Z, 
traceDisplayInputConfigSchema as Zt, loadEvalModule as _, hashCacheKeySync as _n, evalSummarySchema as _t, getLastRunStatuses as a, columnKindSchema as an, buildCaseKey as at, loadConfig as b, serializeCacheRecording as bn, runLogLocationSchema as bt, loadPersistedRunSnapshots as c, numberDisplayOptionsSchema as cn, getCaseRowEvalKey as ct, persistRunState as d, z$1 as dn, caseRowSchema as dt, traceSpanSchema as en, removeDefaultConfigSchema as et, recomputeEvalStatusesInRuns as f, buildTraceTree as fn, discoveryIssueSchema as ft, deriveEvalFreshness as g, hashCacheKey as gn, evalStatsConfigSchema as gt, resolveArtifactPath as h, evalTracer as hn, evalStatItemSchema as ht, generateRunId as i, columnFormatSchema as in, trialSelectionModeSchema as it, extractLlmCalls as j, incrementEvalOutput as jn, evalChartsConfigSchema as jt, extractCacheHits as k, getEvalCaseInput as kn, evalChartTooltipExtraSchema as kt, nextShortIdFromSnapshots as l, repoFileRefSchema as ln, assertionFailureSchema as lt, runTouchesEval as m, evalSpan as mn, evalStatAggregateSchema as mt, getTargetEvalKeys as n, cellValueSchema as nn, resolveLlmCallsConfig as nt, getLatestRunInfos as o, fileRefSchema as on, buildEvalKey as ot, recomputePersistedCaseStatus as p, captureEvalSpanError as pn, evalFreshnessStatusSchema as pt, apiCallsConfigSchema as q, traceAttributeDisplayInputSchema as qt, getTargetEvals as r, columnDefSchema as rn, runLogsConfigSchema as rt, loadPersistedRunSnapshot as s, jsonCellSchema as sn, getCaseRowCaseKey as st, executeRun as t, traceSpanWarningSchema as tn, resolveApiCallsConfig as tt, persistCaseDetail as u, runArtifactRefSchema as un, caseDetailSchema as ut, parseEvalDiscovery as v, deserializeCacheRecording as vn, runLogEntrySchema as vt, createFsCacheStore as w, appendToEvalOutput as wn, evalChartAxisSchema as wt, buildDeclaredColumnDefs as x, serializeCacheValue as xn, runLogPhaseSchema as xt, resolveEvalDefaultConfig as y, deserializeCacheValue as yn, 
runLogLevelSchema as yt, runManifestSchema as z, setScopeCacheContext as zn, cacheOperationTypeSchema as zt };