npm - @ls-stack/agent-eval - Versions diffs - 0.14.0 → 0.16.0 - Mend

@ls-stack/agent-eval 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/{app-DKWm1oxc.mjs → app-B8e-oWYc.mjs} +3 -3
package/dist/apps/web/dist/assets/index-9hO8NpgZ.js +117 -0
package/dist/apps/web/dist/assets/{index-BVnLr79e.css → index-MARPw1bH.css} +1 -1
package/dist/apps/web/dist/index.html +2 -2
package/dist/bin.mjs +1 -1
package/dist/{cli-CMiCEQ-3.mjs → cli-BmrtjQj_.mjs} +155 -99
package/dist/index.d.mts +40 -6
package/dist/index.mjs +4 -4
package/dist/runChild.mjs +41 -3
package/dist/{runOrchestration-H0pSUl3I.mjs → runOrchestration-BDyNrRQT.mjs} +271 -124
package/dist/{runner-DLnj18MO.mjs → runner-CsZqhbiA.mjs} +2 -2
package/dist/{runner-Dx1sMCbh.mjs → runner-DABFPXkx.mjs} +1 -1
package/dist/src-CEAJYN_X.mjs +3 -0
package/package.json +5 -4
package/skills/agent-eval/SKILL.md +408 -0
package/dist/apps/web/dist/assets/index-Cx2CvM6a.js +0 -117
package/dist/src-BgGL7DDp.mjs +0 -3

package/dist/{runOrchestration-H0pSUl3I.mjs → runOrchestration-BDyNrRQT.mjs} RENAMED

@@ -46,7 +46,9 @@ function repoFile(path, mimeType) {
 //#endregion
 //#region ../sdk/src/runtime.ts
 const scopeStorage = new AsyncLocalStorage();
+const runtimeScopeStorage = new AsyncLocalStorage();
 let activeEvalScopeCount = 0;
+let activeEvalRuntimeScopeCount = 0;
 /** Error thrown when an eval assertion fails during case execution. */
 var EvalAssertionError = class extends Error {
 	constructor(message) {
@@ -60,13 +62,16 @@ function getCurrentScope() {
 	return scopeStorage.getStore();
 }
 /**
-* Return whether the current async execution is inside an active eval case.
+* Return the current eval runner phase for this async execution.
 *
-* This is useful for shared workflow code that wants to branch on eval-only
-* behavior without importing or inspecting the full eval scope.
+* Returns `null` outside eval-owned work, `env` while the runner is loading
+* eval modules for a run, `cases` while generating cases, `eval` while running
+* case `execute`, `derive` while deriving outputs from traces, `outputsSchema`
+* while validating outputs, and `scorer` while computing scores.
 */
 function isInEvalScope() {
-	return getCurrentScope() !== void 0;
+	if (activeEvalRuntimeScopeCount === 0) return null;
+	return runtimeScopeStorage.getStore() ?? null;
 }
 function registerBackgroundJobInScope(scope, promise) {
 	const trackedPromise = promise.then(() => {
@@ -122,6 +127,31 @@ function getEvalCaseInput(path = void 0) {
 function setScopeCacheContext(scope, context) {
 	scope.cacheContext = context;
 }
+/** Execute a callback while `isInEvalScope()` reports a runner phase. */
+async function runInEvalRuntimeScope(runtimeScope, fn) {
+	activeEvalRuntimeScopeCount++;
+	try {
+		return await runtimeScopeStorage.run(runtimeScope, fn);
+	} finally {
+		activeEvalRuntimeScopeCount--;
+	}
+}
+/**
+* Execute a callback with an existing case scope and a specific runner phase.
+*
+* Runner-internal helper for post-execute phases that still need access to the
+* completed case scope through output, trace, assertion, and input helpers.
+*/
+async function runInExistingEvalScope(scope, runtimeScope, fn) {
+	activeEvalScopeCount++;
+	try {
+		return await scopeStorage.run(scope, async () => {
+			return await runInEvalRuntimeScope(runtimeScope, fn);
+		});
+	} finally {
+		activeEvalScopeCount--;
+	}
+}
 /**
 * Execute a callback inside a fresh eval case scope and capture its outputs,
 * trace data, and terminal error state.
@@ -144,29 +174,24 @@ async function runInEvalScope(caseId, fn, options = {}) {
 		caseCacheRefs: [],
 		pendingBackgroundJobs: /* @__PURE__ */ new Set()
 	};
-	activeEvalScopeCount++;
-	try {
-		return await scopeStorage.run(scope, async () => {
-			try {
-				const result = await fn();
-				if (options.waitForBackgroundJobs !== false) await drainBackgroundJobs(scope);
-				return {
-					result,
-					scope,
-					error: void 0
-				};
-			} catch (error) {
-				if (options.waitForBackgroundJobs !== false) await drainBackgroundJobs(scope);
-				return {
-					result: void 0,
-					scope,
-					error: error instanceof Error ? error : new Error(String(error))
-				};
-			}
-		});
-	} finally {
-		activeEvalScopeCount--;
-	}
+	return await runInExistingEvalScope(scope, options.runtimeScope ?? "eval", async () => {
+		try {
+			const result = await fn();
+			if (options.waitForBackgroundJobs !== false) await drainBackgroundJobs(scope);
+			return {
+				result,
+				scope,
+				error: void 0
+			};
+		} catch (error) {
+			if (options.waitForBackgroundJobs !== false) await drainBackgroundJobs(scope);
+			return {
+				result: void 0,
+				scope,
+				error: error instanceof Error ? error : new Error(String(error))
+			};
+		}
+	});
 }
 /**
 * Return the next deterministic ID for the active eval case execution.
@@ -3888,6 +3913,7 @@ const agentEvalsConfigSchema = z.object({
 	trialSelection: trialSelectionModeSchema.optional(),
 	concurrency: z.number().optional(),
 	staleAfterDays: z.number().optional(),
+	allowCliRunAll: z.boolean().optional(),
 	traceDisplay: traceDisplayInputConfigSchema.optional(),
 	llmCalls: llmCallsConfigSchema.optional(),
 	apiCalls: apiCallsConfigSchema.optional(),
@@ -4856,6 +4882,7 @@ const defaultConfig = {
 	trialSelection: "lowestScore",
 	concurrency: 2,
 	staleAfterDays: 14,
+	allowCliRunAll: false,
 	traceDisplay: { attributes: [{
 		path: "input",
 		label: "Input",
@@ -4886,6 +4913,98 @@ async function loadConfig() {
 	}
 }
 //#endregion
+//#region ../runner/src/discovery.ts
+const evalIdMatchRegex = /\bid\s*:\s*['"]([^'"]+)['"]/;
+const evalTitleMatchRegex = /\btitle\s*:\s*['"]([^'"]+)['"]/;
+function parseEvalMetas(filePath, content) {
+	const metas = [];
+	let searchIndex = 0;
+	while (searchIndex < content.length) {
+		const defineEvalIndex = content.indexOf("defineEval", searchIndex);
+		if (defineEvalIndex === -1) break;
+		const extracted = extractDefineEvalObject(content, defineEvalIndex);
+		if (!extracted) {
+			searchIndex = defineEvalIndex + 10;
+			continue;
+		}
+		const id = evalIdMatchRegex.exec(extracted.objectText)?.[1];
+		if (id !== void 0) {
+			const result = {
+				filePath,
+				id
+			};
+			const title = evalTitleMatchRegex.exec(extracted.objectText)?.[1];
+			if (title !== void 0) result.title = title;
+			metas.push(result);
+		}
+		searchIndex = extracted.nextIndex;
+	}
+	return metas;
+}
+function extractDefineEvalObject(content, defineEvalIndex) {
+	const openParenIndex = content.indexOf("(", defineEvalIndex);
+	if (openParenIndex === -1) return void 0;
+	const objectStartIndex = content.indexOf("{", openParenIndex);
+	if (objectStartIndex === -1) return void 0;
+	let depth = 0;
+	let quote;
+	let inBlockComment = false;
+	let inLineComment = false;
+	let isEscaped = false;
+	for (let index = objectStartIndex; index < content.length; index++) {
+		const currentChar = content[index];
+		const nextChar = content[index + 1];
+		if (inLineComment) {
+			if (currentChar === "\n") inLineComment = false;
+			continue;
+		}
+		if (inBlockComment) {
+			if (currentChar === "*" && nextChar === "/") {
+				inBlockComment = false;
+				index++;
+			}
+			continue;
+		}
+		if (quote) {
+			if (isEscaped) {
+				isEscaped = false;
+				continue;
+			}
+			if (currentChar === "\\") {
+				isEscaped = true;
+				continue;
+			}
+			if (currentChar === quote) quote = void 0;
+			continue;
+		}
+		if (currentChar === "/" && nextChar === "/") {
+			inLineComment = true;
+			index++;
+			continue;
+		}
+		if (currentChar === "/" && nextChar === "*") {
+			inBlockComment = true;
+			index++;
+			continue;
+		}
+		if (currentChar === "\"" || currentChar === "'" || currentChar === "`") {
+			quote = currentChar;
+			continue;
+		}
+		if (currentChar === "{") {
+			depth++;
+			continue;
+		}
+		if (currentChar === "}") {
+			depth--;
+			if (depth === 0) return {
+				nextIndex: index + 1,
+				objectText: content.slice(objectStartIndex, index + 1)
+			};
+		}
+	}
+}
+//#endregion
 //#region ../runner/src/evalModuleLoader.ts
 /**
 * Import one eval module with a cache key derived from its current source so
@@ -5280,6 +5399,7 @@ const isolationParam = "agent-evals-isolate";
 const pathSegmentSeparatorPattern = /[\\/]+/;
 const isolationStorage = new AsyncLocalStorage();
 const activeIsolationRoots = /* @__PURE__ */ new Map();
+const clearedRequireCacheKeys = /* @__PURE__ */ new Set();
 let hooksRegistered = false;
 const requireFromRunner = createRequire(import.meta.url);
 const agentPackageUrlBySpecifier = new Map([
@@ -5305,7 +5425,10 @@ function getIsolationKeyFromParent(parentURL) {
 }
 function isWorkspaceFile(url, workspaceRoot) {
 	if (url.protocol !== "file:") return false;
-	const relativePath = relative(workspaceRoot, fileURLToPath(url));
+	return isWorkspaceFilePath(fileURLToPath(url), workspaceRoot);
+}
+function isWorkspaceFilePath(filePath, workspaceRoot) {
+	const relativePath = relative(workspaceRoot, filePath);
 	if (relativePath === "" || relativePath.startsWith("..") || isAbsolute(relativePath)) return false;
 	const segments = relativePath.split(pathSegmentSeparatorPattern);
 	return !segments.includes("node_modules") && !segments.includes(".agent-evals");
@@ -5340,15 +5463,23 @@ function registerModuleIsolationHooks() {
 		};
 	} });
 }
+function clearWorkspaceRequireCacheOnce(context) {
+	if (clearedRequireCacheKeys.has(context.key)) return;
+	clearedRequireCacheKeys.add(context.key);
+	for (const filePath of Object.keys(requireFromRunner.cache)) if (isWorkspaceFilePath(filePath, context.workspaceRoot)) delete requireFromRunner.cache[filePath];
+}
 /**
 * Execute module loading and eval code with fresh workspace module URLs.
 *
 * Node does not expose an ESM cache reset API, so the runner appends a
-* run-scoped query parameter to workspace file imports. Package imports are
-* left alone so SDK singletons, such as the eval registry, remain shared.
+* run-scoped query parameter to workspace file imports. CommonJS modules use
+* `require.cache` behind ESM imports, so workspace entries are cleared once per
+* run. Package imports are left alone so SDK singletons, such as the eval
+* registry, remain shared.
 */
 async function runWithModuleIsolation(context, fn) {
 	registerModuleIsolationHooks();
+	clearWorkspaceRequireCacheOnce(context);
 	activeIsolationRoots.set(context.key, context.workspaceRoot);
 	return await isolationStorage.run(context, fn);
 }
@@ -5476,20 +5607,26 @@ async function runCase(params) {
 	const traceTree = buildTraceTree(scope.spans, scope.checkpoints);
 	const nonAssertError = executeError && !(executeError instanceof EvalAssertionError) ? executeError : null;
 	if (executeError instanceof EvalAssertionError && scope.assertionFailures.length === 0) scope.assertionFailures.push(toAssertionFailure(executeError.message, executeError));
-	if (!nonAssertError && evalDef.deriveFromTracing) try {
-		const derived = await callWithUnknownResult(evalDef.deriveFromTracing, [{
-			trace: traceTree,
-			input: evalCase.input,
-			case: evalCase
-		}]);
-		if (!isRecord(derived)) throw new Error("deriveFromTracing must return an object");
-		for (const [key, value] of Object.entries(derived)) if (!(key in scope.outputs)) scope.outputs[key] = value;
-	} catch (e) {
-		const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
-		scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
+	if (!nonAssertError && evalDef.deriveFromTracing) {
+		const { deriveFromTracing } = evalDef;
+		try {
+			const derived = await runInExistingEvalScope(scope, "derive", async () => {
+				return await callWithUnknownResult(deriveFromTracing, [{
+					trace: traceTree,
+					input: evalCase.input,
+					case: evalCase
+				}]);
+			});
+			if (!isRecord(derived)) throw new Error("deriveFromTracing must return an object");
+			for (const [key, value] of Object.entries(derived)) if (!(key in scope.outputs)) scope.outputs[key] = value;
+		} catch (e) {
+			const message = `deriveFromTracing threw: ${e instanceof Error ? e.message : String(e)}`;
+			scope.assertionFailures.push(toAssertionFailure(message, e instanceof Error ? e : void 0));
+		}
 	}
 	if (!nonAssertError && evalDef.outputsSchema) {
-		const parsedOutputs = evalDef.outputsSchema.safeParse(getOutputsSchemaInput(evalDef.outputsSchema, scope.outputs));
+		const { outputsSchema } = evalDef;
+		const parsedOutputs = await runInExistingEvalScope(scope, "outputsSchema", () => outputsSchema.safeParse(getOutputsSchemaInput(outputsSchema, scope.outputs)));
 		if (parsedOutputs.success) scope.outputs = {
 			...scope.outputs,
 			...parsedOutputs.data
@@ -5511,6 +5648,7 @@ async function runCase(params) {
 		}, {
 			input: evalCase.input,
 			idPrefix: `${scopedIdPrefix}-score-${toStableIdSegment(key)}`,
+			runtimeScope: "scorer",
 			cacheContext: cacheAdapter ? {
 				adapter: cacheAdapter,
 				mode: cacheMode,
@@ -5791,12 +5929,19 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
 			} catch {
 				codeFingerprint = "";
 			}
-			if (codeFingerprint.length > 0) runState.manifest.evalSourceFingerprints[evalMeta.id] = codeFingerprint;
-			else delete runState.manifest.evalSourceFingerprints[evalMeta.id];
+			if (codeFingerprint.length > 0) {
+				runState.manifest.evalSourceFingerprints[evalMeta.id] = codeFingerprint;
+				evalMeta.sourceFingerprint = codeFingerprint;
+			} else {
+				delete runState.manifest.evalSourceFingerprints[evalMeta.id];
+				evalMeta.sourceFingerprint = null;
+			}
 			try {
 				const registry = getEvalRegistry();
 				await runWithModuleIsolation(moduleIsolation, async () => {
-					await loadEvalModule(evalFilePath, codeFingerprint);
+					await runInEvalRuntimeScope("env", async () => {
+						await loadEvalModule(evalFilePath, codeFingerprint);
+					});
 				});
 				const entry = registry.get(evalMeta.id);
 				if (!entry) {
@@ -5807,87 +5952,89 @@ async function executeRun({ runState, request, runDir, config, evals, cacheStore
 					continue;
 				}
 				await runWithModuleIsolation(moduleIsolation, async () => {
-					await entry.use(async (evalDef) => {
-						const cases = filterEvalCases(resolveRunnableEvalCases({
-							cases: typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [],
-							evalId: evalMeta.id
-						}), request.target.evalIds, request.target.caseIds, evalMeta.id);
-						runState.summary.totalCases += cases.length;
-						const accumulatedColumns = /* @__PURE__ */ new Map();
-						const evalCaseRows = [];
-						const preparedCases = [];
-						const scoreKeys = Object.freeze(Object.keys(evalDef.scores ?? {}));
-						const manualScoreKeys = Object.freeze(Object.keys(evalDef.manualScores ?? {}));
-						const preparedEval = {
-							evalMeta,
-							accumulatedColumns,
-							evalCaseRows,
-							preparedCases,
-							scoreKeys: Object.freeze([...scoreKeys, ...manualScoreKeys]),
-							mergeColumns: (columns) => {
-								mergeColumnDefs(accumulatedColumns, columns, evalDef.columns, evalDef.scores, evalDef.manualScores);
-							}
-						};
-						preparedEvals.push(preparedEval);
-						for (const evalCase of cases) {
-							const trialResults = [];
-							const preparedCase = {
-								caseId: evalCase.id,
-								trialResults,
-								finalized: false
+					await runInEvalRuntimeScope("cases", async () => {
+						await entry.use(async (evalDef) => {
+							const cases = filterEvalCases(resolveRunnableEvalCases({
+								cases: typeof evalDef.cases === "function" ? await evalDef.cases() : evalDef.cases ?? [],
+								evalId: evalMeta.id
+							}), request.target.evalIds, request.target.caseIds, evalMeta.id);
+							runState.summary.totalCases += cases.length;
+							const accumulatedColumns = /* @__PURE__ */ new Map();
+							const evalCaseRows = [];
+							const preparedCases = [];
+							const scoreKeys = Object.freeze(Object.keys(evalDef.scores ?? {}));
+							const manualScoreKeys = Object.freeze(Object.keys(evalDef.manualScores ?? {}));
+							const preparedEval = {
+								evalMeta,
+								accumulatedColumns,
+								evalCaseRows,
+								preparedCases,
+								scoreKeys: Object.freeze([...scoreKeys, ...manualScoreKeys]),
+								mergeColumns: (columns) => {
+									mergeColumnDefs(accumulatedColumns, columns, evalDef.columns, evalDef.scores, evalDef.manualScores);
+								}
 							};
-							preparedCases.push(preparedCase);
-							for (let trial = 0; trial < request.trials; trial++) {
-								const bufferedCacheStore = cacheEnabled && cacheMode !== "bypass" ? createBufferedCacheStore(cacheStore) : null;
-								queuedCases.push({
-									execute: async ({ startTime, globalTraceDisplay }) => {
-										const { caseDetail, caseRowUpdate } = await runCase({
-											evalDef,
-											evalId: evalMeta.id,
-											evalCase,
-											globalTraceDisplay,
-											trial,
-											startTime,
-											cacheAdapter: bufferedCacheStore ?? (cacheEnabled ? cacheStore : null),
-											cacheMode,
-											codeFingerprint,
-											moduleIsolation,
-											evalFilePath,
-											workspaceRoot,
-											artifactDir: join(runDir, "artifacts"),
-											runId: runState.manifest.id
-										});
-										return {
-											caseDetail,
-											caseRow: {
-												caseId: evalCase.id,
+							preparedEvals.push(preparedEval);
+							for (const evalCase of cases) {
+								const trialResults = [];
+								const preparedCase = {
+									caseId: evalCase.id,
+									trialResults,
+									finalized: false
+								};
+								preparedCases.push(preparedCase);
+								for (let trial = 0; trial < request.trials; trial++) {
+									const bufferedCacheStore = cacheEnabled && cacheMode !== "bypass" ? createBufferedCacheStore(cacheStore) : null;
+									queuedCases.push({
+										execute: async ({ startTime, globalTraceDisplay }) => {
+											const { caseDetail, caseRowUpdate } = await runCase({
+												evalDef,
 												evalId: evalMeta.id,
-												status: caseRowUpdate.status ?? "pending",
-												latencyMs: caseRowUpdate.latencyMs ?? null,
-												columns: caseRowUpdate.columns ?? {},
-												trial
-											}
-										};
-									},
-									onComplete: async ({ caseDetail, caseRow }) => {
-										trialResults.push({
-											caseDetail,
-											caseRow,
-											bufferedCacheStore
-										});
-										if (trialResults.length !== request.trials) return;
-										await finalizePreparedCase({
-											runState,
-											runDir,
-											preparedEval,
-											preparedCase,
-											onCaseFinished,
-											emitEvent
-										});
-									}
-								});
+												evalCase,
+												globalTraceDisplay,
+												trial,
+												startTime,
+												cacheAdapter: bufferedCacheStore ?? (cacheEnabled ? cacheStore : null),
+												cacheMode,
+												codeFingerprint,
+												moduleIsolation,
+												evalFilePath,
+												workspaceRoot,
+												artifactDir: join(runDir, "artifacts"),
+												runId: runState.manifest.id
+											});
+											return {
+												caseDetail,
+												caseRow: {
+													caseId: evalCase.id,
+													evalId: evalMeta.id,
+													status: caseRowUpdate.status ?? "pending",
+													latencyMs: caseRowUpdate.latencyMs ?? null,
+													columns: caseRowUpdate.columns ?? {},
+													trial
+												}
+											};
+										},
+										onComplete: async ({ caseDetail, caseRow }) => {
+											trialResults.push({
+												caseDetail,
+												caseRow,
+												bufferedCacheStore
+											});
+											if (trialResults.length !== request.trials) return;
+											await finalizePreparedCase({
+												runState,
+												runDir,
+												preparedEval,
+												preparedCase,
+												onCaseFinished,
+												emitEvent
+											});
+										}
+									});
+								}
 							}
-						}
+						});
 					});
 				});
 			} catch (error) {
@@ -5995,4 +6142,4 @@ function toLastRunStatus(status) {
 	return status === "pending" ? null : status;
 }
 //#endregion
-export { evalFreshnessStatusSchema as $, evalAssert as $t, getEvalDisplayStatus as A, traceDisplayInputConfigSchema as At, apiCallMetricPlacementSchema as B, jsonCellSchema as Bt, updateManualScoreRequestSchema as C, spanCacheOptionsSchema as Ct, extractLlmCalls as D, traceAttributeDisplayPlacementSchema as Dt, extractApiCalls as E, traceAttributeDisplayInputSchema as Et, runSummarySchema as F, cellValueSchema as Ft, llmCallMetricSchema as G, buildTraceTree as Gt, apiCallsConfigSchema as H, repoFileRefSchema as Ht, DEFAULT_API_CALLS_CONFIG as I, columnDefSchema as It, resolveLlmCallsConfig as J, evalTracer as Jt, llmCallsConfigSchema as K, captureEvalSpanError as Kt, DEFAULT_LLM_CALLS_CONFIG as L, columnFormatSchema as Lt, deriveStatusFromCaseRows as M, traceSpanKindSchema as Mt, deriveStatusFromChildStatuses as N, traceSpanSchema as Nt, getNestedAttribute as O, traceAttributeDisplaySchema as Ot, runManifestSchema as P, traceSpanWarningSchema as Pt, caseRowSchema as Q, appendToEvalOutput as Qt, agentEvalsConfigSchema as R, columnKindSchema as Rt, createRunRequestSchema as S, serializedCacheSpanSchema as St, extractCacheHits as T, traceAttributeDisplayFormatSchema as Tt, llmCallMetricFormatSchema as U, runArtifactRefSchema as Ut, apiCallMetricSchema as V, numberDisplayOptionsSchema as Vt, llmCallMetricPlacementSchema as W, z$1 as Wt, assertionFailureSchema as X, hashCacheKeySync as Xt, trialSelectionModeSchema as Y, hashCacheKey as Yt, caseDetailSchema as Z, EvalAssertionError as Zt, loadEvalModule as _, cacheModeSchema as _t, loadPersistedRunSnapshot as a, nextEvalId as an, evalChartAggregateSchema as at, normalizeScoreDef as b, cacheRecordingSchema as bt, persistCaseDetail as c, setScopeCacheContext as cn, evalChartColorSchema as ct, recomputePersistedCaseStatus as d, defineEval as dn, evalChartTooltipExtraSchema as dt, getCurrentScope as en, evalStatAggregateSchema as et, runTouchesEval as f, getEvalRegistry as fn, evalChartTypeSchema as ft, setLatestRunInfoMap 
as g, cacheListItemSchema as gt, getTargetEvalIds as h, cacheFileSchema as ht, getLatestRunInfos as i, mergeEvalOutput as in, scoreTraceSchema as it, deriveScopedSummaryFromCases as j, traceSpanErrorSchema as jt, getEvalTitle as k, traceDisplayConfigSchema as kt, persistRunState as l, startEvalBackgroundJob as ln, evalChartConfigSchema as lt, buildEvalSummary as m, cacheEntrySchema as mt, generateRunId as n, incrementEvalOutput as nn, evalStatsConfigSchema as nt, loadPersistedRunSnapshots as o, runInEvalScope as on, evalChartAxisSchema as ot, resolveArtifactPath as p, evalChartsConfigSchema as pt, resolveApiCallsConfig as q, evalSpan as qt, getLastRunStatuses as r, isInEvalScope as rn, evalSummarySchema as rt, nextShortIdFromSnapshots as s, setEvalOutput as sn, evalChartBuiltinMetricSchema as st, executeRun as t, getEvalCaseInput as tn, evalStatItemSchema as tt, recomputeEvalStatusesInRuns as u, repoFile as un, evalChartMetricSchema as ut, loadConfig as v, cacheOperationTypeSchema as vt, sseEnvelopeSchema as w, traceCacheRefSchema as wt, createFsCacheStore as x, cacheStatusSchema as xt, buildDeclaredColumnDefs as y, cacheRecordingOpSchema as yt, apiCallMetricFormatSchema as z, fileRefSchema as zt };
+export { caseRowSchema as $, appendToEvalOutput as $t, getEvalTitle as A, traceDisplayConfigSchema as At, apiCallMetricFormatSchema as B, fileRefSchema as Bt, createRunRequestSchema as C, serializedCacheSpanSchema as Ct, extractApiCalls as D, traceAttributeDisplayInputSchema as Dt, extractCacheHits as E, traceAttributeDisplayFormatSchema as Et, runManifestSchema as F, traceSpanWarningSchema as Ft, llmCallMetricPlacementSchema as G, z$1 as Gt, apiCallMetricSchema as H, numberDisplayOptionsSchema as Ht, runSummarySchema as I, cellValueSchema as It, resolveApiCallsConfig as J, evalSpan as Jt, llmCallMetricSchema as K, buildTraceTree as Kt, DEFAULT_API_CALLS_CONFIG as L, columnDefSchema as Lt, deriveScopedSummaryFromCases as M, traceSpanErrorSchema as Mt, deriveStatusFromCaseRows as N, traceSpanKindSchema as Nt, extractLlmCalls as O, traceAttributeDisplayPlacementSchema as Ot, deriveStatusFromChildStatuses as P, traceSpanSchema as Pt, caseDetailSchema as Q, EvalAssertionError as Qt, DEFAULT_LLM_CALLS_CONFIG as R, columnFormatSchema as Rt, createFsCacheStore as S, cacheStatusSchema as St, sseEnvelopeSchema as T, traceCacheRefSchema as Tt, apiCallsConfigSchema as U, repoFileRefSchema as Ut, apiCallMetricPlacementSchema as V, jsonCellSchema as Vt, llmCallMetricFormatSchema as W, runArtifactRefSchema as Wt, trialSelectionModeSchema as X, hashCacheKey as Xt, resolveLlmCallsConfig as Y, evalTracer as Yt, assertionFailureSchema as Z, hashCacheKeySync as Zt, loadEvalModule as _, cacheListItemSchema as _t, loadPersistedRunSnapshot as a, mergeEvalOutput as an, scoreTraceSchema as at, buildDeclaredColumnDefs as b, cacheRecordingOpSchema as bt, persistCaseDetail as c, runInEvalScope as cn, evalChartBuiltinMetricSchema as ct, recomputePersistedCaseStatus as d, setScopeCacheContext as dn, evalChartMetricSchema as dt, evalAssert as en, evalFreshnessStatusSchema as et, runTouchesEval as f, startEvalBackgroundJob as fn, evalChartTooltipExtraSchema as ft, setLatestRunInfoMap as g, 
cacheFileSchema as gt, getTargetEvalIds as h, getEvalRegistry as hn, cacheEntrySchema as ht, getLatestRunInfos as i, isInEvalScope as in, evalSummarySchema as it, getEvalDisplayStatus as j, traceDisplayInputConfigSchema as jt, getNestedAttribute as k, traceAttributeDisplaySchema as kt, persistRunState as l, runInExistingEvalScope as ln, evalChartColorSchema as lt, buildEvalSummary as m, defineEval as mn, evalChartsConfigSchema as mt, generateRunId as n, getEvalCaseInput as nn, evalStatItemSchema as nt, loadPersistedRunSnapshots as o, nextEvalId as on, evalChartAggregateSchema as ot, resolveArtifactPath as p, repoFile as pn, evalChartTypeSchema as pt, llmCallsConfigSchema as q, captureEvalSpanError as qt, getLastRunStatuses as r, incrementEvalOutput as rn, evalStatsConfigSchema as rt, nextShortIdFromSnapshots as s, runInEvalRuntimeScope as sn, evalChartAxisSchema as st, executeRun as t, getCurrentScope as tn, evalStatAggregateSchema as tt, recomputeEvalStatusesInRuns as u, setEvalOutput as un, evalChartConfigSchema as ut, parseEvalMetas as v, cacheModeSchema as vt, updateManualScoreRequestSchema as w, spanCacheOptionsSchema as wt, normalizeScoreDef as x, cacheRecordingSchema as xt, loadConfig as y, cacheOperationTypeSchema as yt, agentEvalsConfigSchema as z, columnKindSchema as zt };

package/dist/{runner-DLnj18MO.mjs → runner-CsZqhbiA.mjs} RENAMED

@@ -1,5 +1,5 @@
-import { n as createRunner } from "./cli-CMiCEQ-3.mjs";
-import "./src-BgGL7DDp.mjs";
+import { n as createRunner } from "./cli-BmrtjQj_.mjs";
+import "./src-CEAJYN_X.mjs";
 //#region ../../apps/server/src/runner.ts
 let runnerInstance = null;
 function getRunnerInstance() {

package/dist/{runner-Dx1sMCbh.mjs → runner-DABFPXkx.mjs} RENAMED

@@ -1,2 +1,2 @@
-import { n as initRunner, t as getRunnerInstance } from "./runner-DLnj18MO.mjs";
+import { n as initRunner, t as getRunnerInstance } from "./runner-CsZqhbiA.mjs";
 export { getRunnerInstance, initRunner };

package/dist/src-CEAJYN_X.mjs ADDED

@@ -0,0 +1,3 @@
+import "./runOrchestration-BDyNrRQT.mjs";
+import "./cli-BmrtjQj_.mjs";
+export {};

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@ls-stack/agent-eval",
-  "version": "0.14.0",
+  "version": "0.16.0",
   "type": "module",
   "bin": {
     "agent-evals": "./dist/bin.mjs"
@@ -13,7 +13,8 @@
     }
   },
   "files": [
-    "dist"
+    "dist",
+    "skills"
   ],
   "tsdown": {
     "clean": true,
@@ -58,8 +59,8 @@
     "@types/node": "^24.7.2",
     "typescript": "^5.9.2",
     "@agent-evals/runner": "0.0.1",
-    "@agent-evals/sdk": "0.0.1",
-    "@agent-evals/shared": "0.0.1"
+    "@agent-evals/shared": "0.0.1",
+    "@agent-evals/sdk": "0.0.1"
   },
   "scripts": {
     "build": "pnpm --filter @agent-evals/web build && tsdown",