npm - @ls-stack/agent-eval - Versions diffs - 0.60.0 → 0.60.2 - Mend

@ls-stack/agent-eval 0.60.0 → 0.60.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/{app-opbcrpvt.mjs → app-DPCFFkyQ.mjs} +4 -4
package/dist/apps/web/dist/assets/{index-Dowobz-z.js → index-CM6MDNqo.js} +73 -73
package/dist/apps/web/dist/index.html +1 -1
package/dist/bin.mjs +1 -1
package/dist/caseChild.mjs +1 -1
package/dist/{cli-FOyPC8UD.mjs → cli-CbePEEua.mjs} +72 -27
package/dist/index.d.mts +50 -41
package/dist/index.mjs +3 -3
package/dist/runChild.mjs +2 -2
package/dist/{runExecution-CjWJUUZ5.mjs → runExecution-Bq0Y3y_1.mjs} +2 -2
package/dist/{runOrchestration-DE2TFAS6.mjs → runOrchestration-BpwW0AmB.mjs} +1 -1
package/dist/runner-Kp0JqxrU.mjs +15 -0
package/dist/{runner-CIxj7jYj.mjs → runner-XEP21_u9.mjs} +1 -1
package/dist/{src-p-GRSVDb.mjs → src-CVM_FqPx.mjs} +2 -2
package/package.json +1 -1
package/skills/agent-eval/SKILL.md +6 -3
package/dist/runner-Dv5cseOt.mjs +0 -15

package/dist/apps/web/dist/index.html CHANGED Viewed

@@ -25,7 +25,7 @@
       href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=JetBrains+Mono:wght@400;500&display=swap"
       rel="stylesheet"
     />
-    <script type="module" crossorigin src="/assets/index-Dowobz-z.js"></script>
+    <script type="module" crossorigin src="/assets/index-CM6MDNqo.js"></script>
     <link rel="stylesheet" crossorigin href="/assets/index-CqWfzcFb.css">
   </head>
   <body>

package/dist/bin.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
 #!/usr/bin/env node
-import { t as runCli } from "./cli-FOyPC8UD.mjs";
+import { t as runCli } from "./cli-CbePEEua.mjs";
 import { spawn } from "node:child_process";
 //#region src/bin.ts
 const moduleMocksFlag = "--experimental-test-module-mocks";

package/dist/caseChild.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Ct as resolveLlmCallsConfig, It as runWithEvalRegistry, J as runInEvalRuntimeScope, L as configureEvalRunLogs, St as resolveApiCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore, y as getCacheRetentionOptions } from "./runExecution-CjWJUUZ5.mjs";
+import { Ct as resolveLlmCallsConfig, It as runWithEvalRegistry, J as runInEvalRuntimeScope, L as configureEvalRunLogs, St as resolveApiCallsConfig, _ as createBufferedCacheStore, a as isCaseChildParentMessage, d as loadEvalModule, l as registerAgentEvalsPackageResolutionHooks, p as loadConfig, r as runCase, v as createFsCacheStore, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
 //#region ../runner/src/caseChild.ts
 let fatalErrorReported = false;
 let disconnectExpected = false;

package/dist/{cli-FOyPC8UD.mjs → cli-CbePEEua.mjs} RENAMED Viewed

@@ -1,5 +1,6 @@
-import { Ct as resolveLlmCallsConfig, Et as getCaseRowCaseKey, Ft as getEvalRegistry, Ot as caseRowSchema, St as resolveApiCallsConfig, Tt as buildEvalKey, _t as matchesTagsFilter, c as resolveArtifactPath, dt as getEvalTitle, f as resolveEvalDefaultConfig, ft as getEvalDisplayStatus, h as normalizeScoreDef, lt as applyDerivedCallAttributes, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, pt as deriveScopedSummaryFromCases, s as resolveTracePresentation, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-CjWJUUZ5.mjs";
-import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-DE2TFAS6.mjs";
+import { Ct as resolveLlmCallsConfig, Et as getCaseRowCaseKey, Ft as getEvalRegistry, Ot as caseRowSchema, St as resolveApiCallsConfig, Tt as buildEvalKey, _t as matchesTagsFilter, c as resolveArtifactPath, dt as getEvalTitle, f as resolveEvalDefaultConfig, ft as getEvalDisplayStatus, h as normalizeScoreDef, lt as applyDerivedCallAttributes, m as buildDeclaredColumnDefs, o as stripTerminalControlCodes, p as loadConfig, pt as deriveScopedSummaryFromCases, s as resolveTracePresentation, v as createFsCacheStore, xt as runSummarySchema, y as getCacheRetentionOptions } from "./runExecution-Bq0Y3y_1.mjs";
+import { C as validateCharts, S as parseEvalDiscovery, _ as runTouchesEval, a as validateTagsFilters, b as deriveEvalFreshness, c as getLatestRunInfos, d as nextShortIdFromSnapshots, f as persistCaseDetail, g as recomputePersistedCaseStatus, h as recomputeEvalStatusesInRuns, i as resolveEvalTags, l as loadPersistedRunSnapshot, m as persistRunState, n as getTargetEvalKeys, o as generateRunId, p as deleteTemporaryRuns, s as getLastRunStatuses, u as loadPersistedRunSnapshots, v as buildManualInputDescriptor, x as loadIsolatedEvalRegistry, y as parseManualInputValues } from "./runOrchestration-BpwW0AmB.mjs";
+import { parseEnv } from "node:util";
 import { resultify } from "t-result";
 import { copyFile, mkdir, readFile, rm, writeFile } from "node:fs/promises";
 import { basename, dirname, extname, isAbsolute, join, relative, resolve, sep } from "node:path";
@@ -33,7 +34,7 @@ function resolveCaseDetailLookup(run, caseId) {
 }
 //#endregion
 //#region ../runner/src/configReload.ts
-/** Coordinates idle-only reloads for `agent-evals.config.ts` in app mode. */
+/** Coordinates idle-only reloads for workspace config and `.env` in app mode. */
 function createConfigReloadController({ getActiveRunCount, closeRunnerWatchers, loadRunnerState, emitToDiscoveryListeners }) {
 	let watcher;
 	let reloadTimer;
@@ -125,7 +126,7 @@ function createConfigReloadController({ getActiveRunCount, closeRunnerWatchers,
 		await reloadConfig(state.lastChangedAt ?? (/* @__PURE__ */ new Date()).toISOString());
 	}
 	async function setupWatcher() {
-		const nextWatcher = watch(resolve(process.cwd(), "agent-evals.config.ts"), {
+		const nextWatcher = watch([resolve(process.cwd(), "agent-evals.config.ts"), resolve(process.cwd(), ".env")], {
 			awaitWriteFinish: {
 				stabilityThreshold: 100,
 				pollInterval: 20
@@ -904,10 +905,51 @@ function getWatchRootsForIncludePatterns(params) {
 	return [...roots];
 }
 //#endregion
+//#region ../runner/src/workspaceEnv.ts
+const shellEnvKeys = new Set(Object.keys(process.env));
+const appliedWorkspaceEnvValues = /* @__PURE__ */ new Map();
+async function loadWorkspaceEnv(workspaceRoot) {
+	const envPath = resolve(workspaceRoot, ".env");
+	if (!existsSync(envPath)) {
+		applyWorkspaceEnv(/* @__PURE__ */ new Map());
+		return { error: null };
+	}
+	const readResult = await resultify(() => readFile(envPath, "utf-8"));
+	if (readResult.error) return { error: `Failed to read .env at ${envPath}: ${readResult.error.message}` };
+	const parseResult = resultify(() => parseEnv(readResult.value));
+	if (parseResult.error) return { error: `Failed to parse .env at ${envPath}: ${parseResult.error.message}` };
+	applyWorkspaceEnv(new Map(getEnvEntries(parseResult.value)));
+	return { error: null };
+}
+function getEnvEntries(env) {
+	const entries = [];
+	for (const [key, value] of Object.entries(env)) if (value !== void 0) entries.push([key, value]);
+	return entries;
+}
+function applyWorkspaceEnv(nextValues) {
+	for (const [key, previousValue] of appliedWorkspaceEnvValues) {
+		if (nextValues.has(key)) continue;
+		if (process.env[key] === previousValue) delete process.env[key];
+		appliedWorkspaceEnvValues.delete(key);
+	}
+	for (const [key, value] of nextValues) {
+		if (shellEnvKeys.has(key)) continue;
+		process.env[key] = value;
+		appliedWorkspaceEnvValues.set(key, value);
+	}
+}
+//#endregion
 //#region ../runner/src/runner.ts
 const defaultCachePruneIdleDelayMs = 5e3;
-/** Create an in-memory eval runner bound to the current workspace config. */
-function createRunner({ watchForChanges = true } = {}) {
+/**
+* Create an in-memory eval runner bound to the current workspace config.
+*
+* @param options.watchForChanges Watch eval files, run history, config, and
+* workspace `.env` for live reloads.
+* @param options.loadEnv Load `.env` from the current workspace before config,
+* discovery, and runs. Shell-provided values keep precedence.
+*/
+function createRunner({ watchForChanges = true, loadEnv = true } = {}) {
 	let config;
 	let workspaceRoot;
 	let localStateDir;
@@ -1475,6 +1517,10 @@ function createRunner({ watchForChanges = true } = {}) {
 		}
 	};
 	async function loadRunnerState() {
+		if (loadEnv) {
+			const envResult = await loadWorkspaceEnv(process.cwd());
+			if (envResult.error !== null) throw new Error(envResult.error);
+		}
 		config = await loadConfig();
 		workspaceRoot = config.workspaceRoot ?? process.cwd();
 		localStateDir = resolve(workspaceRoot, ".agent-evals");
@@ -2079,7 +2125,6 @@ function parseArgs(argv) {
 */
 async function runCli(argv) {
 	const args = parseArgs(argv);
-	if (args.loadEnv && !loadWorkspaceEnv()) process.exit(1);
 	if (args.showHelp) {
 		if (args.unknownHelpTarget !== void 0) {
 			console.error(`No help found for "${args.unknownHelpTarget}".`);
@@ -2134,18 +2179,6 @@ function fileMatches(pattern, filePath) {
 	const normalized = pattern.replaceAll("\\", "/");
 	return normalized === filePath || globToRegex(normalized).test(filePath);
 }
-function loadWorkspaceEnv() {
-	const envPath = resolve(process.cwd(), ".env");
-	if (!existsSync(envPath)) return true;
-	const loadResult = resultify(() => {
-		process.loadEnvFile(envPath);
-	});
-	if (loadResult.error) {
-		console.error(`Failed to load .env at ${envPath}: ${loadResult.error.message}`);
-		return false;
-	}
-	return true;
-}
 function formatUnknownErrorDetails(error) {
 	if (error instanceof Error) return error.stack ?? error.message;
 	if (typeof error === "string") return error;
@@ -2199,19 +2232,22 @@ async function commandApp(args) {
 	const { serve } = await import("@hono/node-server");
 	const bundledWebDist = resolve(currentDir, "apps/web/dist");
 	if (existsSync(bundledWebDist)) process.env.AGENT_EVALS_WEB_DIST = bundledWebDist;
-	const appModule = await import("./app-opbcrpvt.mjs");
-	const runnerModule = await import("./runner-CIxj7jYj.mjs");
+	const appModule = await import("./app-DPCFFkyQ.mjs");
+	const runnerModule = await import("./runner-XEP21_u9.mjs");
 	if (!isHonoAppModule(appModule)) throw new Error("Server app module is invalid");
 	if (!isServerRunnerModule(runnerModule)) throw new Error("Server runner module is invalid");
-	await runnerModule.initRunner();
+	await runnerModule.initRunner({ loadEnv: args.loadEnv });
 	console.info(`Agent Evals app: http://localhost:${String(args.port)}`);
 	serve({
 		fetch: appModule.app.fetch,
 		port: args.port
 	});
 }
-async function commandList(args_) {
-	const runner = createRunner({ watchForChanges: false });
+async function commandList(args) {
+	const runner = createRunner({
+		watchForChanges: false,
+		loadEnv: args.loadEnv
+	});
 	await runner.init();
 	const discoveryIssues = runner.getDiscoveryIssues();
 	if (discoveryIssues.length > 0) {
@@ -2244,7 +2280,10 @@ async function commandList(args_) {
 	if (discoveryIssues.length > 0) process.exit(1);
 }
 async function commandRun(args) {
-	const runner = createRunner({ watchForChanges: false });
+	const runner = createRunner({
+		watchForChanges: false,
+		loadEnv: args.loadEnv
+	});
 	await runner.init();
 	if (args.evalIds.length === 0 && args.caseIds.length === 0 && args.files.length === 0 && args.tagsFilter.length === 0 && !runner.getAllowCliRunAll()) {
 		console.error("This workspace disables running all evals from the CLI. Pass --eval <id>, --file <path|glob>, --case <id>, or --tags-filter <expr> to run a targeted subset.");
@@ -2333,7 +2372,10 @@ async function commandRun(args) {
 	if (summary.status === "error" || summary.failedCases > 0 || summary.errorCases > 0) process.exit(1);
 }
 async function commandShowRuns(args) {
-	const runner = createRunner({ watchForChanges: false });
+	const runner = createRunner({
+		watchForChanges: false,
+		loadEnv: args.loadEnv
+	});
 	await runner.init();
 	const runRef = args.positionals[0];
 	if (runRef !== void 0) {
@@ -2358,7 +2400,10 @@ async function commandShowRuns(args) {
 	printRunFileIndexes(indexes);
 }
 async function commandCache(args) {
-	const runner = createRunner({ watchForChanges: false });
+	const runner = createRunner({
+		watchForChanges: false,
+		loadEnv: args.loadEnv
+	});
 	await runner.init();
 	if (args.subcommand === "list" || args.subcommand === void 0) {
 		const entries = await runner.listCache();