npm - promptfoo - Versions diffs - 0.121.4 → 0.121.7 - Mend

promptfoo 0.121.4 → 0.121.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (497) hide show

package/dist/src/main.js CHANGED Viewed

@@ -1,34 +1,45 @@
 #!/usr/bin/env node
-import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-DksKw1Qc.js";
-import { A as getDefaultPort, F as VERSION, L as HUMAN_ASSERTION_TYPE, M as getShareApiBaseUrl, N as getShareViewBaseUrl, T as CLOUD_PROVIDER_PREFIX, i as fetchWithTimeout, j as getDefaultShareViewBaseUrl, k as TERMINAL_MAX_WIDTH, l as sleep, m as REQUEST_TIMEOUT_MS, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, v as isPromptfooSampleTarget } from "./fetch-BEWnXrrG.js";
-import { t as invariant } from "./invariant-BtWWVVhl.js";
-import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-F9d_5sMC.js";
-import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-DQZ5sVjW.js";
-import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-CQPez_Jp.js";
-import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-Dm9JM6Vb.js";
-import { r as importModule, t as getDirectory } from "./esm-tVgYPY-f.js";
-import { a as getNunjucksEngine, i as extractVariablesFromTemplates, r as extractVariablesFromTemplate, t as renderEnvOnlyInObject } from "./render-CMEpfLaO.js";
-import { a as openAuthBrowser, d as getRemoteHealthUrl, f as neverGenerateRemote, i as checkServerRunning, l as getRemoteGenerationUrl, m as shouldGenerateRemote, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-BNYztJkh.js";
-import { A as renderPrompt, B as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY, C as extractVariablesFromJson, F as TokenUsageTracker, H as getMaxCharsPerMessageModifierValue, P as redteamProviderManager, S as extractPromptFromTags, T as getShortPluginId, V as getGeneratedPromptOverLimit, _ as pluginMatchesStrategyTargets, _t as getUserTeams, a as resolveProviderConfigs, b as extractGoalFromPrompt, bt as resolveTeamFromIdentifier, c as createTransformRequest, ct as canCreateTargets, d as Strategies, dt as getConfigFromCloud, f as loadStrategy, ft as getEvalConfigFromCloud, g as retrieveMedia, gt as getProviderFromCloud, h as mediaExists, i as resolveProvider, l as createTransformResponse, lt as checkCloudPermissions, m as getMediaStorage, mt as getPluginSeverityOverridesFromCloud, n as loadApiProvider, o as MCPProvider, p as validateStrategies, pt as getOrgContext, r as loadApiProviders, s as HttpProvider, t as getProviderIds, ut as getCloudDatabaseId, vt as isCloudProvider, xt as resolveTeamId, yt as makeRequest$1, z as PromptfooHarmfulCompletionProvider } from "./providers-BV_KMZje.js";
-import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-mb7c8hbp.js";
-import { i as isJavascriptFile } from "./fileExtensions-AWa2ZML4.js";
-import { A as getProviderDescription, C as deduplicateTestCases, D as resultIsForTestCase, E as getTestCaseDeduplicationKey, O as checkProviderApiKeys, S as setupEnv, T as filterRuntimeVars, a as ComparisonEvalNotFoundError, c as mergeComparisonTables, f as maybeLoadConfigFromExternalFile, i as writeOutput, k as doesProviderRefMatch, l as fetchCsvFromGoogleSheet, n as createOutputMetadata, o as evalTableToJson, p as maybeLoadFromExternalFile, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, v as readFilters, w as extractRuntimeVars, y as readOutput } from "./util-DxWpWjhc.js";
-import { r as runPython } from "./pythonUtils-CnndUbW-.js";
-import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, R as getCustomPolicies, S as PlinyPlugin, T as IntentPlugin, V as fetchHuggingFaceDataset, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, dt as processPrompts, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, ft as readPrompts, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, lt as doRemoteGrading, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, pt as readProviderPromptMap, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, ut as getDefaultProviders, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-eIHhRqoC.js";
-import { i as generateIdFromPrompt } from "./utils-BLJKfv0y.js";
-import { n as sha256, t as randomSequence } from "./createHash-ChI45QR1.js";
-import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CXrvO-wA.js";
-import { n as getBlobUrl, t as getBlobByHash } from "./blobs-BQWqnnvL.js";
-import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-C4CH3zRr.js";
-import { n as isBlobStorageEnabled } from "./extractor-DNSeBVOJ.js";
-import { n as escapeRegExp, t as ellipsize } from "./text-KvuD2Iko.js";
-import { n as getTraceStore } from "./store-CYEy5J2D.js";
-import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-IvuDYSvQ.js";
-import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-CE5G3a7x.js";
-import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-u4UVafl6.js";
-import { t as EvalResult } from "./evalResult-D3hVYFis.js";
-import { t as formatDuration } from "./formatDuration-DZzPsexs.js";
-import { n as shouldUseInkUI } from "./interactiveCheck-CLERUB0c.js";
+import { C as getConfigDirectoryPath, D as getEnvInt, E as getEnvFloat, M as isNonInteractive, N as state, O as getEnvString, T as getEnvBool, _ as extractJsonObjects, a as isDebugEnabled, b as orderKeys, c as setLogCallback, g as extractFirstJsonObject, i as initializeRunLogging, j as isCI, l as setLogLevel, m as sanitizeObject, n as getLogLevel, s as logger, t as closeLogger, w as setConfigDirectoryPath } from "./logger-BbY6ypFL.js";
+import { F as getShareApiBaseUrl, I as getShareViewBaseUrl, M as TERMINAL_MAX_WIDTH, N as getDefaultPort, O as CLOUD_PROVIDER_PREFIX, P as getDefaultShareViewBaseUrl, R as HUMAN_ASSERTION_TYPE, _ as REQUEST_TIMEOUT_MS, f as sleep, i as fetchWithTimeout, n as fetchWithProxy, r as fetchWithRetries, t as clearAgentCache, x as isPromptfooSampleTarget } from "./fetch-DXUnXkVU.js";
+import { n as VERSION } from "./version-eRkNuGv8.js";
+import { t as invariant } from "./invariant-B2Rf6avk.js";
+import { a as getAuthor, c as isLoggedIntoCloud, l as promptForEmailUnverified, n as checkEmailStatusAndMaybeExit, o as getUserEmail, r as clearUserEmail, s as getUserId, t as checkEmailStatus, u as setUserEmail } from "./accounts-CjFnOPmb.js";
+import { a as cloudConfig, c as writeGlobalConfigPartial, o as readGlobalConfig, r as CloudConfig } from "./cloud-D3DiFqH6.js";
+import { n as TelemetryEventSchema, r as telemetry } from "./telemetry-00ezXr_t.js";
+import { $ as HARM_PLUGINS, B as categoryAliases, D as ADDITIONAL_STRATEGIES, E as isUuid, F as getDefaultNFanout, G as ADDITIONAL_PLUGINS, H as riskCategorySeverityMap, J as CANARY_BREAKING_STRATEGY_IDS, K as ALL_PLUGINS, L as isFanoutStrategy, N as STRATEGY_COLLECTIONS, P as STRATEGY_COLLECTION_MAPPINGS, Q as FOUNDATION_PLUGINS, R as isMultiTurnStrategy, S as PluginConfigSchema, U as subCategoryDescriptions, W as ALIASED_PLUGIN_MAPPINGS, X as DEFAULT_PLUGINS, Y as DATASET_EXEMPT_PLUGINS, Z as FINANCIAL_PLUGINS, _ as PromptSchema, _t as CODING_AGENT_PLUGINS, a as EvaluateOptionsSchema, at as MULTI_INPUT_VAR, b as ConversationMessageSchema, c as TestSuiteConfigSchema, dt as REMOTE_ONLY_PLUGIN_IDS, et as INSURANCE_PLUGINS, g as RedteamGenerateOptionsSchema, gt as CODING_AGENT_CORE_PLUGINS, h as RedteamConfigSchema, ht as UNALIGNED_PROVIDER_HARM_PLUGINS, i as EvalResultsFilterMode, it as MULTI_INPUT_EXCLUDED_PLUGINS, j as DEFAULT_STRATEGIES, k as ALL_STRATEGIES, l as TestSuiteSchema, lt as REDTEAM_MODEL, mt as TELECOM_PLUGINS, n as BaseAssertionTypesSchema, nt as LLAMA_GUARD_REPLICATE_PROVIDER, o as OutputFileExtension, ot as PHARMACY_PLUGINS, p as isApiProvider, pt as TEEN_SAFETY_PLUGINS, q as BIAS_PLUGINS, r as CommandLineOptionsSchema, rt as MEDICAL_PLUGINS, s as ResultFailureReason, st as PII_PLUGINS, t as AssertionOrSetSchema, tt as LLAMA_GUARD_ENABLED_CATEGORIES, u as UnifiedConfigSchema, ut as REDTEAM_PROVIDER_HARM_PLUGINS, v as ProviderOptionsSchema, w as StrategyConfigSchema, x as PartialGenerationError, y as ProvidersSchema, z as Severity } from "./types-BFevViUY.js";
+import { i as isJavascriptFile } from "./fileExtensions-D4GCJ67J.js";
+import { r as importModule, t as getDirectory } from "./esm-Bexx2PFc.js";
+import { a as extractVariablesFromTemplates, i as extractVariablesFromTemplate, o as getNunjucksEngine, t as renderEnvOnlyInObject } from "./render-CSP99NLm.js";
+import { a as getRemoteHealthUrl, l as shouldGenerateRemote, n as getRemoteGenerationExplicitlyDisabledError, r as getRemoteGenerationUrl, s as neverGenerateRemote } from "./remoteGeneration-B1_XsKXU.js";
+import { a as openAuthBrowser, i as checkServerRunning, n as BrowserBehaviorNames, o as openBrowser, s as promptUser, t as BrowserBehavior } from "./server-DhMHosWj.js";
+import { _ as makeRequest$1, c as getCloudDatabaseId, d as getOrgContext, f as getPluginSeverityOverridesFromCloud, g as isCloudProvider, h as getUserTeams, i as retrieveMedia, l as getConfigFromCloud, m as getProviderFromCloud, o as canCreateTargets, r as mediaExists, s as checkCloudPermissions, t as getMediaStorage, u as getEvalConfigFromCloud, v as resolveTeamFromIdentifier, y as resolveTeamId } from "./storage-BU4qcnOb.js";
+import { n as sha256, t as randomSequence } from "./createHash-CgRvs4Fn.js";
+import { a as fetchWithCache, n as clearCache, r as disableCache, t as cache_exports } from "./cache-DpPWrkTE.js";
+import { r as runPython } from "./pythonUtils-CgYxeSmO.js";
+import { D as normalizeProviderRef, L as readFilters, M as maybeLoadFromExternalFile, R as readOutput, _ as resultIsForTestCase, a as ComparisonEvalNotFoundError, b as getProviderDescription, c as getEvalTableOutputPromptLocationsBySize, d as fetchCsvFromGoogleSheet, f as setupEnv, g as getTestCaseDeduplicationKey, h as filterRuntimeVars, i as writeOutput, j as maybeLoadConfigFromExternalFile, l as getEvalTablePromptStrippedPayload, m as extractRuntimeVars, n as createOutputMetadata, o as evalTableToJson, p as deduplicateTestCases, r as writeMultipleOutputs, s as generateEvalCsv, t as printBorder, u as mergeComparisonTables, v as checkProviderApiKeys, y as doesProviderRefMatch } from "./util-DURocbYR.js";
+import { a as resolveProviderConfigs, d as MCPProvider, f as HttpProvider, i as resolveProvider, m as createTransformResponse, n as loadApiProvider, p as createTransformRequest, r as loadApiProviders, t as getProviderIds } from "./providers-B9KzWxAX.js";
+import { a as createEmptyTokenUsage, i as createEmptyAssertions, n as accumulateResponseTokenUsage, r as accumulateTokenUsage, t as accumulateAssertionTokenUsage } from "./tokenUsageUtils-CDet74yk.js";
+import { n as escapeRegExp, t as ellipsize } from "./text-Db-Wt2u2.js";
+import { a as evalResultsTable, c as evalsToPromptsTable, d as promptsTable, g as getDb, h as closeDbIfOpen, i as datasetsTable, l as evalsToTagsTable, n as blobReferencesTable, o as evalsTable, p as tagsTable, r as configsTable, s as evalsToDatasetsTable, t as blobAssetsTable, u as modelAuditsTable } from "./tables-DafUHOeh.js";
+import { n as getBlobUrl, t as getBlobByHash } from "./blobs-DODuTK-a.js";
+import { n as createPlaceholderInputValue, r as materializeInputVariablesWithMetadata, t as buildPromptInputDescriptions } from "./inputVariables-DXFdi7AI.js";
+import { a as extractPromptFromTags, i as extractMaterializedVariablesFromJsonWithMetadata, n as extractGoalFromPrompt, r as extractInputVarsFromPrompt, s as getShortPluginId } from "./util-kDURhgJW.js";
+import { n as PromptfooHarmfulCompletionProvider } from "./promptfoo-Rjp-MeBb.js";
+import { A as DivergentRepetitionPlugin, B as sampleArray, C as getPiiLeakTestsForCategory, Ct as getDefaultProviders, D as HarmbenchPlugin, E as ImitationPlugin, F as AegisPlugin, I as RedteamGraderBase, L as RedteamPluginBase, M as CrossSessionLeakPlugin, N as ContractPlugin, O as HallucinationPlugin, P as BeavertailsPlugin, Q as readProviderPromptMap, R as getCustomPolicies, S as PlinyPlugin, T as IntentPlugin, V as fetchHuggingFaceDataset, X as processPrompts, Y as doRemoteGrading, Z as readPrompts, _ as PoliticsPlugin, a as UnverifiableClaimsPlugin, b as isValidPolicyObject, c as ToolDiscoveryPlugin, d as TeenSafetyDangerousContentPlugin, f as TeenSafetyAgeRestrictedGoodsAndServicesPlugin, g as PromptExtractionPlugin, h as RbacPlugin, i as VLGuardPlugin, j as DebugAccessPlugin, k as ExcessiveAgencyPlugin, l as TeenSafetyHarmfulBodyIdealsPlugin, m as ShellInjectionPlugin, o as UnsafeBenchPlugin, p as SqlInjectionPlugin, r as VLSUPlugin, s as ToxicChatPlugin, t as GRADERS, u as TeenSafetyDangerousRoleplayPlugin, v as PolicyPlugin, w as OverreliancePlugin, x as makeInlinePolicyIdSync, y as determinePolicyTypeFromId, z as retryWithDeduplication } from "./graders-BGP99PdK.js";
+import { f as redteamProviderManager, m as TokenUsageTracker } from "./shared-9WHQ1oNE.js";
+import { c as assertions_default, l as readAssertions, n as evaluate$1, o as accumulateNamedMetric, s as doesPromptRefMatch, u as runAssertions } from "./evaluator-DRoiYB2q.js";
+import { i as generateIdFromPrompt } from "./utils-BFOh20Gb.js";
+import { n as TRANSFORM_KEYS, t as INLINE_FUNCTION_LABEL } from "./transform-BnSTnFlp.js";
+import { n as getTraceStore } from "./store-DKd5592Q.js";
+import { n as setupSignalWatcher, t as readSignalEvalId } from "./signal-CSurUUyV.js";
+import { n as isBlobStorageEnabled } from "./extractor-CIW3iN-b.js";
+import { n as getGeneratedPromptOverLimit, r as getMaxCharsPerMessageModifierValue, t as MAX_CHARS_PER_MESSAGE_MODIFIER_KEY } from "./promptLength-4X-Wd8PG.js";
+import { a as createEvalId, n as EvalQueries, s as getEvalSummaries, t as Eval } from "./eval-0VRANImH.js";
+import { n as sanitizeProvider, t as EvalResult } from "./evalResult-eUkJv9Ko.js";
+import { n as renderPrompt } from "./evaluatorHelpers-BsYP_muT.js";
+import { t as formatDuration } from "./formatDuration-CMVNrYvE.js";
+import { i as pluginMatchesStrategyTargets, n as loadStrategy, r as validateStrategies, t as Strategies } from "./strategies-DRJjGTIY.js";
+import { n as shouldUseInkUI } from "./interactiveCheck-C4QlIuoR.js";
 import { fileURLToPath } from "node:url";
 import { Command, InvalidArgumentError } from "commander";
 import { z } from "zod";
@@ -61,9 +72,9 @@ import process$1 from "process";
 import $RefParser from "@apidevtools/json-schema-ref-parser";
 import async from "async";
 import { and, asc, count, desc, eq, inArray, isNotNull, like, or, sql } from "drizzle-orm";
-import cliProgress from "cli-progress";
 import { URL as URL$1 } from "url";
 import editor from "@inquirer/editor";
+import cliProgress from "cli-progress";
 import debounce from "debounce";
 import { LRUCache } from "lru-cache";
 import chokidar from "chokidar";
@@ -100,7 +111,7 @@ function runCommand(program) {
 			hasGithubPr: !!cmdObj.githubPr,
 			hasGuidance: !!(cmdObj.guidance || cmdObj.guidanceFile)
 		});
-		const { executeScan } = await import("./scanner-1DqWi1Ej.js");
+		const { executeScan } = await import("./scanner-DS0109SS.js");
 		await executeScan(repoPath, cmdObj);
 	});
 }
@@ -572,47 +583,11 @@ function filterPrompts(prompts, filterPromptsOption) {
 //#endregion
 //#region src/commands/eval/filterProviders.ts
 /**
-* Checks if a value is a valid provider ID (non-empty string).
-*/
-function isValidProviderId(id) {
-	return id !== null && id !== void 0 && typeof id === "string" && id !== "";
-}
-/**
 * Extracts the id and label from a raw provider config without instantiating it.
 * Handles all provider config formats: string, function, ProviderOptions, ProviderOptionsMap.
 */
 function getProviderIdAndLabel(provider, index) {
-	if (typeof provider === "string") return { id: provider };
-	if (typeof provider === "function") {
-		const label = provider.label;
-		return {
-			id: label ?? `custom-function-${index}`,
-			label
-		};
-	}
-	const providerId = provider.id;
-	if ("id" in provider && isValidProviderId(providerId)) return {
-		id: providerId,
-		label: provider.label
-	};
-	const keys = Object.keys(provider);
-	if (keys.length > 0) {
-		const id = keys[0];
-		const value = provider[id];
-		if (typeof value === "object" && value !== null) return {
-			id: value.id || id,
-			label: value.label
-		};
-	}
-	const label = provider.label;
-	if (isValidProviderId(label)) return {
-		id: label,
-		label
-	};
-	return {
-		id: `unknown-${index}`,
-		label
-	};
+	return normalizeProviderRef(provider, { index });
 }
 /**
 * Filters raw provider configs BEFORE instantiation.
@@ -3030,6 +3005,10 @@ function stripAuthFromUrl(urlString) {
 	}
 }
 async function handleEmailCollection(evalRecord) {
+	if (evalRecord.author) {
+		logger.debug(`[Share] Skipping email collection because author is already set`, { evalId: evalRecord.id });
+		return;
+	}
 	if (!process.stdout.isTTY || isCI() || getEnvBool("PROMPTFOO_DISABLE_SHARE_EMAIL_REQUEST")) return;
 	let email = getUserEmail();
 	if (!email) {
@@ -3208,7 +3187,7 @@ function generateTable(evaluateTable, tableCellMaxLength = 250, maxRows = 25) {
 	for (const row of evaluateTable.body.slice(0, maxRows)) table.push([...row.vars.map((v) => ellipsize(v, tableCellMaxLength)), ...row.outputs.map(({ pass, text, failureReason: failureType }) => {
 		text = ellipsize(text, tableCellMaxLength);
 		if (pass) return chalk.green("[PASS] ") + text;
-		else return chalk.red(failureType === ResultFailureReason.ASSERT ? "[FAIL] " : "[ERROR] ") + text.split("---").map((c, idx) => idx === 0 ? chalk.red.bold(c) : c).join("---");
+		return chalk.red(failureType === ResultFailureReason.ASSERT ? "[FAIL] " : "[ERROR] ") + text.split("---").map((c, idx) => idx === 0 ? chalk.red.bold(c) : c).join("---");
 	})]);
 	return table.toString();
 }
@@ -3292,7 +3271,124 @@ function shouldShareResults(opts) {
 	return cloudConfig.isEnabled() && sharing !== false;
 }
 //#endregion
+//#region src/commands/eval/redteamWarning.ts
+function warnIfRedteamConfigHasNoTests(config, testSuite) {
+	if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
+        Warning: Config file has a redteam section but no test cases.
+        Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
+        `));
+}
+//#endregion
 //#region src/commands/eval/summary.ts
+function getCompletionMessage({ completionType, evalId, shareableUrl, wasAborted, writeToDatabase, activelySharing }) {
+	if (wasAborted) {
+		const idSuffix = writeToDatabase ? ` (ID: ${chalk.cyan(evalId)})` : "";
+		return `${chalk.red("✗")} ${completionType} aborted${idSuffix}`;
+	}
+	if (writeToDatabase && shareableUrl) return `${chalk.green("✓")} ${completionType} complete: ${shareableUrl}`;
+	if (writeToDatabase && activelySharing) return `${chalk.green("✓")} ${completionType} complete`;
+	if (writeToDatabase) return `${chalk.green("✓")} ${completionType} complete (ID: ${chalk.cyan(evalId)})`;
+	return `${chalk.green("✓")} ${completionType} complete`;
+}
+function getAbortSummaryLines(targetErrorStatus) {
+	if (targetErrorStatus == null) return [];
+	return [
+		"",
+		chalk.red.bold("Scan stopped: Target is unavailable and will not recover on retry."),
+		chalk.red(`  Target returned HTTP ${targetErrorStatus}`),
+		"",
+		chalk.yellow("Possible causes:"),
+		chalk.yellow("  • Invalid API key or authentication (401/403)"),
+		chalk.yellow("  • Target endpoint does not exist (404)"),
+		chalk.yellow("  • Server does not support the request (501)"),
+		"",
+		chalk.cyan("To fix: Check your target configuration and credentials.")
+	];
+}
+function getGuidanceLines({ writeToDatabase, shareableUrl, wantsToShare, activelySharing, hasExplicitDisable, cloudEnabled }) {
+	if (!writeToDatabase || shareableUrl || wantsToShare || activelySharing) return [];
+	const lines = ["", `» View results: ${chalk.green.bold("promptfoo view")}`];
+	if (!hasExplicitDisable) lines.push(cloudEnabled ? `» Create shareable URL: ${chalk.green.bold("promptfoo share")}` : `» Share with your team: ${chalk.green.bold("https://promptfoo.app")}`);
+	lines.push(`» Feedback: ${chalk.green.bold("https://promptfoo.dev/feedback")}`);
+	return lines;
+}
+function buildUsageDetails(usage, total) {
+	const parts = [];
+	if (usage.prompt && usage.prompt > 0) parts.push(`${usage.prompt.toLocaleString()} prompt`);
+	if (usage.completion && usage.completion > 0) parts.push(`${usage.completion.toLocaleString()} completion`);
+	if (usage.cached && usage.cached > 0) parts.push(usage.cached === total && parts.length === 0 ? "cached" : `${usage.cached.toLocaleString()} cached`);
+	if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) parts.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
+	return parts;
+}
+function getTokenUsageLines(tokenUsage, isRedteam, tracker) {
+	const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
+	const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
+	if (!hasEvalTokens && !hasGradingTokens) return [];
+	const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
+	const evalTokens = {
+		prompt: tokenUsage.prompt || 0,
+		completion: tokenUsage.completion || 0,
+		total: tokenUsage.total || combinedTotal,
+		cached: tokenUsage.cached || 0,
+		numRequests: tokenUsage.numRequests || 0,
+		completionDetails: tokenUsage.completionDetails || {
+			reasoning: 0,
+			acceptedPrediction: 0,
+			rejectedPrediction: 0
+		}
+	};
+	const lines = [`${chalk.bold("Total Tokens:")} ${chalk.white.bold((evalTokens.total + (tokenUsage.assertions?.total || 0)).toLocaleString())}`];
+	if (isRedteam && tokenUsage.numRequests) lines.push(`  ${chalk.gray("Probes:")} ${chalk.white(tokenUsage.numRequests.toLocaleString())}`);
+	if (evalTokens.total > 0) {
+		const evalParts = buildUsageDetails(evalTokens, evalTokens.total);
+		lines.push(`  ${chalk.gray("Eval:")} ${chalk.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
+	}
+	if (tokenUsage.assertions?.total && tokenUsage.assertions.total > 0) {
+		const gradingParts = buildUsageDetails(tokenUsage.assertions, tokenUsage.assertions.total);
+		lines.push(`  ${chalk.gray("Grading:")} ${chalk.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
+	}
+	lines.push(...getProviderUsageLines(tracker));
+	return lines;
+}
+function getProviderUsageLines(tracker) {
+	const providerIds = tracker.getProviderIds();
+	if (providerIds.length <= 1) return [];
+	const sortedProviders = providerIds.map((id) => ({
+		id,
+		usage: tracker.getProviderUsage(id)
+	})).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
+	const lines = ["", chalk.bold("Providers:")];
+	for (const { id, usage } of sortedProviders) {
+		if ((usage.total || 0) === 0 && (usage.prompt || 0) + (usage.completion || 0) === 0) continue;
+		const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
+		const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
+		const details = buildUsageDetails(usage, displayTotal);
+		const requestInfo = `${usage.numRequests || 0} requests`;
+		const separator = details.length > 0 ? "; " : "";
+		lines.push(`  ${chalk.gray(`${displayId}:`)} ${chalk.white(displayTotal.toLocaleString())} (${requestInfo}${separator}${details.join(", ")})`);
+	}
+	return lines;
+}
+function formatResultPercentage(count, totalTests) {
+	const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
+	return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
+}
+function formatResultLine(count, label, icon, iconColor, totalTests) {
+	return `  ${icon ? `${iconColor(icon)} ` : ""}${chalk.white.bold(count.toLocaleString())} ${chalk.white(label)} ${chalk.gray(`(${formatResultPercentage(count, totalTests)})`)}`;
+}
+function getResultsLines({ successes, failures, errors, duration, maxConcurrency }) {
+	const totalTests = successes + failures + errors;
+	const errorLabel = errors === 1 ? "error" : "errors";
+	return [
+		"",
+		chalk.bold("Results:"),
+		formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.green, totalTests),
+		formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.red, totalTests),
+		formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.red, totalTests),
+		chalk.gray(`Duration: ${formatDuration(duration)} (concurrency: ${maxConcurrency})`),
+		""
+	];
+}
 /**
 * Generate formatted evaluation summary output for CLI display.
 *
@@ -3331,115 +3427,28 @@ function shouldShareResults(opts) {
 * ```
 */
 function generateEvalSummary(params) {
-	const { evalId, isRedteam, writeToDatabase, shareableUrl, wantsToShare, hasExplicitDisable, cloudEnabled, activelySharing = false, tokenUsage, successes, failures, errors, duration, maxConcurrency, tracker, targetErrorStatus } = params;
-	const lines = [];
-	const completionType = isRedteam ? "Red team" : "Eval";
-	const wasAborted = targetErrorStatus != null;
-	let completionMessage;
-	if (wasAborted) {
-		completionMessage = `${chalk.red("✗")} ${completionType} aborted`;
-		if (writeToDatabase) completionMessage += ` (ID: ${chalk.cyan(evalId)})`;
-	} else if (writeToDatabase && shareableUrl) completionMessage = `${chalk.green("✓")} ${completionType} complete: ${shareableUrl}`;
-	else if (writeToDatabase && activelySharing) completionMessage = `${chalk.green("✓")} ${completionType} complete`;
-	else if (writeToDatabase) completionMessage = `${chalk.green("✓")} ${completionType} complete (ID: ${chalk.cyan(evalId)})`;
-	else completionMessage = `${chalk.green("✓")} ${completionType} complete`;
-	lines.push(completionMessage);
-	if (wasAborted && targetErrorStatus != null) {
-		lines.push("");
-		lines.push(chalk.red.bold("Scan stopped: Target is unavailable and will not recover on retry."));
-		lines.push(chalk.red(`  Target returned HTTP ${targetErrorStatus}`));
-		lines.push("");
-		lines.push(chalk.yellow("Possible causes:"));
-		lines.push(chalk.yellow("  • Invalid API key or authentication (401/403)"));
-		lines.push(chalk.yellow("  • Target endpoint does not exist (404)"));
-		lines.push(chalk.yellow("  • Server does not support the request (501)"));
-		lines.push("");
-		lines.push(chalk.cyan("To fix: Check your target configuration and credentials."));
-	}
-	if (writeToDatabase && !shareableUrl && !wantsToShare && !activelySharing) {
-		lines.push("");
-		lines.push(`» View results: ${chalk.green.bold("promptfoo view")}`);
-		if (!hasExplicitDisable) if (cloudEnabled) lines.push(`» Create shareable URL: ${chalk.green.bold("promptfoo share")}`);
-		else lines.push(`» Share with your team: ${chalk.green.bold("https://promptfoo.app")}`);
-		lines.push(`» Feedback: ${chalk.green.bold("https://promptfoo.dev/feedback")}`);
-	}
-	lines.push("");
-	const hasEvalTokens = (tokenUsage.total || 0) > 0 || (tokenUsage.prompt || 0) + (tokenUsage.completion || 0) > 0;
-	const hasGradingTokens = tokenUsage.assertions && (tokenUsage.assertions.total || 0) > 0;
-	if (hasEvalTokens || hasGradingTokens) {
-		const combinedTotal = (tokenUsage.prompt || 0) + (tokenUsage.completion || 0);
-		const evalTokens = {
-			prompt: tokenUsage.prompt || 0,
-			completion: tokenUsage.completion || 0,
-			total: tokenUsage.total || combinedTotal,
-			cached: tokenUsage.cached || 0,
-			completionDetails: tokenUsage.completionDetails || {
-				reasoning: 0,
-				acceptedPrediction: 0,
-				rejectedPrediction: 0
-			}
-		};
-		const grandTotal = evalTokens.total + (tokenUsage.assertions?.total || 0);
-		lines.push(`${chalk.bold("Total Tokens:")} ${chalk.white.bold(grandTotal.toLocaleString())}`);
-		if (isRedteam && tokenUsage.numRequests) lines.push(`  ${chalk.gray("Probes:")} ${chalk.white(tokenUsage.numRequests.toLocaleString())}`);
-		if (evalTokens.total > 0) {
-			const evalParts = [];
-			if (evalTokens.prompt > 0) evalParts.push(`${evalTokens.prompt.toLocaleString()} prompt`);
-			if (evalTokens.completion > 0) evalParts.push(`${evalTokens.completion.toLocaleString()} completion`);
-			if (evalTokens.cached > 0) if (evalTokens.cached === evalTokens.total && evalParts.length === 0) evalParts.push("cached");
-			else evalParts.push(`${evalTokens.cached.toLocaleString()} cached`);
-			if (evalTokens.completionDetails?.reasoning && evalTokens.completionDetails.reasoning > 0) evalParts.push(`${evalTokens.completionDetails.reasoning.toLocaleString()} reasoning`);
-			lines.push(`  ${chalk.gray("Eval:")} ${chalk.white(evalTokens.total.toLocaleString())} (${evalParts.join(", ")})`);
-		}
-		if (tokenUsage.assertions && tokenUsage.assertions.total && tokenUsage.assertions.total > 0) {
-			const gradingParts = [];
-			if (tokenUsage.assertions.prompt && tokenUsage.assertions.prompt > 0) gradingParts.push(`${tokenUsage.assertions.prompt.toLocaleString()} prompt`);
-			if (tokenUsage.assertions.completion && tokenUsage.assertions.completion > 0) gradingParts.push(`${tokenUsage.assertions.completion.toLocaleString()} completion`);
-			if (tokenUsage.assertions.cached && tokenUsage.assertions.cached > 0) if (tokenUsage.assertions.cached === tokenUsage.assertions.total && gradingParts.length === 0) gradingParts.push("cached");
-			else gradingParts.push(`${tokenUsage.assertions.cached.toLocaleString()} cached`);
-			if (tokenUsage.assertions.completionDetails?.reasoning && tokenUsage.assertions.completionDetails.reasoning > 0) gradingParts.push(`${tokenUsage.assertions.completionDetails.reasoning.toLocaleString()} reasoning`);
-			lines.push(`  ${chalk.gray("Grading:")} ${chalk.white(tokenUsage.assertions.total.toLocaleString())} (${gradingParts.join(", ")})`);
-		}
-		const providerIds = tracker.getProviderIds();
-		if (providerIds.length > 1) {
-			lines.push("");
-			lines.push(chalk.bold("Providers:"));
-			const sortedProviders = providerIds.map((id) => ({
-				id,
-				usage: tracker.getProviderUsage(id)
-			})).filter((p) => p.usage != null).sort((a, b) => (b.usage.total || 0) - (a.usage.total || 0));
-			for (const { id, usage } of sortedProviders) if ((usage.total || 0) > 0 || (usage.prompt || 0) + (usage.completion || 0) > 0) {
-				const displayTotal = usage.total || (usage.prompt || 0) + (usage.completion || 0);
-				const displayId = id.includes(" (") ? id.substring(0, id.indexOf(" (")) : id;
-				const details = [];
-				if (usage.prompt && usage.prompt > 0) details.push(`${usage.prompt.toLocaleString()} prompt`);
-				if (usage.completion && usage.completion > 0) details.push(`${usage.completion.toLocaleString()} completion`);
-				if (usage.cached && usage.cached > 0) if (usage.cached === displayTotal && details.length === 0) details.push("cached");
-				else details.push(`${usage.cached.toLocaleString()} cached`);
-				if (usage.completionDetails?.reasoning && usage.completionDetails.reasoning > 0) details.push(`${usage.completionDetails.reasoning.toLocaleString()} reasoning`);
-				const breakdown = ` (${`${usage.numRequests || 0} requests`}${details.length > 0 ? "; " : ""}${details.join(", ")})`;
-				lines.push(`  ${chalk.gray(displayId + ":")} ${chalk.white(displayTotal.toLocaleString())}${breakdown}`);
-			}
-		}
-	}
-	lines.push("");
-	const totalTests = successes + failures + errors;
-	const formatResultPercentage = (count) => {
-		const percentage = totalTests === 0 ? 0 : count / totalTests * 100;
-		return percentage === 0 || percentage === 100 ? `${percentage.toFixed(0)}%` : `${percentage.toFixed(2)}%`;
-	};
-	const formatResultLine = (count, label, icon, iconColor) => {
-		return `  ${icon ? `${iconColor(icon)} ` : ""}${chalk.white.bold(count.toLocaleString())} ${chalk.white(label)} ${chalk.gray(`(${formatResultPercentage(count)})`)}`;
-	};
-	const errorLabel = errors === 1 ? "error" : "errors";
-	lines.push(chalk.bold("Results:"));
-	lines.push(formatResultLine(successes, "passed", successes > 0 ? "✓" : void 0, chalk.green));
-	lines.push(formatResultLine(failures, "failed", failures > 0 ? "✗" : void 0, chalk.red));
-	lines.push(formatResultLine(errors, errorLabel, errors > 0 ? "✗" : void 0, chalk.red));
-	const durationDisplay = formatDuration(duration);
-	lines.push(chalk.gray(`Duration: ${durationDisplay} (concurrency: ${maxConcurrency})`));
-	lines.push("");
-	return lines;
+	return [
+		getCompletionMessage({
+			completionType: params.isRedteam ? "Red team" : "Eval",
+			evalId: params.evalId,
+			shareableUrl: params.shareableUrl,
+			wasAborted: params.targetErrorStatus != null,
+			writeToDatabase: params.writeToDatabase,
+			activelySharing: params.activelySharing ?? false
+		}),
+		...getAbortSummaryLines(params.targetErrorStatus),
+		...getGuidanceLines({
+			writeToDatabase: params.writeToDatabase,
+			shareableUrl: params.shareableUrl,
+			wantsToShare: params.wantsToShare,
+			activelySharing: params.activelySharing ?? false,
+			hasExplicitDisable: params.hasExplicitDisable,
+			cloudEnabled: params.cloudEnabled
+		}),
+		"",
+		...getTokenUsageLines(params.tokenUsage, params.isRedteam, params.tracker),
+		...getResultsLines(params)
+	];
 }
 //#endregion
 //#region src/commands/retry.ts
@@ -3657,6 +3666,9 @@ function setupRetryCommand(program) {
 }
 //#endregion
 //#region src/models/modelAudit.ts
+/**
+* Resolve a caller-supplied sort field name to a property of `modelAuditsTable`.
+*
+* Only own properties of the table object are accepted. Any other value
+* (unknown names, inherited keys such as "toString") falls back to the
+* `createdAt` column, so the ordering helpers never receive `undefined` or a
+* non-column value.
+*
+* @param sortField - Name of the column to sort by.
+* @returns The matching table property, or `modelAuditsTable.createdAt`.
+*/
+function getModelAuditSortColumn(sortField) {
+	if (typeof sortField === "string" && Object.hasOwn(modelAuditsTable, sortField)) return modelAuditsTable[sortField];
+	return modelAuditsTable.createdAt;
+}
 /**
 * Build a scan identifier of the form `scan-<random>-<timestamp>`.
 *
 * `<random>` is the result of `randomSequence(3)` and `<timestamp>` is the
 * first 19 characters of `createdAt.toISOString()` (date and time down to the
 * second). `createdAt` defaults to the current time.
 */
 function createScanId(createdAt = /* @__PURE__ */ new Date()) {
 	const randomPart = randomSequence(3);
 	const timestamp = createdAt.toISOString().slice(0, 19);
 	return `scan-${randomPart}-${timestamp}`;
 }
@@ -3697,8 +3709,10 @@ var ModelAudit = class ModelAudit {
 		this.issues = data.issues || data.results?.issues || null;
 		const issues = data.issues || data.results?.issues;
 		const resultsHasErrors = data.results?.has_errors ?? false;
-		if (data.hasErrors === void 0) this.hasErrors = resultsHasErrors || issues && issues.some((issue) => issue.severity === "critical" || issue.severity === "error") || false;
-		else this.hasErrors = data.hasErrors;
+		if (data.hasErrors === void 0) {
+			const hasActualErrors = resultsHasErrors || issues && issues.some((issue) => issue.severity === "critical" || issue.severity === "error") || false;
+			this.hasErrors = hasActualErrors;
+		} else this.hasErrors = data.hasErrors;
 		this.totalChecks = data.totalChecks;
 		this.passedChecks = data.passedChecks;
 		this.failedChecks = data.failedChecks;
@@ -3796,9 +3810,9 @@ var ModelAudit = class ModelAudit {
 	static async getMany(limit = 100, offset = 0, sortField = "createdAt", sortOrder = "desc", search) {
 		let query = getDb().select().from(modelAuditsTable);
 		if (search) query = query.where(or(like(modelAuditsTable.name, `%${search}%`), like(modelAuditsTable.modelPath, `%${search}%`), like(modelAuditsTable.id, `%${search}%`)));
-		const sortColumn = sortField === "name" ? modelAuditsTable.name : sortField === "modelPath" ? modelAuditsTable.modelPath : modelAuditsTable.createdAt;
-		if (sortOrder === "asc") query = query.orderBy(asc(sortColumn));
-		else query = query.orderBy(desc(sortColumn));
+		const sortColumn = getModelAuditSortColumn(sortField);
+		if (sortOrder === "asc") query = sortField === "id" ? query.orderBy(asc(sortColumn)) : query.orderBy(asc(sortColumn), asc(modelAuditsTable.id));
+		else query = sortField === "id" ? query.orderBy(desc(sortColumn)) : query.orderBy(desc(sortColumn), desc(modelAuditsTable.id));
 		return (await query.limit(limit).offset(offset).all()).map((r) => new ModelAudit({
 			...r,
 			persisted: true
@@ -4157,14 +4171,11 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
 			state.resume = true;
 			state.retryMode = true;
 		} else ({config, testSuite, basePath: _basePath, commandLineOptions} = await resolveConfigs(cmdObj, defaultConfig));
-		if (!cmdObj.envPath && commandLineOptions?.envPath) {
+		if ((!cmdObj.envPath || cmdObj.envPath.length === 0) && commandLineOptions?.envPath) {
 			logger.debug(`Loading additional environment from config: ${commandLineOptions.envPath}`);
 			setupEnv(commandLineOptions.envPath);
 		}
-		if (config.redteam && (!testSuite.tests || testSuite.tests.length === 0) && (!testSuite.scenarios || testSuite.scenarios.length === 0)) logger.warn(chalk.yellow(dedent`
-        Warning: Config file has a redteam section but no test cases.
-        Did you mean to run ${chalk.bold("promptfoo redteam generate")} instead?
-        `));
+		warnIfRedteamConfigHasNoTests(config, testSuite);
 		if (config.redteam && Array.isArray(config.providers) && config.providers.length > 0 && typeof config.providers[0] === "object" && config.providers[0].id === "http") {
 			const maybeUrl = config.providers[0]?.config?.url;
 			if (typeof maybeUrl === "string" && maybeUrl.includes("promptfoo.app")) telemetry.record("feature_used", { feature: "redteam_run_with_example" });
@@ -4272,7 +4283,14 @@ async function doEval(cmdObj, defaultConfig, defaultConfigPath, evaluateOptions)
         ${z.prettifyError(testSuiteSchema.error)}
       Please review your promptfooconfig.yaml configuration.`));
-		const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, { runtimeOptions: options }) : new Eval(config, { runtimeOptions: options });
+		const author = getAuthor();
+		const evalRecord = resumeEval ? resumeEval : cmdObj.write ? await Eval.create(config, testSuite.prompts, {
+			author,
+			runtimeOptions: options
+		}) : new Eval(config, {
+			author,
+			runtimeOptions: options
+		});
 		const abortController = new AbortController();
 		const previousAbortSignal = evaluateOptions.abortSignal;
 		evaluateOptions.abortSignal = previousAbortSignal ? AbortSignal.any([previousAbortSignal, abortController.signal]) : abortController.signal;
@@ -6045,25 +6063,45 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
 	getAssertions(_prompt) {
 		return getHarmfulAssertions(this.harmCategory);
 	}
-	promptsToTestCases(prompts) {
+	async promptsToTestCases(prompts) {
 		const hasMultipleInputs = this.config.inputs && Object.keys(this.config.inputs).length > 0;
 		const harmCategoryLabel = HARM_PLUGINS[this.harmCategory] || this.harmCategory;
-		return prompts.map(({ __prompt }) => {
+		const pluginId = getShortPluginId(this.harmCategory);
+		return Promise.all([...prompts].sort((a, b) => a.__prompt.localeCompare(b.__prompt)).map(async ({ __prompt }, materializationIndex) => {
 			const vars = { [this.injectVar]: __prompt };
-			if (hasMultipleInputs) try {
-				const parsed = JSON.parse(__prompt);
-				Object.assign(vars, extractVariablesFromJson(parsed, this.config.inputs));
-			} catch {}
+			let inputMaterialization;
+			if (hasMultipleInputs) {
+				let parsed;
+				try {
+					parsed = JSON.parse(__prompt);
+				} catch (error) {
+					logger.debug("[AlignedHarmful] Could not parse prompt as JSON for multi-input mode", { error });
+				}
+				if (parsed) try {
+					const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, this.config.inputs, {
+						materializationIndex,
+						pluginId,
+						provider: this.provider,
+						purpose: this.purpose
+					});
+					Object.assign(vars, materializedVars.vars);
+					inputMaterialization = materializedVars.metadata;
+				} catch (error) {
+					logger.debug("[AlignedHarmful] Failed to materialize prompt inputs", { error });
+					throw error;
+				}
+			}
 			return {
 				vars,
 				metadata: {
 					harmCategory: harmCategoryLabel,
-					pluginId: getShortPluginId(this.harmCategory),
-					pluginConfig: this.config
+					pluginId,
+					pluginConfig: this.config,
+					...inputMaterialization ? { inputMaterialization } : {}
 				},
 				assert: getHarmfulAssertions(this.harmCategory)
 			};
-		});
+		}));
 	}
 };
 //#endregion
@@ -6072,20 +6110,37 @@ var AlignedHarmfulPlugin = class extends RedteamPluginBase {
 * Extract content from <Prompt> tags and parse JSON if inputs are defined.
 * Returns the processed prompt and any additional vars extracted from JSON.
 */
-function processPromptForInputs(prompt, _injectVar, inputs) {
+async function processPromptForInputs(prompt, inputs, plugin, provider, purpose, materializationIndex) {
 	let processedPrompt = prompt.trim();
 	const additionalVars = {};
+	let additionalMetadata;
 	const extractedPrompt = extractPromptFromTags(processedPrompt);
 	if (extractedPrompt) processedPrompt = extractedPrompt;
-	if (inputs && Object.keys(inputs).length > 0) try {
-		const parsed = JSON.parse(processedPrompt);
-		Object.assign(additionalVars, extractVariablesFromJson(parsed, inputs));
-	} catch {
-		logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode");
+	if (inputs && Object.keys(inputs).length > 0) {
+		let parsed;
+		try {
+			parsed = JSON.parse(processedPrompt);
+		} catch (error) {
+			logger.debug("[Harmful] Could not parse prompt as JSON for multi-input mode", { error });
+		}
+		if (parsed) try {
+			const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(parsed, inputs, {
+				materializationIndex,
+				pluginId: plugin,
+				provider,
+				purpose
+			});
+			Object.assign(additionalVars, materializedVars.vars);
+			additionalMetadata = materializedVars.metadata;
+		} catch (error) {
+			logger.debug("[Harmful] Failed to materialize prompt inputs", { error });
+			throw error;
+		}
 	}
 	return {
 		processedPrompt,
-		additionalVars
+		additionalVars,
+		additionalMetadata
 	};
 }
 async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, plugin) {
@@ -6106,15 +6161,19 @@ async function getHarmfulTests({ purpose, injectVar, n, delayMs = 0, config }, p
 	};
 	const allPrompts = await retryWithDeduplication(generatePrompts, n);
 	const inputs = config?.inputs;
-	return sampleArray(allPrompts, n).map((prompt) => {
-		const { processedPrompt, additionalVars } = processPromptForInputs(prompt, injectVar, inputs);
+	return Promise.all(sampleArray(allPrompts, n).map(async (prompt, materializationIndex) => {
+		const { processedPrompt, additionalVars, additionalMetadata } = await processPromptForInputs(prompt, inputs, plugin, unalignedProvider, purpose, materializationIndex);
 		const testCase = createTestCase(injectVar, processedPrompt, plugin);
 		if (Object.keys(additionalVars).length > 0) testCase.vars = {
 			...testCase.vars,
 			...additionalVars
 		};
+		if (additionalMetadata) testCase.metadata = {
+			...testCase.metadata,
+			inputMaterialization: additionalMetadata
+		};
 		return testCase;
-	});
+	}));
 }
 //#endregion
 //#region src/redteam/plugins/teenSafety/graderExamples.ts
@@ -6432,7 +6491,7 @@ const MAX_CHARS_RETRY_MODIFIER_KEY = "__maxCharsPerMessageRetry";
 function computeModifiersFromConfig(config) {
 	const modifiers = { ...config?.modifiers };
 	if (config?.language && typeof config.language === "string") modifiers.language = config.language;
-	if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(config.inputs).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
+	if (config?.inputs && Object.keys(config.inputs).length > 0) modifiers.__outputFormat = `Output each test case as JSON wrapped in <Prompt> tags: <Prompt>{${Object.entries(buildPromptInputDescriptions(config.inputs) ?? {}).map(([k, description]) => `"${k}": "${description}"`).join(", ")}}</Prompt>`;
 	const maxCharsModifier = getMaxCharsPerMessageModifierValue(config?.maxCharsPerMessage);
 	if (maxCharsModifier) modifiers[MAX_CHARS_PER_MESSAGE_MODIFIER_KEY] = maxCharsModifier;
 	return modifiers;
@@ -6517,9 +6576,10 @@ function dedupeTestCases(testCases) {
 	return deduped;
 }
 function buildMaxCharsRetryInstructions(rejectedPromptLengths, limit) {
+	const longestRejectedPromptText = rejectedPromptLengths.length > 0 ? `${Math.max(...rejectedPromptLengths)} characters` : "unknown length";
 	return dedent`
     Your previous response included ${rejectedPromptLengths.length} generated prompt${rejectedPromptLengths.length === 1 ? "" : "s"} that exceeded the ${limit ?? "configured"}-character limit.
-    The longest rejected prompt was ${Math.max(...rejectedPromptLengths)} characters.
+    The longest rejected prompt was ${longestRejectedPromptText}.
     Generate replacement prompts only, and keep every user message within the character limit.
   `.trim();
 }
@@ -6597,6 +6657,31 @@ async function fetchRemoteTestCases(key, purpose, injectVar, n, config) {
 		return [];
 	}
 }
+/**
+* Materialize multi-input variables for test cases fetched from remote generation.
+*
+* When `config.inputs` is missing or empty the `testCases` array is returned
+* as-is. Otherwise each test case's `injectVar` value is stringified and handed
+* to `extractInputVarsFromPrompt`; a test case for which that yields nothing is
+* passed through untouched. For the rest, the extracted variables are
+* materialized and merged over the existing `vars`, and any materialization
+* metadata is recorded under `metadata.inputMaterialization`.
+*
+* All test cases are processed concurrently; the array index is forwarded as
+* `materializationIndex`.
+*/
+async function materializeRemoteTestCaseInputs({ config, injectVar, pluginId, provider, purpose, testCases }) {
+	const inputs = config.inputs;
+	const hasInputs = Boolean(inputs) && Object.keys(inputs).length > 0;
+	if (!hasInputs) return testCases;
+	const materializeOne = async (testCase, materializationIndex) => {
+		const rawPrompt = String(testCase.vars?.[injectVar] ?? "");
+		const inputVars = extractInputVarsFromPrompt(rawPrompt, inputs);
+		if (!inputVars) return testCase;
+		const materialized = await materializeInputVariablesWithMetadata(inputVars, inputs, {
+			materializationIndex,
+			pluginId,
+			provider,
+			purpose
+		});
+		const metadata = { ...testCase.metadata || {} };
+		if (materialized.metadata) metadata.inputMaterialization = materialized.metadata;
+		return {
+			...testCase,
+			vars: {
+				...testCase.vars || {},
+				...materialized.vars
+			},
+			metadata
+		};
+	};
+	return Promise.all(testCases.map((testCase, index) => materializeOne(testCase, index)));
+}
 function createPluginFactory(PluginClass, key, validate) {
 	return {
 		key,
@@ -6607,13 +6692,21 @@ function createPluginFactory(PluginClass, key, validate) {
 				logger.debug(`Using local redteam generation for ${key}`);
 				return new PluginClass(provider, purpose, injectVar, configWithDefaults).generateTests(n, delayMs);
 			}
-			const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
+			const pluginId = getShortPluginId(key);
+			const testCases = await materializeRemoteTestCaseInputs({
+				config: configWithDefaults ?? {},
+				injectVar,
+				pluginId,
+				provider,
+				purpose,
+				testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
+			});
 			const computedModifiers = computeModifiersFromConfig(configWithDefaults);
 			return testCases.map((testCase) => ({
 				...testCase,
 				metadata: {
 					...testCase.metadata,
-					pluginId: getShortPluginId(key),
+					pluginId,
 					pluginConfig: {
 						...configWithDefaults,
 						modifiers: computedModifiers
@@ -6670,7 +6763,7 @@ const pluginFactories = [
 		key: category,
 		action: async (params) => {
 			if (neverGenerateRemote()) {
-				logger.error(`${category} plugin requires remote generation to be enabled`);
+				logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
 				return [];
 			}
 			const testCases = await getHarmfulTests(params, category);
@@ -6693,13 +6786,21 @@ const piiPlugins = PII_PLUGINS.map((category) => ({
 	key: category,
 	action: async (params) => {
 		if (shouldGenerateRemote()) {
-			const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
+			const pluginId = getShortPluginId(category);
+			const testCases = await materializeRemoteTestCaseInputs({
+				config: params.config ?? {},
+				injectVar: params.injectVar,
+				pluginId,
+				provider: params.provider,
+				purpose: params.purpose,
+				testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
+			});
 			const computedModifiers = computeModifiersFromConfig(params.config);
 			return testCases.map((testCase) => ({
 				...testCase,
 				metadata: {
 					...testCase.metadata,
-					pluginId: getShortPluginId(category),
+					pluginId,
 					pluginConfig: {
 						...params.config,
 						modifiers: computedModifiers
@@ -6721,16 +6822,24 @@ const biasPlugins = BIAS_PLUGINS.map((category) => ({
 	key: category,
 	action: async (params) => {
 		if (neverGenerateRemote()) {
-			logger.error(`${category} plugin requires remote generation to be enabled`);
+			logger.error(getRemoteGenerationExplicitlyDisabledError(`${category} plugin`));
 			return [];
 		}
-		const testCases = await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {});
+		const pluginId = getShortPluginId(category);
+		const testCases = await materializeRemoteTestCaseInputs({
+			config: params.config ?? {},
+			injectVar: params.injectVar,
+			pluginId,
+			provider: params.provider,
+			purpose: params.purpose,
+			testCases: await fetchRemoteTestCases(category, params.purpose, params.injectVar, params.n, params.config ?? {})
+		});
 		const computedModifiers = computeModifiersFromConfig(params.config);
 		return testCases.map((testCase) => ({
 			...testCase,
 			metadata: {
 				...testCase.metadata,
-				pluginId: getShortPluginId(category),
+				pluginId,
 				pluginConfig: {
 					...params.config,
 					modifiers: computedModifiers
@@ -6743,19 +6852,27 @@ function createRemotePlugin(key, validate) {
 	return {
 		key,
 		validate,
-		action: async ({ purpose, injectVar, n, config }) => {
+		action: async ({ provider, purpose, injectVar, n, config }) => {
 			const configWithDefaults = applyDefaultRemotePluginConfig(key, config);
 			if (neverGenerateRemote()) {
-				logger.error(`${key} plugin requires remote generation to be enabled`);
+				logger.error(getRemoteGenerationExplicitlyDisabledError(`${key} plugin`));
 				return [];
 			}
-			const testCases = await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {});
+			const pluginId = getShortPluginId(key);
+			const testCases = await materializeRemoteTestCaseInputs({
+				config: configWithDefaults ?? {},
+				injectVar,
+				pluginId,
+				provider,
+				purpose,
+				testCases: await fetchRemoteTestCases(key, purpose, injectVar, n, configWithDefaults ?? {})
+			});
 			const computedModifiers = computeModifiersFromConfig(configWithDefaults);
 			const testsWithMetadata = testCases.map((testCase) => ({
 				...testCase,
 				metadata: {
 					...testCase.metadata,
-					pluginId: getShortPluginId(key),
+					pluginId,
 					pluginConfig: {
 						...configWithDefaults,
 						modifiers: computedModifiers
@@ -6825,6 +6942,37 @@ function getPolicyText(metadata) {
 		return typeof policyObject.text === "string" && policyObject.text.length > 0 ? policyObject.text : void 0;
 	}
 }
+/**
+* Re-derive multi-input variables for a test case produced by a strategy.
+*
+* If the originating plugin config declares `inputs` and the test case has a
+* truthy `injectVar` value, that value is parsed as JSON and materialized
+* again. The resulting vars are merged over the existing vars, and any new
+* materialization metadata is merged over the existing `inputMaterialization`.
+*
+* This is best-effort: when there are no inputs, no injected value, or parsing
+* or materialization fails, the test case's current `vars` and
+* `inputMaterialization` are returned unchanged. Failures are logged at debug
+* level rather than rethrown.
+*
+* @returns An object with `inputMaterialization` and `vars`.
+*/
+async function rematerializeStrategyInputVars(testCase, injectVar, provider, purpose, materializationIndex) {
+	const inputs = testCase.metadata?.pluginConfig?.inputs;
+	const inputMaterialization = testCase.metadata?.inputMaterialization;
+	if (!inputs || Object.keys(inputs).length === 0 || !testCase.vars?.[injectVar]) return {
+		inputMaterialization,
+		vars: testCase.vars
+	};
+	try {
+		const materializedVars = await extractMaterializedVariablesFromJsonWithMetadata(JSON.parse(String(testCase.vars[injectVar])), inputs, {
+			materializationIndex,
+			pluginId: String(testCase.metadata?.pluginId || "unknown-plugin"),
+			provider,
+			purpose
+		});
+		return {
+			inputMaterialization: materializedVars.metadata ? {
+				...inputMaterialization,
+				...materializedVars.metadata
+			} : inputMaterialization,
+			vars: {
+				...testCase.vars,
+				...materializedVars.vars
+			}
+		};
+	} catch (error) {
+		// Strategy output is often not JSON at all; keep the original vars, but
+		// leave a trace so real materialization failures can be diagnosed.
+		logger.debug("[Strategies] Could not rematerialize strategy input vars; keeping original vars", { error });
+		return {
+			inputMaterialization,
+			vars: testCase.vars
+		};
+	}
+}
 /**
 * Gets the severity level for a plugin based on its ID and configuration.
 * @param pluginId - The ID of the plugin.
@@ -6966,6 +7114,7 @@ const categories = {
 	foundation: FOUNDATION_PLUGINS,
 	harmful: Object.keys(HARM_PLUGINS),
 	"coding-agent:core": CODING_AGENT_CORE_PLUGINS,
+	"coding-agent:all": CODING_AGENT_PLUGINS,
 	bias: BIAS_PLUGINS,
 	pii: PII_PLUGINS,
 	medical: MEDICAL_PLUGINS,
@@ -7060,7 +7209,7 @@ function addLanguageToPluginMetadata(test, lang, plugin, maxCharsPerMessage, tes
 * @param injectVar - The variable to inject.
 * @returns An array of new test cases generated by strategies.
 */
-async function applyStrategies(testCases, strategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
+async function applyStrategies(testCases, strategies, injectVar, provider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage) {
 	const newTestCases = [];
 	const strategyResults = {};
 	for (const strategy of strategies) {
@@ -7116,14 +7265,8 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
 			}
 		}
 		resultTestCases = filterOversizedTestCases(resultTestCases, injectVar, `Strategy ${strategy.id}`, maxCharsPerMessage);
-		newTestCases.push(...resultTestCases.map((t) => {
-			const inputs = t?.metadata?.pluginConfig?.inputs;
-			let updatedVars = t.vars;
-			if (inputs && Object.keys(inputs).length > 0 && t.vars?.[injectVar]) try {
-				const parsed = JSON.parse(String(t.vars[injectVar]));
-				updatedVars = { ...t.vars };
-				Object.assign(updatedVars, extractVariablesFromJson(parsed, inputs));
-			} catch {}
+		newTestCases.push(...await Promise.all(resultTestCases.map(async (t, materializationIndex) => {
+			const { inputMaterialization, vars } = await rematerializeStrategyInputVars(t, injectVar, provider, purpose, materializationIndex);
 			const strategyConfig = {
 				...strategy.config || {},
 				...maxCharsPerMessage ? { maxCharsPerMessage } : {},
@@ -7131,16 +7274,17 @@ async function applyStrategies(testCases, strategies, injectVar, excludeTargetOu
 			};
 			return {
 				...t,
-				vars: updatedVars,
+				vars,
 				metadata: {
 					...t?.metadata || {},
 					...strategy.id !== "retry" && { strategyId: t?.metadata?.strategyId || strategy.id },
 					...t?.metadata?.pluginId && { pluginId: t.metadata.pluginId },
 					...t?.metadata?.pluginConfig && { pluginConfig: t.metadata.pluginConfig },
+					...inputMaterialization && { inputMaterialization },
 					...Object.keys(strategyConfig).length > 0 && { strategyConfig }
 				}
 			};
-		}));
+		})));
 		const displayId = strategy.id === "layer" && Array.isArray(strategy.config?.steps) ? `layer(${strategy.config.steps.map((st) => typeof st === "string" ? st : st.id).join("→")})` : strategy.id;
 		const languagesInResults = new Set(strategyTestCases.map((t) => getLanguageForTestCase(t)).filter((lang) => lang !== void 0));
 		const applyNumTestsCap = (calculatedRequested) => {
@@ -7595,7 +7739,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
 			targetIds,
 			...retryStrategy.config
 		};
-		const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, void 0, maxCharsPerMessage);
+		const { testCases: retryTestCases, strategyResults: retryResults } = await applyStrategies(pluginTestCases, [retryStrategy], injectVar, redteamProvider, purpose, void 0, maxCharsPerMessage);
 		pluginTestCases.push(...retryTestCases);
 		Object.assign(strategyResults, retryResults);
 		if (showProgressBar) progressBar?.increment(retryTestCases.length);
@@ -7603,7 +7747,7 @@ async function synthesize$1({ abortSignal, delay, entities: entitiesOverride, in
 	checkAbort();
 	const nonBasicStrategies = strategies.filter((s) => !["basic", "retry"].includes(s.id));
 	if (showProgressBar && nonBasicStrategies.length > 0) progressBar?.update({ task: "Applying strategies" });
-	const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
+	const { testCases: strategyTestCases, strategyResults: otherStrategyResults } = await applyStrategies(pluginTestCases, nonBasicStrategies, injectVar, redteamProvider, purpose, excludeTargetOutputFromAgenticAttackGeneration, maxCharsPerMessage);
 	Object.assign(strategyResults, otherStrategyResults);
 	if (showProgressBar && strategyTestCases.length > 0) progressBar?.increment(strategyTestCases.length);
 	const finalTestCases = [...includeBasicTests ? pluginTestCases : [], ...strategyTestCases];
@@ -8358,65 +8502,175 @@ async function doRedteamRun(options) {
 	return evalResult;
 }
 //#endregion
+//#region src/types/transform.ts
+/**
+* Runtime type guard for `TransformFunction` values.
+*
+* Returns `true` for any callable (`typeof value === "function"`); the
+* function's arity and return type are not inspected.
+*/
+function isTransformFunction(value) {
+	return typeof value === "function";
+}
+//#endregion
 //#region src/index.ts
+/**
+* Shallow-clone a test case so the caller can swap in resolved ApiProvider
+* instances on `options.provider` / `assert[].provider` without leaking those
+* mutations back to the input. The input may alias the unified config written
+* to the Eval record, and a live SDK client (e.g. Bedrock's BedrockRuntime,
+* Anthropic's client) holds circular references that break drizzle's JSON
+* serialization on `evalRecord.save()`. Fixes #8687.
+*
+* Detaches only `options` and `assert[]`. Other reference fields (`provider`,
+* `vars`, `metadata`, `providerOutput`) remain aliased — callers must reassign
+* those by reference rather than mutating in place. `assert-set` children are
+* not deep-cloned because the resolve loop skips `assert-set`; if that ever
+* changes, extend this helper.
+*
+* Returns a new object. `options` and each `assert[]` entry are copied exactly
+* one level deep via object spread, and only when the field is truthy on the
+* input; the input test case itself is never written to.
+*/
+function cloneTestForResolve(test) {
+	const cloned = { ...test };
+	if (test.options) cloned.options = { ...test.options }; // fresh container so `options.provider` can be overwritten on the clone
+	if (test.assert) cloned.assert = test.assert.map((assertion) => ({ ...assertion })); // new array holding a one-level copy of each assertion
+	return cloned;
+}
+function toSerializableProviderRef(provider) { // Replace ApiProvider instances with their sanitizeProvider() form (helper defined elsewhere) so the value can be stored in the persisted config.
+	if (isApiProvider(provider)) return sanitizeProvider(provider);
+	if (Array.isArray(provider)) return provider.map(toSerializableProviderRef); // provider lists are handled element by element, recursively
+	return provider; // any other value is returned unchanged
+}
+function isRecord(value) { // true only for truthy values whose typeof is "object" and that are not arrays (so null, primitives, functions and arrays are rejected)
+	return Boolean(value && typeof value === "object" && !Array.isArray(value));
+}
+function withSerializableProvider(record) { // Sanitize `record.provider` when it is an ApiProvider instance, without mutating `record`.
+	if (!isApiProvider(record.provider)) return record; // nothing to sanitize: hand back the same reference
+	return { // shallow copy with only `provider` swapped out
+		...record,
+		provider: sanitizeProvider(record.provider)
+	};
+}
+/**
+* Function-valued transforms are first-class at runtime but are silently dropped
+* by `JSON.stringify`. Persisted eval configs (drizzle-stored) must never retain
+* a function reference, so replace every `transform`-like field with a
+* `[inline function]: name` marker. Non-function values pass through unchanged.
+*
+* `droppedRef.value` is flipped to `true` the first time a function is replaced
+* so the caller can emit a single warning instead of logging per field.
+*
+* Only the keys listed in `TRANSFORM_KEYS` are inspected. `record` is never
+* mutated: a shallow copy is created on the first replacement, and when no
+* replacement happens the original `record` reference is returned. Functions
+* with an empty `name` get the bare `INLINE_FUNCTION_LABEL` marker without a
+* `: name` suffix.
+*/
+function replaceFunctionTransforms(record, droppedRef) {
+	let result;
+	for (const key of TRANSFORM_KEYS) {
+		const value = record[key];
+		if (!isTransformFunction(value)) continue;
+		if (!result) result = { ...record }; // copy lazily so untouched records keep their identity
+		result[key] = value.name ? `${INLINE_FUNCTION_LABEL}: ${value.name}` : INLINE_FUNCTION_LABEL;
+		droppedRef.value = true;
+	}
+	return result ?? record;
+}
+function toSerializableAssertion(assertion, droppedRef) { // Persistable form of one assertion: provider sanitized, function transforms replaced (recorded on `droppedRef.value`).
+	if (!isRecord(assertion)) return assertion; // non-object entries pass through as-is
+	let sanitizedAssertion = withSerializableProvider(assertion);
+	sanitizedAssertion = replaceFunctionTransforms(sanitizedAssertion, droppedRef);
+	if (Array.isArray(assertion.assert)) sanitizedAssertion = { // nested assertion list (presumably `assert-set` children — confirm): sanitize each child recursively
+		...sanitizedAssertion,
+		assert: assertion.assert.map((a) => toSerializableAssertion(a, droppedRef))
+	};
+	return sanitizedAssertion;
+}
+function toSerializableTestCase(test, droppedRef) { // Persistable form of one test case; covers `provider`, `options` and `assert[]` and never mutates `test`.
+	if (!isRecord(test)) return test; // non-object values (e.g. an unresolved string reference or undefined) pass through
+	let sanitizedTest = withSerializableProvider(test);
+	if (isRecord(test.options)) {
+		let options = withSerializableProvider(test.options);
+		options = replaceFunctionTransforms(options, droppedRef);
+		if (options !== test.options) sanitizedTest = { // both helpers return the same reference when nothing changed, so copy only on an actual change
+			...sanitizedTest,
+			options
+		};
+	}
+	if (Array.isArray(test.assert)) sanitizedTest = { // assertions are always re-mapped when present
+		...sanitizedTest,
+		assert: test.assert.map((a) => toSerializableAssertion(a, droppedRef))
+	};
+	return sanitizedTest;
+}
+function toSerializableScenario(scenario, droppedRef) { // Persistable form of one scenario: only its `tests` array is rewritten, every other field is carried over by spread.
+	if (!isRecord(scenario)) return scenario; // non-object scenarios pass through
+	if (!Array.isArray(scenario.tests)) return scenario; // no inline test list to sanitize
+	return {
+		...scenario,
+		tests: scenario.tests.map((t) => toSerializableTestCase(t, droppedRef))
+	};
+}
+function createSerializableUnifiedConfig(testSuite, prompts) { // Build the config object handed to the Eval record: `testSuite` with providers/tests/scenarios sanitized and `prompts` replaced by the given value.
+	const droppedRef = { value: false }; // shared flag: set by the helpers when any function-valued transform was replaced
+	const config = {
+		...testSuite,
+		providers: toSerializableProviderRef(testSuite.providers),
+		defaultTest: toSerializableTestCase(testSuite.defaultTest, droppedRef),
+		tests: Array.isArray(testSuite.tests) ? testSuite.tests.map((t) => toSerializableTestCase(t, droppedRef)) : testSuite.tests, // non-array `tests` values are kept verbatim
+		scenarios: Array.isArray(testSuite.scenarios) ? testSuite.scenarios.map((s) => toSerializableScenario(s, droppedRef)) : testSuite.scenarios, // same for `scenarios`
+		prompts
+	};
+	// Warn at most once, and only when results are going to be written (`writeLatestResults`).
+	if (droppedRef.value && testSuite.writeLatestResults) logger.warn("Function-valued transform(s) in testSuite were replaced with \"[inline function]\" markers in the persisted config. Re-running the saved eval will not invoke them; use string expressions or file:// references if you need the config to round-trip.");
+	return config;
+}
 async function evaluate(testSuite, options = {}) {
-	if (testSuite.writeLatestResults) await runDbMigrations();
-	const loadedProviders = await loadApiProviders(testSuite.providers, { env: testSuite.env });
+	const { author: suiteAuthor, ...testSuiteConfig } = testSuite;
+	if (testSuiteConfig.writeLatestResults) await runDbMigrations();
+	const loadedProviders = await loadApiProviders(testSuiteConfig.providers, { env: testSuiteConfig.env });
 	const providerMap = {};
 	for (const p of loadedProviders) {
 		providerMap[p.id()] = p;
 		if (p.label) providerMap[p.label] = p;
 	}
-	let resolvedDefaultTest = testSuite.defaultTest;
-	if (typeof testSuite.defaultTest === "string" && testSuite.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuite.defaultTest);
+	let resolvedDefaultTest = testSuiteConfig.defaultTest;
+	if (typeof testSuiteConfig.defaultTest === "string" && testSuiteConfig.defaultTest.startsWith("file://")) resolvedDefaultTest = await maybeLoadFromExternalFile(testSuiteConfig.defaultTest);
 	const constructedTestSuite = {
-		...testSuite,
+		...testSuiteConfig,
 		defaultTest: resolvedDefaultTest,
-		scenarios: testSuite.scenarios,
+		scenarios: testSuiteConfig.scenarios,
 		providers: loadedProviders,
-		tests: await readTests(testSuite.tests),
-		nunjucksFilters: await readFilters(testSuite.nunjucksFilters || {}),
-		prompts: await processPrompts(testSuite.prompts)
+		tests: await readTests(testSuiteConfig.tests),
+		nunjucksFilters: await readFilters(testSuiteConfig.nunjucksFilters || {}),
+		prompts: await processPrompts(testSuiteConfig.prompts)
 	};
-	if (typeof constructedTestSuite.defaultTest === "object") {
-		if (constructedTestSuite.defaultTest?.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
-			env: testSuite.env,
+	if (typeof constructedTestSuite.defaultTest === "object" && constructedTestSuite.defaultTest) {
+		constructedTestSuite.defaultTest = cloneTestForResolve(constructedTestSuite.defaultTest);
+		if (constructedTestSuite.defaultTest.provider && !isApiProvider(constructedTestSuite.defaultTest.provider)) constructedTestSuite.defaultTest.provider = await resolveProvider(constructedTestSuite.defaultTest.provider, providerMap, {
+			env: testSuiteConfig.env,
 			basePath: state.basePath
 		});
-		if (constructedTestSuite.defaultTest?.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
-			env: testSuite.env,
+		if (constructedTestSuite.defaultTest.options?.provider && !isApiProvider(constructedTestSuite.defaultTest.options.provider)) constructedTestSuite.defaultTest.options.provider = await resolveProvider(constructedTestSuite.defaultTest.options.provider, providerMap, {
+			env: testSuiteConfig.env,
 			basePath: state.basePath
 		});
 	}
-	for (const test of constructedTestSuite.tests || []) {
+	constructedTestSuite.tests = (constructedTestSuite.tests || []).map(cloneTestForResolve);
+	for (const test of constructedTestSuite.tests) {
 		if (test.options?.provider && !isApiProvider(test.options.provider)) test.options.provider = await resolveProvider(test.options.provider, providerMap, {
-			env: testSuite.env,
+			env: testSuiteConfig.env,
 			basePath: state.basePath
 		});
-		if (test.assert) for (const assertion of test.assert) {
+		for (const assertion of test.assert || []) {
 			if (assertion.type === "assert-set" || typeof assertion.provider === "function") continue;
 			if (assertion.provider && !isApiProvider(assertion.provider)) assertion.provider = await resolveProvider(assertion.provider, providerMap, {
-				env: testSuite.env,
+				env: testSuiteConfig.env,
 				basePath: state.basePath
 			});
 		}
 	}
 	if (options.cache === false) disableCache();
-	const parsedProviderPromptMap = readProviderPromptMap(testSuite, constructedTestSuite.prompts);
-	const unifiedConfig = {
-		...testSuite,
-		prompts: constructedTestSuite.prompts
-	};
-	const evalRecord = testSuite.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts) : new Eval(unifiedConfig);
+	const parsedProviderPromptMap = readProviderPromptMap(testSuiteConfig, constructedTestSuite.prompts);
+	const unifiedConfig = createSerializableUnifiedConfig(testSuiteConfig, constructedTestSuite.prompts);
+	const author = getAuthor(suiteAuthor);
+	const evalRecord = testSuiteConfig.writeLatestResults ? await Eval.create(unifiedConfig, constructedTestSuite.prompts, { author }) : new Eval(unifiedConfig, { author });
 	const ret = await evaluate$1({
 		...constructedTestSuite,
 		providerPromptMap: parsedProviderPromptMap
 	}, evalRecord, {
 		eventSource: "library",
-		isRedteam: Boolean(testSuite.redteam),
+		isRedteam: Boolean(testSuiteConfig.redteam),
 		...options
 	});
-	if (testSuite.writeLatestResults && testSuite.sharing) if (isSharingEnabled(ret)) try {
+	if (testSuiteConfig.writeLatestResults && testSuiteConfig.sharing) if (isSharingEnabled(ret)) try {
 		const shareableUrl = await createShareableUrl(ret, { silent: true });
 		if (shareableUrl) {
 			ret.shareableUrl = shareableUrl;
@@ -8427,9 +8681,9 @@ async function evaluate(testSuite, options = {}) {
 		logger.warn(`Failed to create shareable URL: ${error}`);
 	}
 	else logger.debug("Sharing requested but not enabled (check cloud config or sharing settings)");
-	if (testSuite.outputPath) {
-		if (typeof testSuite.outputPath === "string") await writeOutput(testSuite.outputPath, evalRecord, null);
-		else if (Array.isArray(testSuite.outputPath)) await writeMultipleOutputs(testSuite.outputPath, evalRecord, null);
+	if (testSuiteConfig.outputPath) {
+		if (typeof testSuiteConfig.outputPath === "string") await writeOutput(testSuiteConfig.outputPath, evalRecord, null);
+		else if (Array.isArray(testSuiteConfig.outputPath)) await writeMultipleOutputs(testSuiteConfig.outputPath, evalRecord, null);
 	}
 	return ret;
 }
@@ -8658,6 +8912,58 @@ function setDownloadHeaders(res, fileName, contentType) {
 //#region src/server/routes/eval.ts
 const evalRouter = Router();
 const evalJobs = /* @__PURE__ */ new Map();
+function sendEvalTableResponse(res, evalId, responsePayload) { // Send the eval table as JSON; if serialization throws RangeError, retry with per-cell prompts stripped, else answer 413.
+	try {
+		res.json(responsePayload);
+	} catch (error) {
+		if (!(error instanceof RangeError)) throw error; // only RangeError is treated as "response too large"; anything else propagates
+		logger.warn("[GET /:id/table] Response too large, stripping per-cell prompts by size", { evalId });
+		const promptLocations = getEvalTableOutputPromptLocationsBySize(responsePayload); // helper defined elsewhere; presumably ordered by prompt size — confirm
+		if (promptLocations.length === 0) {
+			logger.error("[GET /:id/table] Response too large and has no prompts to strip", { evalId });
+			res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
+			return;
+		}
+		const tryStringifyWithStrippedPrompts = (promptCountToStrip) => { // returns the JSON string, or null when serialization still throws RangeError
+			const responseWithoutPrompts = getEvalTablePromptStrippedPayload(responsePayload, promptLocations, promptCountToStrip);
+			try {
+				const responseBody = JSON.stringify(responseWithoutPrompts);
+				invariant(typeof responseBody === "string", "Eval table response must serialize to JSON");
+				return responseBody;
+			} catch (retryError) {
+				if (!(retryError instanceof RangeError)) throw retryError;
+				return null;
+			}
+		};
+		let lowerBound = 0; // largest strip count treated as failing (0 = the unstripped payload that just failed)
+		let upperBound = 1; // next strip count to probe
+		let responseBody = null;
+		while (upperBound < promptLocations.length) { // phase 1: probe strip counts 1, 2, 4, ... until one serializes
+			responseBody = tryStringifyWithStrippedPrompts(upperBound);
+			if (responseBody) break;
+			lowerBound = upperBound;
+			upperBound *= 2;
+		}
+		if (!responseBody) { // no probe below the total succeeded: try stripping every located prompt
+			upperBound = promptLocations.length;
+			responseBody = tryStringifyWithStrippedPrompts(upperBound);
+		}
+		if (responseBody) {
+			while (upperBound - lowerBound > 1) { // phase 2: bisect between the last failing and first succeeding count (assumes stripping more never makes serialization fail)
+				const midPoint = lowerBound + Math.floor((upperBound - lowerBound) / 2);
+				const midpointResponseBody = tryStringifyWithStrippedPrompts(midPoint);
+				if (midpointResponseBody) {
+					upperBound = midPoint;
+					responseBody = midpointResponseBody;
+				} else lowerBound = midPoint;
+			}
+			res.type("json").send(responseBody); // body is already a JSON string, so send it directly with a JSON content type
+			return;
+		}
+		logger.error("[GET /:id/table] Response still too large after stripping prompts", { evalId });
+		res.status(413).json({ error: "Eval too large to display. Try reducing the page size." });
+	}
+}
 evalRouter.post("/job", (req, res) => {
 	const result = EvalSchemas.CreateJob.Request.safeParse(req.body);
 	if (!result.success) {
@@ -8698,9 +9004,10 @@ evalRouter.post("/job", (req, res) => {
 		job.evalId = evalResult.id;
 		console.log(`[${id}] Complete`);
 	}).catch((error) => {
-		logger.error(dedent`Failed to eval tests:
-        Error: ${error}
-        Body: ${JSON.stringify(req.body, null, 2)}`);
+		logger.error("Failed to eval tests", {
+			error,
+			body: sanitizeObject(testSuite, { context: "request body" })
+		});
 		const job = evalJobs.get(id);
 		invariant(job, "Job not found");
 		job.status = "error";
@@ -8897,7 +9204,7 @@ evalRouter.get("/:id/table", async (req, res) => {
 			evalId: id
 		});
 	}
-	res.json({
+	sendEvalTableResponse(res, id, {
 		table: returnTable,
 		totalCount: table.totalCount,
 		filteredCount: table.filteredCount,
@@ -9063,43 +9370,53 @@ evalRouter.post("/:evalId/results/:id/rating", async (req, res) => {
 		res.status(400).json({ error: z.prettifyError(bodyResult.error) });
 		return;
 	}
-	const { id } = paramsResult.data;
-	const gradingResult = bodyResult.data;
-	const result = await EvalResult.findById(id);
-	invariant(result, "Result not found");
-	const eval_ = await Eval.findById(result.evalId);
-	invariant(eval_, "Eval not found");
-	const hasExistingManualOverride = Boolean(result.gradingResult?.componentResults?.some((r) => r.assertion?.type === HUMAN_ASSERTION_TYPE));
-	const successChanged = result.success !== gradingResult.pass;
-	const scoreChange = gradingResult.score - result.score;
-	result.gradingResult = gradingResult;
-	result.success = gradingResult.pass;
-	result.score = gradingResult.score;
-	const prompt = eval_.prompts[result.promptIdx];
-	invariant(prompt, "Prompt not found");
-	if (!prompt.metrics) {
-		logger.error(`[${id}] This is not normal. Prompt metrics not found for prompt ${result.promptIdx}`);
-		res.status(400).json({ error: "Prompt metrics not found" });
-		return;
+	try {
+		const { evalId, id } = paramsResult.data;
+		const gradingResult = bodyResult.data;
+		const result = await EvalResult.findById(id);
+		if (!result || result.evalId !== evalId) {
+			res.status(404).json({ error: "Result not found" });
+			return;
+		}
+		const eval_ = await Eval.findById(evalId);
+		if (!eval_) {
+			res.status(404).json({ error: "Eval not found" });
+			return;
+		}
+		const hasExistingManualOverride = Boolean(result.gradingResult?.componentResults?.some((r) => r.assertion?.type === HUMAN_ASSERTION_TYPE));
+		const successChanged = result.success !== gradingResult.pass;
+		const scoreChange = gradingResult.score - result.score;
+		result.gradingResult = gradingResult;
+		result.success = gradingResult.pass;
+		result.score = gradingResult.score;
+		const prompt = eval_.prompts[result.promptIdx];
+		invariant(prompt, "Prompt not found");
+		if (!prompt.metrics) {
+			logger.error(`[${id}] This is not normal. Prompt metrics not found for prompt ${result.promptIdx}`);
+			res.status(400).json({ error: "Prompt metrics not found" });
+			return;
+		}
+		if (successChanged) if (result.success) {
+			prompt.metrics.testPassCount += 1;
+			prompt.metrics.testFailCount -= 1;
+			prompt.metrics.assertPassCount += 1;
+			prompt.metrics.score += scoreChange;
+			if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
+		} else {
+			prompt.metrics.testPassCount -= 1;
+			prompt.metrics.testFailCount += 1;
+			prompt.metrics.assertFailCount += 1;
+			prompt.metrics.score += scoreChange;
+			if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
+		}
+		else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
+		else prompt.metrics.assertFailCount += 1;
+		await eval_.save();
+		await result.save();
+		res.json(result);
+	} catch (error) {
+		sendError(res, 500, "Failed to submit rating", error);
 	}
-	if (successChanged) if (result.success) {
-		prompt.metrics.testPassCount += 1;
-		prompt.metrics.testFailCount -= 1;
-		prompt.metrics.assertPassCount += 1;
-		prompt.metrics.score += scoreChange;
-		if (hasExistingManualOverride) prompt.metrics.assertFailCount -= 1;
-	} else {
-		prompt.metrics.testPassCount -= 1;
-		prompt.metrics.testFailCount += 1;
-		prompt.metrics.assertFailCount += 1;
-		prompt.metrics.score += scoreChange;
-		if (hasExistingManualOverride) prompt.metrics.assertPassCount -= 1;
-	}
-	else if (!hasExistingManualOverride) if (result.success) prompt.metrics.assertPassCount += 1;
-	else prompt.metrics.assertFailCount += 1;
-	await eval_.save();
-	await result.save();
-	res.json(result);
 });
 evalRouter.post("/", async (req, res) => {
 	const bodyResult = EvalSchemas.Save.Request.safeParse(req.body);
@@ -9133,9 +9450,10 @@ evalRouter.post("/", async (req, res) => {
 			res.json(EvalSchemas.Save.Response.parse({ id: eval_.id }));
 		}
 	} catch (error) {
-		logger.error(dedent`Failed to write eval to database:
-      Error: ${error}
-      Body: ${JSON.stringify(body, null, 2)}`);
+		logger.error("Failed to write eval to database", {
+			error,
+			body: sanitizeObject(body, { context: "request body" })
+		});
 		res.status(500).json({ error: "Failed to write eval to database" });
 	}
 });
@@ -9353,7 +9671,7 @@ async function checkForUpdates() {
 	} catch {
 		return false;
 	}
-	if (semverGt(latestVersion, "0.121.4")) {
+	if (semverGt(latestVersion, "0.121.7")) {
 		const border = "=".repeat(TERMINAL_MAX_WIDTH);
 		logger.info(`\n${border}
 ${chalk.yellow("⚠️")} The current version of promptfoo ${chalk.yellow(VERSION)} is lower than the latest available version ${chalk.green(latestVersion)}.
@@ -9512,6 +9830,9 @@ const ModelAuditCliOptionsSchema = z.object({
 	dryRun: z.boolean().optional(),
 	cache: z.boolean().optional(),
 	stream: z.boolean().optional(),
+	scanners: z.array(z.string()).optional(),
+	excludeScanner: z.array(z.string()).optional(),
+	listScanners: z.boolean().optional(),
 	share: z.boolean().optional(),
 	noShare: z.boolean().optional()
 });
@@ -9599,6 +9920,18 @@ const CLI_ARG_MAP = {
 	stream: {
 		flag: "--stream",
 		type: "boolean"
+	},
+	scanners: {
+		flag: "--scanners",
+		type: "array"
+	},
+	excludeScanner: {
+		flag: "--exclude-scanner",
+		type: "array"
+	},
+	listScanners: {
+		flag: "--list-scanners",
+		type: "boolean"
 	}
 };
 /**
@@ -9767,6 +10100,62 @@ function spawnModelAudit(args, options) {
 		});
 	});
 }
+function collectRepeatableOption(value, previous = []) { // Accumulator passed to `.option(...)` for repeatable flags: returns a new array with `value` appended; values are not split on commas here.
+	return [...previous, value];
+}
+function hasScannerSelectionOptions(options) { // true when `options.scanners` or `options.excludeScanner` has a non-zero `length`
+	return Boolean(options.scanners?.length || options.excludeScanner?.length);
+}
+function hasScannerSelectionValue(value) { // true for a non-empty array or a string with non-whitespace content; false for everything else
+	if (Array.isArray(value)) return value.length > 0;
+	return typeof value === "string" && value.trim().length > 0;
+}
+function hasPersistedScannerSelection(metadata) { // true when `metadata.options` records a `scanners` or `excludeScanner` selection
+	const options = metadata?.options;
+	if (!options || typeof options !== "object" || Array.isArray(options)) return false; // missing or malformed options count as "no selection"
+	return hasScannerSelectionValue(options.scanners) || hasScannerSelectionValue(options.excludeScanner);
+}
+/**
+* Parse CLI options through Zod, logging validation errors to the CLI.
+* Returns null when validation fails (and sets process.exitCode to 1).
+*
+* On success the return value is whatever `parseModelAuditArgs(paths, cliOptions)`
+* returns. Only `z.ZodError` is handled here; any other error thrown by the
+* parser is rethrown to the caller.
+*/
+function buildCliArgs(paths, cliOptions) {
+	try {
+		return parseModelAuditArgs(paths, cliOptions);
+	} catch (error) {
+		if (error instanceof z.ZodError) {
+			logger.error(`Invalid model audit options provided:\n${z.prettifyError(error)}`);
+			process.exitCode = 1;
+			return null;
+		}
+		throw error;
+	}
+}
+/**
+* Run modelaudit with inherited stdio and propagate its exit code.
+*
+* `treatExitOneAsIssues=true` suppresses the error log for exit code 1, which
+* modelaudit uses to mean "scan completed, issues found" — callers that expect
+* findings (like the main scan flow) should set this, while list/help flows
+* (where a non-zero exit is always unexpected) should leave it false.
+*
+* The result is reported through `process.exitCode` rather than a return value:
+* it is set to the child's exit code, with a `null` code mapped to 0. If
+* `spawnModelAudit` rejects, the failure is logged together with install hints
+* and `process.exitCode` is set to 1; the error is not rethrown.
+*/
+async function runPassthroughModelAudit(args, env, treatExitOneAsIssues = false) {
+	try {
+		const spawnResult = await spawnModelAudit(args, {
+			captureOutput: false,
+			env
+		});
+		const isIssuesExit = treatExitOneAsIssues && spawnResult.code === 1;
+		if (spawnResult.code !== null && spawnResult.code !== 0 && !isIssuesExit) logger.error(`Model scan process exited with code ${spawnResult.code}`);
+		process.exitCode = spawnResult.code || 0; // the log above is suppressible, the exit code is not: code 1 is still propagated
+	} catch (error) {
+		const message = error instanceof Error ? error.message : String(error);
+		logger.error(`Failed to start modelaudit: ${message}`);
+		logger.info("Make sure modelaudit is installed and available in your PATH.");
+		logger.info("Install it using: pip install modelaudit");
+		process.exitCode = 1;
+	}
+}
 /**
 * Check for existing scan and determine if re-scan is needed.
 * Returns the existing audit if found and re-scan should happen.
@@ -9789,6 +10178,20 @@ async function checkExistingScan(paths, options, currentScannerVersion) {
 			shouldSkip: false,
 			existingAudit: null
 		};
+		if (hasScannerSelectionOptions(options)) {
+			logger.debug("Re-scanning with scanner selection options");
+			return {
+				shouldSkip: false,
+				existingAudit: existing
+			};
+		}
+		if (hasPersistedScannerSelection(existing.metadata)) {
+			logger.debug("Re-scanning because cached revision used scanner selection options");
+			return {
+				shouldSkip: false,
+				existingAudit: existing
+			};
+		}
 		if (options.force) {
 			logger.debug(`Re-scanning (--force): ${modelId}`);
 			return {
@@ -9920,7 +10323,9 @@ async function saveAuditRecord(paths, results, options, currentScannerVersion, e
 			cache: options.cache,
 			quiet: options.quiet,
 			progress: options.progress,
-			stream: options.stream
+			stream: options.stream,
+			scanners: options.scanners,
+			excludeScanner: options.excludeScanner
 		}
 	};
 	if (existingAudit) {
@@ -10052,8 +10457,8 @@ async function processScanResultsFromStdout(spawnResult, paths, options, current
 	return processJsonResults(jsonOutput, spawnResult.code || 0, paths, options, currentScannerVersion, existingAudit);
 }
 function modelScanCommand(program) {
-	program.command("scan-model").description("Scan model files for security and quality issues").argument("<paths...>", "Model files or directories to scan").option("-b, --blacklist <patterns...>", "Additional blacklist patterns to check against model names").option("-o, --output <path>", "Output file path (prints to stdout if not specified)").option("-f, --format <format>", "Output format (text, json, sarif)", "text").option("--sbom <path>", "Write CycloneDX SBOM to the specified file").option("--no-write", "Do not write results to database").option("--name <name>", "Name for the audit (when saving to database)").option("-t, --timeout <seconds>", "Scan timeout in seconds", "300").option("--max-size <size>", "Override auto-detected size limits (e.g., 10GB, 500MB)").option("--strict", "Strict mode: fail on warnings, scan all file types, strict license validation").option("--dry-run", "Preview what would be scanned/downloaded without actually doing it").option("--no-cache", "Force disable caching (overrides smart detection)").option("--quiet", "Silence detection messages").option("--progress", "Force enable progress reporting (auto-detected by default)").option("--stream", "Scan and delete downloaded files immediately after scan").option("-v, --verbose", "Enable verbose output").option("--force", "Force scan even if model was already scanned").option("--share", "Share the model audit results").option("--no-share", "Do not share the model audit results").action(async (paths, options) => {
-		if (!paths || paths.length === 0) {
+	program.command("scan-model").description("Scan model files for security and quality issues").argument("[paths...]", "Model files or directories to scan").option("-b, --blacklist <patterns...>", "Additional blacklist patterns to check against model names").option("-o, --output <path>", "Output file path (prints to stdout if not specified)").option("-f, --format <format>", "Output format (text, json, sarif)", "text").option("--sbom <path>", "Write CycloneDX SBOM to the specified file").option("--no-write", "Do not write results to database").option("--name <name>", "Name for the audit (when saving to database)").option("-t, --timeout <seconds>", "Scan timeout in seconds", "300").option("--max-size <size>", "Override auto-detected size limits (e.g., 10GB, 500MB)").option("--strict", "Strict mode: fail on warnings, scan all file types, strict license validation").option("--dry-run", "Preview what would be scanned/downloaded without actually doing it").option("--no-cache", "Force disable caching (overrides smart detection)").option("--quiet", "Silence detection messages").option("--progress", "Force enable progress reporting (auto-detected by default)").option("--stream", "Scan and delete downloaded files immediately after scan").option("--scanners <scanner>", "Only run selected ModelAudit scanners (IDs/classes; comma-separated or repeated)", collectRepeatableOption).option("--exclude-scanner <scanner>", "Exclude a ModelAudit scanner from the active set (comma-separated or repeated)", collectRepeatableOption).option("--list-scanners", "List registered ModelAudit scanners and exit").option("-v, --verbose", "Enable verbose output").option("--force", "Force scan even if model was already scanned").option("--share", "Share the model audit results").option("--no-share", "Do not share the model audit results").action(async (paths, options) => {
+		if (!options.listScanners && (!paths || paths.length === 0)) {
 			logger.error("No paths specified. Provide at least one model file or directory to scan.");
 			process.exitCode = 1;
 			return;
@@ -10069,6 +10474,21 @@ function modelScanCommand(program) {
 		}
 		await checkModelAuditUpdates();
 		if (currentScannerVersion) logger.debug(`Using modelaudit version: ${currentScannerVersion}`);
+		const delegationEnv = {
+			...process.env,
+			PROMPTFOO_DELEGATED: "true"
+		};
+		if (options.listScanners) {
+			const parsed = buildCliArgs(paths || [], {
+				...options,
+				format: options.format || "text",
+				output: options.output,
+				timeout: void 0
+			});
+			if (!parsed) return;
+			await runPassthroughModelAudit(parsed.args, delegationEnv);
+			return;
+		}
 		const saveToDatabase = options.write === void 0 || options.write === true;
 		let existingAuditToUpdate = null;
 		if (saveToDatabase) {
@@ -10080,33 +10500,22 @@ function modelScanCommand(program) {
 			existingAuditToUpdate = existingAudit;
 		}
 		const outputFormat = saveToDatabase ? "json" : options.format || "text";
-		const cliOptions = {
+		const parsed = buildCliArgs(paths, {
 			...options,
 			format: outputFormat,
 			output: options.output && !saveToDatabase ? options.output : void 0,
 			timeout: options.timeout ? parseInt(options.timeout, 10) : void 0
-		};
-		let args;
-		try {
-			const result = parseModelAuditArgs(paths, cliOptions);
-			args = result.args;
-			if (result.unsupportedOptions.length > 0) logger.warn(`Unsupported options detected: ${result.unsupportedOptions.join(", ")}`);
-		} catch (error) {
-			if (error instanceof z.ZodError) {
-				logger.error("Invalid model audit options provided:");
-				for (const err of error.issues) logger.error(`  - ${err.path.join(".")}: ${err.message}`);
-				process.exitCode = 1;
-				return;
-			}
-			throw error;
+		});
+		if (!parsed) return;
+		const args = parsed.args;
+		if (parsed.unsupportedOptions.length > 0) logger.warn(`Unsupported options detected: ${parsed.unsupportedOptions.join(", ")}`);
+		if (saveToDatabase || outputFormat === "text") logger.info(`Running model scan on: ${paths.join(", ")}`);
+		if (!saveToDatabase) {
+			await runPassthroughModelAudit(args, delegationEnv, true);
+			return;
 		}
-		logger.info(`Running model scan on: ${paths.join(", ")}`);
-		const delegationEnv = {
-			...process.env,
-			PROMPTFOO_DELEGATED: "true"
-		};
 		try {
-			if (saveToDatabase) if (supportsCliUiWithOutput(currentScannerVersion)) {
+			if (supportsCliUiWithOutput(currentScannerVersion)) {
 				const tempOutputPath = createTempOutputPath();
 				args.push("--output", tempOutputPath);
 				let cleanedUp = false;
@@ -10140,14 +10549,6 @@ function modelScanCommand(program) {
 				});
 				process.exitCode = await processScanResultsFromStdout(spawnResult, paths, options, currentScannerVersion, existingAuditToUpdate);
 			}
-			else {
-				const spawnResult = await spawnModelAudit(args, {
-					captureOutput: false,
-					env: delegationEnv
-				});
-				if (spawnResult.code !== null && spawnResult.code !== 0 && spawnResult.code !== 1) logger.error(`Model scan process exited with code ${spawnResult.code}`);
-				process.exitCode = spawnResult.code || 0;
-			}
 		} catch (error) {
 			const message = error instanceof Error ? error.message : String(error);
 			logger.error(`Failed to start modelaudit: ${message}`);
@@ -10164,6 +10565,14 @@ const CheckInstalledResponseSchema = z.object({
 	version: z.string().nullable(),
 	cwd: z.string()
 });
+const ScannerInfoSchema = z.object({ // one scanner entry in the list-scanners response; only `id` is required
+	id: z.string(),
+	class: z.string().optional().default(""), // the remaining fields fall back to an empty string / empty array when absent
+	description: z.string().optional().default(""),
+	extensions: z.array(z.string()).optional().default([]),
+	dependencies: z.array(z.string()).optional().default([])
+}).passthrough(); // keep keys not declared above instead of stripping them
+const ListScannersResponseSchema = z.object({ scanners: z.array(ScannerInfoSchema) }).passthrough(); // response envelope: `{ scanners: [...] }`, extra top-level keys preserved
 const CheckPathRequestSchema = z.object({ path: z.string().trim().min(1, "No path provided") });
 const CheckPathResponseSchema = z.union([z.object({
 	exists: z.literal(false),
@@ -10195,6 +10604,8 @@ const ScanRequestSchema = z.object({
 		sbom: z.string().optional(),
 		output: z.string().optional(),
 		maxSize: z.string().optional(),
+		scanners: z.array(z.string()).optional(),
+		excludeScanner: z.array(z.string()).optional(),
 		persist: z.boolean().optional(),
 		name: z.string().optional(),
 		author: z.string().optional()
@@ -10205,8 +10616,13 @@ const ListScansQuerySchema = z.object({
 	offset: z.coerce.number().int().min(0).optional().default(0),
 	sort: z.enum([
 		"createdAt",
+		"failedChecks",
+		"hasErrors",
+		"id",
+		"modelPath",
 		"name",
-		"modelPath"
+		"passedChecks",
+		"totalChecks"
 	]).optional().default("createdAt"),
 	order: z.enum(["asc", "desc"]).optional().default("desc"),
 	search: z.string().optional()
@@ -10245,6 +10661,7 @@ const DeleteScanResponseSchema = z.object({
 });
 const ModelAuditSchemas = {
 	CheckInstalled: { Response: CheckInstalledResponseSchema },
+	ListScanners: { Response: ListScannersResponseSchema },
 	CheckPath: {
 		Request: CheckPathRequestSchema,
 		Response: CheckPathResponseSchema
@@ -10267,6 +10684,47 @@ const ModelAuditSchemas = {
 //#endregion
 //#region src/server/routes/modelAudit.ts
 const modelAuditRouter = Router();
+const LIST_SCANNERS_ARGS = parseModelAuditArgs([], {
+	listScanners: true,
+	format: "json"
+}).args;
+function getModelAuditDelegationEnv() {
+	return {
+		...process.env,
+		PROMPTFOO_DELEGATED: "true"
+	};
+}
+function spawnModelAuditCapture(args, options = {}) {
+	return new Promise((resolve, reject) => {
+		const child = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
+		let stdout = "";
+		let stderr = "";
+		const onAbort = () => {
+			if (!child.killed) child.kill("SIGTERM");
+		};
+		if (options.signal?.aborted) onAbort();
+		else options.signal?.addEventListener("abort", onAbort, { once: true });
+		const cleanupAbort = () => options.signal?.removeEventListener("abort", onAbort);
+		child.stdout?.on("data", (data) => {
+			stdout += data.toString();
+		});
+		child.stderr?.on("data", (data) => {
+			stderr += data.toString();
+		});
+		child.on("error", (error) => {
+			cleanupAbort();
+			reject(error);
+		});
+		child.on("close", (code) => {
+			cleanupAbort();
+			resolve({
+				code,
+				stdout,
+				stderr
+			});
+		});
+	});
+}
 modelAuditRouter.get("/check-installed", async (_req, res) => {
 	try {
 		const { installed, version } = await checkModelAuditInstalled();
@@ -10283,6 +10741,34 @@ modelAuditRouter.get("/check-installed", async (_req, res) => {
 		}));
 	}
 });
+modelAuditRouter.get("/scanners", async (req, res) => {
+	const abortController = new AbortController();
+	const onClientClose = () => abortController.abort();
+	req.on("close", onClientClose);
+	try {
+		const { installed } = await checkModelAuditInstalled();
+		if (!installed) {
+			res.status(400).json({ error: "ModelAudit is not installed. Please install it using: pip install modelaudit" });
+			return;
+		}
+		const { code, stdout, stderr } = await spawnModelAuditCapture(LIST_SCANNERS_ARGS, { signal: abortController.signal });
+		if (abortController.signal.aborted) return;
+		if (code !== null && code !== 0) {
+			sendError(res, 500, "Failed to list ModelAudit scanners", {
+				code,
+				stderr
+			});
+			return;
+		}
+		const parsedOutput = JSON.parse(stdout);
+		res.json(ModelAuditSchemas.ListScanners.Response.parse(parsedOutput));
+	} catch (error) {
+		if (abortController.signal.aborted) return;
+		sendError(res, 500, "Failed to list ModelAudit scanners", error);
+	} finally {
+		req.removeListener("close", onClientClose);
+	}
+});
 modelAuditRouter.post("/check-path", async (req, res) => {
 	const bodyResult = ModelAuditSchemas.CheckPath.Request.safeParse(req.body);
 	if (!bodyResult.success) {
@@ -10353,11 +10839,12 @@ modelAuditRouter.post("/scan", async (req, res) => {
 			event: "model_scan",
 			pathCount: paths.length,
 			hasBlacklist: (options.blacklist?.length ?? 0) > 0,
+			hasScannerSelection: Boolean(options.scanners?.length || options.excludeScanner?.length),
 			timeout: options.timeout ?? 0,
 			verbose: options.verbose ?? false,
 			persist
 		});
-		const modelAudit = spawn("modelaudit", args);
+		const modelAudit = spawn("modelaudit", args, { env: getModelAuditDelegationEnv() });
 		let stdout = "";
 		let stderr = "";
 		let responded = false;
@@ -10543,9 +11030,20 @@ modelAuditRouter.post("/scan", async (req, res) => {
 							options: {
 								blacklist: options.blacklist,
 								timeout: options.timeout,
+								maxSize: options.maxSize,
 								maxFileSize: options.maxFileSize,
 								maxTotalSize: options.maxTotalSize,
-								verbose: options.verbose
+								verbose: options.verbose,
+								format: options.format,
+								strict: options.strict,
+								dryRun: options.dryRun,
+								cache: options.cache,
+								quiet: options.quiet,
+								progress: options.progress,
+								sbom: options.sbom,
+								output: options.output,
+								scanners: options.scanners,
+								excludeScanner: options.excludeScanner
 							}
 						}
 					})).id;
@@ -10701,6 +11199,38 @@ function normalizeTargetPurposeDiscoveryResult(result) {
 		tools: cleanTools(result.tools)
 	};
 }
+function extractStringField(value) {
+	if (typeof value !== "string") return;
+	return value.trim() || void 0;
+}
+async function getRemoteResponseErrorDetail(response) {
+	const rawText = (await response.text()).trim();
+	const fallback = rawText || response.statusText || "Unknown error";
+	if (!rawText) return fallback;
+	try {
+		const parsed = JSON.parse(rawText);
+		return extractStringField(parsed?.message) ?? extractStringField(parsed?.error) ?? fallback;
+	} catch {
+		return fallback;
+	}
+}
+const REMOTE_ERROR_HINTS = {
+	400: "This usually means your promptfoo client is out of date. Try `npm install -g promptfoo@latest` and rerun.",
+	401: "Check that you are logged in (`promptfoo auth login`) and that your account has access to target discovery.",
+	403: "Check that you are logged in (`promptfoo auth login`) and that your account has access to target discovery.",
+	404: "This usually means your promptfoo client is out of date. Try `npm install -g promptfoo@latest` and rerun.",
+	429: "You are being rate limited. Wait a moment and try again."
+};
+function getRemoteErrorHint(status) {
+	if (REMOTE_ERROR_HINTS[status]) return REMOTE_ERROR_HINTS[status];
+	if (status >= 500) return "The remote generation service may be temporarily unavailable. Retry in a few minutes or contact support if the issue persists.";
+}
+async function buildRemoteErrorFromResponse(response) {
+	const detail = await getRemoteResponseErrorDetail(response);
+	const hint = getRemoteErrorHint(response.status);
+	const base = `Remote server returned HTTP ${response.status}: ${detail}`;
+	return new Error(hint ? `${base}\n${hint}` : base);
+}
 /**
 * Queries Cloud for the purpose-discovery logic, sends each logic to the target,
 * and summarizes the results.
@@ -10750,11 +11280,7 @@ async function doTargetPurposeDiscovery(target, prompt, showProgress = true) {
 				email: getUserEmail()
 			}))
 		});
-		if (!response.ok) {
-			const error = await response.text();
-			logger.error(`${LOG_PREFIX} Error getting the next question from remote server: ${error}`);
-			continue;
-		}
+		if (!response.ok) throw await buildRemoteErrorFromResponse(response);
 		const responseData = await response.json();
 		const data = TargetPurposeDiscoveryTaskResponseSchema.parse(responseData);
 		logger.debug(`${LOG_PREFIX} Received response from remote server: ${JSON.stringify(data, null, 2)}`);
@@ -10971,7 +11497,7 @@ function determineEffectiveSessionSource({ provider, sessionConfig }) {
 async function testProviderConnectivity({ provider, prompt = "Hello World!", inputs }) {
 	const vars = {};
 	if (!provider?.config?.sessionParser) vars["sessionId"] = crypto.randomUUID();
-	if (inputs && typeof inputs === "object") for (const [varName, _description] of Object.entries(inputs)) vars[varName] = `test_${varName}`;
+	if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) vars[varName] = createPlaceholderInputValue(varName, definition);
 	const testSuite = {
 		providers: [provider],
 		prompts: [{
@@ -11202,10 +11728,15 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
 			sessionConfig
 		});
 		const initialSessionId = effectiveSessionSource === "server" ? void 0 : crypto.randomUUID();
+		const materializeSessionPrompt = (prompt) => {
+			if (!mainInputVariable) return prompt;
+			const definition = inputs?.[mainInputVariable];
+			return definition ? createPlaceholderInputValue(mainInputVariable, definition, prompt) : prompt;
+		};
 		const inputVars = {};
-		if (inputs && typeof inputs === "object") for (const [varName, _description] of Object.entries(inputs)) {
+		if (inputs && typeof inputs === "object") for (const [varName, definition] of Object.entries(inputs)) {
 			if (varName === mainInputVariable) continue;
-			inputVars[varName] = `test_${varName}`;
+			inputVars[varName] = createPlaceholderInputValue(varName, definition);
 		}
 		const firstPrompt = "What can you help me with?";
 		const secondPrompt = "What was the last thing I asked you?";
@@ -11218,7 +11749,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
 			vars: {
 				...initialSessionId ? { sessionId: initialSessionId } : {},
 				...inputVars,
-				...mainInputVariable ? { [mainInputVariable]: firstPrompt } : {}
+				...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(firstPrompt) } : {}
 			},
 			prompt: {
 				raw: firstPrompt,
@@ -11265,7 +11796,7 @@ async function testProviderSession({ provider, sessionConfig, options, inputs, m
 			vars: {
 				...extractedSessionId ? { sessionId: extractedSessionId } : {},
 				...inputVars,
-				...mainInputVariable ? { [mainInputVariable]: secondPrompt } : {}
+				...mainInputVariable ? { [mainInputVariable]: materializeSessionPrompt(secondPrompt) } : {}
 			},
 			prompt: {
 				raw: secondPrompt,
@@ -11790,7 +12321,7 @@ providersRouter.post("/test-session", async (req, res) => {
 const TestCaseGenerationSchema = z.object({
 	plugin: z.object({
 		id: z.string().refine((val) => ALL_PLUGINS.includes(val), { message: `Invalid plugin ID. Must be one of: ${ALL_PLUGINS.join(", ")}` }),
-		config: PluginConfigSchema.optional().prefault({})
+		config: PluginConfigSchema.catchall(z.unknown()).optional().prefault({})
 	}),
 	strategy: z.object({
 		id: z.string().refine((val) => ALL_STRATEGIES.includes(val), { message: `Invalid strategy ID. Must be one of: ${ALL_STRATEGIES.join(", ")}` }),
@@ -12367,11 +12898,11 @@ redteamRouter.post("/:taskId", async (req, res) => {
 	}
 	const { taskId } = paramsResult.data;
 	const cloudFunctionUrl = getRemoteGenerationUrl();
-	logger.debug(`Received ${taskId} task request: ${JSON.stringify({
+	logger.debug(`Received ${taskId} task request`, {
 		method: req.method,
 		url: req.url,
-		body: req.body
-	})}`);
+		body: sanitizeObject(bodyResult.data, { context: "request body" })
+	});
 	try {
 		logger.debug(`Sending request to cloud function: ${cloudFunctionUrl}`);
 		const response = await fetchWithProxy(cloudFunctionUrl, {
@@ -12743,7 +13274,7 @@ router.get("/", async (_req, res) => {
 				};
 			} catch (error) {
 				logger.debug(`Failed to fetch latest version: ${error}`);
-				latestVersion = versionCache.latestVersion ?? "0.121.4";
+				latestVersion = versionCache.latestVersion ?? "0.121.7";
 			}
 		}
 		const selfHosted = getEnvBool("PROMPTFOO_SELF_HOSTED");
@@ -12752,7 +13283,7 @@ router.get("/", async (_req, res) => {
 			selfHosted,
 			isNpx
 		});
-		const resolvedLatestVersion = latestVersion ?? "0.121.4";
+		const resolvedLatestVersion = latestVersion ?? "0.121.7";
 		const response = {
 			currentVersion: VERSION,
 			latestVersion: resolvedLatestVersion,
@@ -15174,7 +15705,11 @@ const EXAMPLE_ALIASES = {
 	"google-adk-example": "integration-google-adk",
 	"google-sheets": "integration-google-sheets",
 	"gpt-4o-temperature-comparison": "compare-gpt-temperature",
-	"gpt-4o-vs-4o-mini": "compare-gpt-4o-vs-4o-mini",
+	"compare-gpt-4o-vs-4o-mini": "compare-gpt-model-tiers",
+	"compare-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-model-tiers-mmlu-pro",
+	"compare-gpt-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
+	"gpt-4o-vs-4o-mini": "compare-gpt-model-tiers",
+	"gpt-model-tiers": "compare-gpt-model-tiers",
 	"gpt-vs-claude-vs-gemini": "compare-gpt-vs-claude-vs-gemini",
 	"grok-4-political-bias": "redteam-grok-4-political-bias",
 	groq: "provider-groq",
@@ -15225,7 +15760,9 @@ const EXAMPLE_ALIASES = {
 	"node-package-typescript": "config-node-package-typescript",
 	nscale: "provider-nscale",
 	"nunjucks-custom-filters": "config-nunjucks-custom-filters",
-	"openai-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-5-vs-gpt-5-mini-mmlu",
+	"gpt-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
+	"gpt-model-tiers-mmlu-pro": "compare-gpt-model-tiers-mmlu-pro",
+	"openai-gpt-5-vs-gpt-5-mini-mmlu": "compare-gpt-model-tiers-mmlu-pro",
 	"openai-model-comparison": "compare-openai-models",
 	openclaw: "provider-openclaw",
 	"opencode-sdk": "provider-opencode-sdk/basic",
@@ -15597,8 +16134,8 @@ function shouldUseInkList() {
 async function runInkList(options) {
 	const [React, { renderInteractive }, { ListApp }] = await Promise.all([
 		import("react"),
-		import("./render-CgVDrJmM.js"),
-		import("./ListApp-DQkFNqE9.js")
+		import("./render-DznWrxGO.js"),
+		import("./ListApp-DLmM02JS.js")
 	]);
 	let result = { cancelled: false };
 	let resolveResult;
@@ -17906,8 +18443,8 @@ function registerRunEvaluationTool(server) {
 					}
 					filteredTestSuite.tests = filteredTests;
 				}
-				const { evaluate } = await import("./evaluator-CVessDWe.js");
-				const evalRecord = await (await import("./eval-CzJFfFO9.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
+				const { evaluate } = await import("./evaluator-DNdJF1Gv.js");
+				const evalRecord = await (await import("./eval-DscR5iOM.js")).default.create(config, filteredTestSuite.prompts, { id: `mcp-eval-${Date.now()}` });
 				logger.debug(`Running filtered eval with ${filteredTestSuite.tests?.length || 0} test cases, ${filteredTestSuite.prompts.length} prompts, ${filteredTestSuite.providers.length} providers`);
 				const startTime = Date.now();
 				const result = await evaluate(filteredTestSuite, evalRecord, {
@@ -18473,6 +19010,9 @@ function analyzeConfiguration(config) {
 }
 //#endregion
 //#region src/commands/mcp/server.ts
+function setMcpTransport(transport) {
+	Object.assign(process.env, { MCP_TRANSPORT: transport });
+}
 /**
 * Creates an MCP server with tools for interacting with promptfoo
 */
@@ -18508,7 +19048,7 @@ async function createMcpServer() {
 */
 async function startHttpMcpServer(port) {
 	if (!Number.isInteger(port) || port < 1 || port > 65535) throw new Error(`Invalid port number: ${port}. Port must be an integer between 1 and 65535.`);
-	process.env.MCP_TRANSPORT = "http";
+	setMcpTransport("http");
 	const app = express();
 	app.use(express.json());
 	const mcpServer = await createMcpServer();
@@ -18565,7 +19105,7 @@ async function startHttpMcpServer(port) {
 * Starts an MCP server with stdio transport
 */
 async function startStdioMcpServer() {
-	process.env.MCP_TRANSPORT = "stdio";
+	setMcpTransport("stdio");
 	logger.transports.forEach((transport) => {
 		if (transport.constructor.name === "Console" || transport.name === "console") transport.silent = true;
 	});
@@ -19434,7 +19974,7 @@ function printErrorInformation(errorLogFile, debugLogFile) {
 async function main() {
 	setupEnvFilesFromArgv();
 	initializeRunLogging();
-	if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") process.env.PROMPTFOO_DISABLE_UPDATE = "true";
+	if (!process.env.PROMPTFOO_DISABLE_UPDATE && typeof process.env.CI !== "undefined") Object.assign(process.env, { PROMPTFOO_DISABLE_UPDATE: "true" });
 	await checkForUpdates();
 	await runDbMigrations();
 	const { defaultConfig, defaultConfigPath } = await loadDefaultConfig();