npm - @oh-my-pi/pi-coding-agent - Versions diffs - 15.11.6 → 15.11.7 - Mend

@oh-my-pi/pi-coding-agent 15.11.6 → 15.11.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

package/CHANGELOG.md +29 -1
package/dist/cli.js +114 -71
package/dist/types/cli/bench-cli.d.ts +78 -0
package/dist/types/commands/bench.d.ts +29 -0
package/dist/types/config/model-resolver.d.ts +3 -2
package/dist/types/config/settings-schema.d.ts +72 -0
package/dist/types/edit/renderer.d.ts +1 -0
package/dist/types/modes/components/oauth-selector.d.ts +10 -1
package/dist/types/modes/components/settings-selector.d.ts +8 -1
package/dist/types/modes/components/snapcompact-shape-preview.d.ts +31 -0
package/dist/types/modes/components/tool-execution.d.ts +13 -9
package/dist/types/modes/setup-wizard/scenes/sign-in.d.ts +3 -0
package/dist/types/modes/setup-wizard/scenes/types.d.ts +10 -1
package/dist/types/modes/setup-wizard/scenes/web-search.d.ts +3 -0
package/dist/types/session/snapcompact-inline.d.ts +2 -0
package/dist/types/tools/bash.d.ts +2 -0
package/dist/types/tools/eval-render.d.ts +1 -0
package/dist/types/tools/renderers.d.ts +13 -0
package/dist/types/tools/ssh.d.ts +1 -0
package/package.json +11 -11
package/src/cli/bench-cli.ts +437 -0
package/src/cli-commands.ts +1 -0
package/src/commands/bench.ts +42 -0
package/src/config/model-registry.ts +52 -5
package/src/config/model-resolver.ts +36 -5
package/src/config/settings-schema.ts +92 -0
package/src/edit/renderer.ts +5 -0
package/src/hindsight/client.ts +26 -1
package/src/hindsight/state.ts +6 -2
package/src/internal-urls/docs-index.generated.ts +1 -1
package/src/mcp/transports/stdio.ts +81 -7
package/src/modes/components/oauth-selector.ts +67 -7
package/src/modes/components/settings-selector.ts +27 -0
package/src/modes/components/snapcompact-shape-preview-doc.md +11 -0
package/src/modes/components/snapcompact-shape-preview.ts +192 -0
package/src/modes/components/tool-execution.ts +18 -10
package/src/modes/controllers/input-controller.ts +8 -6
package/src/modes/controllers/selector-controller.ts +4 -2
package/src/modes/interactive-mode.ts +24 -0
package/src/modes/setup-wizard/index.ts +1 -0
package/src/modes/setup-wizard/scenes/glyph.ts +24 -6
package/src/modes/setup-wizard/scenes/providers.ts +36 -2
package/src/modes/setup-wizard/scenes/sign-in.ts +10 -1
package/src/modes/setup-wizard/scenes/theme.ts +28 -1
package/src/modes/setup-wizard/scenes/types.ts +10 -1
package/src/modes/setup-wizard/scenes/web-search.ts +22 -6
package/src/modes/setup-wizard/wizard-overlay.ts +38 -1
package/src/modes/utils/context-usage.ts +1 -1
package/src/prompts/bench.md +7 -0
package/src/sdk.ts +1 -0
package/src/session/agent-session.ts +5 -0
package/src/session/snapcompact-inline.ts +11 -19
package/src/tools/bash.ts +3 -0
package/src/tools/eval-render.ts +4 -0
package/src/tools/renderers.ts +13 -0
package/src/tools/ssh.ts +3 -0

package/src/cli/bench-cli.ts ADDED Viewed

@@ -0,0 +1,437 @@
+import type { ResolvedThinkingLevel } from "@oh-my-pi/pi-agent-core";
+import type {
+	Api,
+	ApiKeyResolver,
+	AssistantMessage,
+	AssistantMessageEvent,
+	AssistantMessageEventStream,
+	Context,
+	Effort,
+	Model,
+	SimpleStreamOptions,
+} from "@oh-my-pi/pi-ai";
+import { streamSimple } from "@oh-my-pi/pi-ai";
+import type { CanonicalModelVariant } from "@oh-my-pi/pi-catalog/identity";
+import { replaceTabs, truncateToWidth } from "@oh-my-pi/pi-tui";
+import { formatDuration, getProjectDir } from "@oh-my-pi/pi-utils";
+import chalk from "chalk";
+import type { ApiKeyResolverModel } from "../config/api-key-resolver";
+import { type CanonicalModelQueryOptions, ModelRegistry } from "../config/model-registry";
+import { formatModelString, getModelMatchPreferences, resolveCliModel } from "../config/model-resolver";
+import { Settings } from "../config/settings";
+import benchPrompt from "../prompts/bench.md" with { type: "text" };
+import { discoverAuthStorage } from "../sdk";
+import { resolveThinkingLevelForModel, shouldDisableReasoning, toReasoningEffort } from "../thinking";
+/** Requests issued per model when the `runs` flag is not supplied. */
+const DEFAULT_RUNS = 1;
+/** Output-token budget per request when the `maxTokens` flag is not supplied. */
+const DEFAULT_MAX_TOKENS = 512;
+/** Width that error text is truncated to in per-run output lines. */
+const ERROR_WIDTH = 110;
+/** Bundled bench prompt, trimmed; used when no custom prompt is given. */
+const BENCH_PROMPT = benchPrompt.trim();
+/** Input for `runBenchCommand`: model selectors plus optional flags. */
+export interface BenchCommandArgs {
+	/** Model selectors to benchmark; `runBenchCommand` throws when this is empty. */
+	models: string[];
+	flags: {
+		/** Requests per model; must be a positive integer when set. */
+		runs?: number;
+		/** Output-token budget per request; must be a positive integer when set. */
+		maxTokens?: number;
+		/** Custom prompt; missing or blank falls back to the bundled bench prompt. */
+		prompt?: string;
+		/** When true, per-run lines are suppressed and a JSON summary is written instead. */
+		json?: boolean;
+	};
+}
+/**
+ * The slice of a model registry that the bench command depends on. The
+ * optional members are canonical-model lookups; this file never calls them
+ * directly (presumably they are consumed by model resolution — confirm there).
+ */
+export interface BenchModelRegistry {
+	getAll(): Model<Api>[];
+	/** A falsy result is reported by the bench loop as missing credentials for the provider. */
+	getApiKey(model: Model<Api>, sessionId?: string): Promise<string | undefined>;
+	/** Produces the `ApiKeyResolver` handed to the stream call as its `apiKey` option. */
+	resolver(model: ApiKeyResolverModel, sessionId?: string): ApiKeyResolver;
+	resolveCanonicalModel?(canonicalId: string, options?: CanonicalModelQueryOptions): Model<Api> | undefined;
+	getCanonicalVariants?(canonicalId: string, options?: CanonicalModelQueryOptions): CanonicalModelVariant[];
+	getCanonicalId?(model: Model<Api>): string | undefined;
+}
+/** Resources a bench invocation needs; produced by `BenchDependencies.createRuntime` or the default factory. */
+export interface BenchRuntime {
+	/** Used to resolve selectors and to obtain credentials for each run. */
+	modelRegistry: BenchModelRegistry;
+	/** Passed to `getModelMatchPreferences` while resolving selectors. */
+	settings?: Settings;
+	/** Called from a `finally` block once the command finishes or throws. */
+	close?: () => void;
+}
+/** Measurements for one request that completed without an error. */
+export interface BenchRunSuccess {
+	ok: true;
+	/** Time to first token in milliseconds; 0 when the measurement was non-finite or non-positive. */
+	ttftMs: number;
+	/** Total request duration in milliseconds; 0 when the measurement was non-finite or non-positive. */
+	durationMs: number;
+	/** Output token count taken from the message usage; 0 when missing or invalid. */
+	outputTokens: number;
+	/** Generation throughput measured over the post-first-token window. */
+	tokensPerSecond: number;
+}
+/** One request that failed, with the error text to display. */
+export interface BenchRunFailure {
+	ok: false;
+	error: string;
+}
+/** Outcome of a single bench request, discriminated by `ok`. */
+export type BenchRunResult = BenchRunSuccess | BenchRunFailure;
+/** Arithmetic means of each metric over a model's successful runs. */
+export interface BenchAverages {
+	ttftMs: number;
+	durationMs: number;
+	outputTokens: number;
+	/** Mean of the per-run tokens/s values (not total tokens over total time). */
+	tokensPerSecond: number;
+}
+/** Everything recorded for one benchmarked selector: identity, per-run results and averages. */
+export interface BenchModelReport {
+	/** Selector as the user typed it (e.g. "opus" or "gemini-3.5:low"). */
+	selector: string;
+	/** Resolved `provider/id`. */
+	model: string;
+	/** Explicit thinking level from a `:level` selector suffix; undefined = provider default. */
+	thinking?: ResolvedThinkingLevel;
+	/** Per-run outcomes in execution order; may hold fewer than `runs` entries when credentials were missing. */
+	results: BenchRunResult[];
+	/** Averages over successful runs; null when every run failed. */
+	average: BenchAverages | null;
+}
+/** Result of a whole bench invocation; also the payload printed in JSON mode. */
+export interface BenchSummary {
+	/** Effective requests-per-model value after validation and defaulting. */
+	runs: number;
+	/** Effective output-token budget after validation and defaulting. */
+	maxTokens: number;
+	/** One report per resolved selector, in the order the selectors were given. */
+	models: BenchModelReport[];
+	/** Total number of failed runs across all models. */
+	failures: number;
+}
+/**
+ * Signature of the streaming function used to issue a bench request.
+ * `runBenchCommand` uses `streamSimple` unless `BenchDependencies.streamSimple` overrides it.
+ */
+type BenchStreamSimple = (
+	model: Model<Api>,
+	context: Context,
+	options?: SimpleStreamOptions,
+) => AssistantMessageEventStream;
+/** Optional overrides for the side-effecting pieces of `runBenchCommand`; each falls back to a real implementation. */
+export interface BenchDependencies {
+	/** Builds the runtime; defaults to auth-storage discovery plus a fresh model registry. */
+	createRuntime?: () => Promise<BenchRuntime>;
+	/** Generates the per-run session id; defaults to `Bun.randomUUIDv7()`. */
+	randomSessionId?: () => string;
+	/** Defaults to `process.stdout.write`. */
+	writeStdout?: (text: string) => void;
+	/** Defaults to `process.stderr.write`. */
+	writeStderr?: (text: string) => void;
+	/** Defaults to assigning `process.exitCode`. */
+	setExitCode?: (code: number) => void;
+	/** Defaults to `streamSimple`. */
+	streamSimple?: BenchStreamSimple;
+	/** Clock in milliseconds; defaults to `performance.now()`. */
+	now?: () => number;
+	/** Enables the transient progress line; defaults to `process.stdout.isTTY === true`. */
+	stdoutIsTTY?: boolean;
+}
+/**
+ * Converts an arbitrary thrown value into display text.
+ *
+ * Resolution order:
+ * 1. an `Error` with a non-empty message → that message;
+ * 2. any object carrying a non-empty string `message` → that message;
+ * 3. the value's own string conversion, unless it is the uninformative
+ *    "[object Object]";
+ * 4. a JSON rendering of the object, falling back to the string conversion
+ *    when serialization throws (e.g. circular structures) or yields nothing.
+ *
+ * @param error - The caught value; may be anything.
+ * @returns Human-readable error text.
+ */
+function getErrorMessage(error: unknown): string {
+	if (error instanceof Error && error.message) return error.message;
+	if (typeof error === "object" && error !== null) {
+		// Non-Error throwables (e.g. plain response payloads) often still carry a message.
+		const { message } = error as { message?: unknown };
+		if (typeof message === "string" && message.length > 0) return message;
+	}
+	const text = String(error);
+	if (text !== "[object Object]") return text;
+	try {
+		return JSON.stringify(error) ?? text;
+	} catch {
+		// Serialization can throw (circular references); keep the generic text.
+		return text;
+	}
+}
+/**
+ * Validates a numeric flag that must be a positive integer.
+ *
+ * @param name - Flag name without the leading dashes; used in the error text.
+ * @param value - Supplied value, or `undefined` when the flag was omitted.
+ * @param fallback - Returned when `value` is `undefined`.
+ * @returns `value` when valid, otherwise `fallback` for an omitted flag.
+ * @throws Error when `value` is set but is not an integer greater than zero.
+ */
+function normalizePositiveInteger(name: string, value: number | undefined, fallback: number): number {
+	if (value === undefined) {
+		return fallback;
+	}
+	const isPositiveInteger = Number.isInteger(value) && value > 0;
+	if (isPositiveInteger) {
+		return value;
+	}
+	throw new Error(`Expected --${name} to be a positive integer, got ${value}`);
+}
+/**
+ * Reports whether a stream event carries generated content, i.e. whether it
+ * can serve as the "first token" marker: a non-empty text/thinking/tool-call
+ * delta, or a text/thinking end event with non-empty content.
+ */
+function isFirstTokenEvent(event: AssistantMessageEvent): boolean {
+	if (event.type === "text_delta" || event.type === "thinking_delta" || event.type === "toolcall_delta") {
+		return event.delta.length > 0;
+	}
+	if (event.type === "text_end" || event.type === "thinking_end") {
+		return event.content.length > 0;
+	}
+	return false;
+}
+/**
+ * Output tokens per second, measured over the generation window (total
+ * duration minus time-to-first-token) so that queueing and prefill latency do
+ * not drag the figure down. When that window is not positive — the response
+ * arrived as one chunk, so TTFT ~ duration — the total duration is used
+ * instead. Returns 0 when no positive window is available.
+ */
+function computeTokensPerSecond(outputTokens: number, durationMs: number, ttftMs: number): number {
+	const generationMs = durationMs - ttftMs;
+	const elapsedMs = generationMs > 0 ? generationMs : durationMs;
+	if (elapsedMs > 0) {
+		return (outputTokens * 1000) / elapsedMs;
+	}
+	return 0;
+}
+/** Per-request inputs for `runBenchRequest`. */
+interface BenchRequestOptions {
+	/** Credential resolver forwarded to the stream call. */
+	apiKey: ApiKeyResolver;
+	/** Session id forwarded to the stream call. */
+	sessionId: string;
+	/** Text sent as the single user message. */
+	prompt: string;
+	/** Requested output-token budget; clamped to the model's own limit when that limit is valid. */
+	maxTokens: number;
+	/** Explicit effort from a `:level` selector suffix; absent = provider default. */
+	reasoning?: Effort;
+	/** Only set for an explicit `:off` suffix — some endpoints reject disablement. */
+	disableReasoning?: boolean;
+}
+/**
+ * Issues one streaming request against `model` and measures it.
+ *
+ * @param model - Model to benchmark.
+ * @param options - Credentials, session id, prompt, token budget and optional reasoning controls.
+ * @param streamFn - Streaming implementation used to issue the request.
+ * @param now - Clock (milliseconds) used for the locally measured timings.
+ * @returns A success record with TTFT, duration, output tokens and tokens/s, or a
+ *   failure record with the error text. Exceptions are caught and returned as
+ *   failures rather than propagated.
+ */
+async function runBenchRequest(
+	model: Model<Api>,
+	options: BenchRequestOptions,
+	streamFn: BenchStreamSimple,
+	now: () => number,
+): Promise<BenchRunResult> {
+	const startedAt = now();
+	let firstTokenAt: number | undefined;
+	try {
+		const context: Context = {
+			// Codex's Responses endpoint 400s with "Instructions are required" when no
+			// system prompt is present — same guard as eval's completion bridge.
+			systemPrompt: ["You are a helpful assistant."],
+			messages: [{ role: "user", content: options.prompt, timestamp: Date.now(), attribution: "user" }],
+		};
+		const stream = streamFn(model, context, {
+			apiKey: options.apiKey,
+			sessionId: options.sessionId,
+			// Clamp the requested budget to the model's own limit when that limit
+			// is a positive finite number; otherwise pass the request through.
+			maxTokens:
+				Number.isFinite(model.maxTokens) && model.maxTokens > 0
+					? Math.min(options.maxTokens, model.maxTokens)
+					: options.maxTokens,
+			reasoning: options.reasoning,
+			disableReasoning: options.disableReasoning,
+			// pi-ai opts every OpenRouter request into response caching (1h TTL).
+			// Bench sends a byte-identical request each run, so within the TTL
+			// OpenRouter replays the cached generation with zeroed usage — the run
+			// shows "tokens 0, TPS 0.0" at line speed. Opt back out so every run
+			// measures a fresh generation.
+			headers: model.provider === "openrouter" ? { "X-OpenRouter-Cache": "false" } : undefined,
+		});
+		let message: AssistantMessage | undefined;
+		for await (const event of stream) {
+			// Stamp the first event that carries non-empty content; this is the
+			// local TTFT measurement used when the message reports none.
+			if (firstTokenAt === undefined && isFirstTokenEvent(event)) {
+				firstTokenAt = now();
+			}
+			if (event.type === "error") {
+				return { ok: false, error: event.error.errorMessage ?? "request failed" };
+			}
+			if (event.type === "done") {
+				message = event.message;
+			}
+		}
+		// No `done` event was seen — fall back to the stream's final result.
+		message ??= await stream.result();
+		if (message.stopReason === "error" || message.errorMessage) {
+			return { ok: false, error: message.errorMessage ?? "request failed" };
+		}
+		// Prefer timings reported on the message; otherwise use the local clock.
+		// Non-finite or non-positive measurements are recorded as 0.
+		const rawDuration = message.duration ?? now() - startedAt;
+		const durationMs = Number.isFinite(rawDuration) && rawDuration > 0 ? rawDuration : 0;
+		// With no first-token event observed, TTFT falls back to the whole duration.
+		const rawTtft = message.ttft ?? (firstTokenAt === undefined ? durationMs : firstTokenAt - startedAt);
+		const ttftMs = Number.isFinite(rawTtft) && rawTtft > 0 ? rawTtft : 0;
+		const outputTokens = Number.isFinite(message.usage.output) && message.usage.output > 0 ? message.usage.output : 0;
+		return {
+			ok: true,
+			ttftMs,
+			durationMs,
+			outputTokens,
+			tokensPerSecond: computeTokensPerSecond(outputTokens, durationMs, ttftMs),
+		};
+	} catch (error) {
+		return { ok: false, error: getErrorMessage(error) };
+	}
+}
+/**
+ * Assembles the report for one benchmarked selector.
+ *
+ * @param selector - Selector exactly as the user supplied it.
+ * @param model - Model the selector resolved to.
+ * @param thinking - Thinking level resolved for the model, if any.
+ * @param results - Per-run outcomes in execution order.
+ * @returns The report; `average` is the mean of each metric over successful
+ *   runs, or `null` when no run succeeded.
+ */
+function buildModelReport(
+	selector: string,
+	model: Model<Api>,
+	thinking: ResolvedThinkingLevel | undefined,
+	results: BenchRunResult[],
+): BenchModelReport {
+	const succeeded = results.filter((entry): entry is BenchRunSuccess => entry.ok);
+	const count = succeeded.length;
+	let average: BenchAverages | null = null;
+	if (count > 0) {
+		let ttftTotal = 0;
+		let durationTotal = 0;
+		let tokenTotal = 0;
+		let throughputTotal = 0;
+		for (const run of succeeded) {
+			ttftTotal += run.ttftMs;
+			durationTotal += run.durationMs;
+			tokenTotal += run.outputTokens;
+			throughputTotal += run.tokensPerSecond;
+		}
+		average = {
+			ttftMs: ttftTotal / count,
+			durationMs: durationTotal / count,
+			outputTokens: tokenTotal / count,
+			tokensPerSecond: throughputTotal / count,
+		};
+	}
+	return { selector, model: formatModelString(model), thinking, results, average };
+}
+/** Formats a millisecond value for display after rounding it and clamping negatives to zero. */
+function formatMs(ms: number): string {
+	const rounded = Math.round(ms);
+	const clamped = Math.max(0, rounded);
+	return formatDuration(clamped);
+}
+/**
+ * Renders the status line for a single run.
+ *
+ * @param result - Outcome of the run.
+ * @param index - Zero-based run index; shown one-based.
+ * @param total - Number of runs planned for the model.
+ * @returns An indented line: metrics for a success, or the error text
+ *   (tabs replaced, newlines flattened to spaces, truncated) for a failure.
+ */
+function formatRunLine(result: BenchRunResult, index: number, total: number): string {
+	const label = chalk.dim(`run ${index + 1}/${total}`);
+	if (!result.ok) {
+		const flattened = replaceTabs(result.error).replace(/\r?\n/g, " ");
+		return `  ${chalk.red("✗")} ${label} ${chalk.red(truncateToWidth(flattened, ERROR_WIDTH))}`;
+	}
+	const segments = [
+		chalk.green("✓"),
+		label,
+		chalk.dim("TTFT"),
+		formatMs(result.ttftMs),
+		chalk.dim("TPS"),
+		`${result.tokensPerSecond.toFixed(1)}/s`,
+		chalk.dim("tokens"),
+		String(result.outputTokens),
+		chalk.dim("total"),
+		formatMs(result.durationMs),
+	];
+	return `  ${segments.join(" ")}`;
+}
+/**
+ * Renders the comparison table for a bench summary.
+ *
+ * Models are ordered by average tokens/s, fastest first; models without any
+ * successful run sort last and show "-" in every metric column. Columns are
+ * left-aligned and padded to the widest cell, trailing padding is trimmed, and
+ * rows with failed runs get a red "(N failed)" suffix. The header row is dimmed.
+ *
+ * @param summary - Summary whose `models` are tabulated (the array is not mutated).
+ * @returns The table text, terminated by a newline.
+ */
+export function formatBenchTable(summary: BenchSummary): string {
+	const columns = ["model", "ttft", "tps", "tokens", "total"] as const;
+	type Column = (typeof columns)[number];
+	const titles: Record<Column, string> = {
+		model: "model",
+		ttft: "TTFT",
+		tps: "TPS",
+		tokens: "tokens",
+		total: "total",
+	};
+	const byThroughput = (left: BenchModelReport, right: BenchModelReport): number => {
+		if (left.average === null) return right.average === null ? 0 : 1;
+		if (right.average === null) return -1;
+		return right.average.tokensPerSecond - left.average.tokensPerSecond;
+	};
+	const entries = [...summary.models].sort(byThroughput).map(report => {
+		const avg = report.average;
+		const cells: Record<Column, string> = {
+			model: report.model,
+			ttft: avg ? formatMs(avg.ttftMs) : "-",
+			tps: avg ? `${avg.tokensPerSecond.toFixed(1)}/s` : "-",
+			tokens: avg ? String(Math.round(avg.outputTokens)) : "-",
+			total: avg ? formatMs(avg.durationMs) : "-",
+		};
+		let failed = 0;
+		for (const run of report.results) {
+			if (!run.ok) failed += 1;
+		}
+		return { cells, failed };
+	});
+	// Each column is as wide as its title or its widest cell, whichever is larger.
+	const widths = new Map<Column, number>();
+	for (const column of columns) {
+		let widest = titles[column].length;
+		for (const entry of entries) {
+			widest = Math.max(widest, entry.cells[column].length);
+		}
+		widths.set(column, widest);
+	}
+	const layout = (cells: Record<Column, string>): string =>
+		columns
+			.map(column => cells[column].padEnd(widths.get(column) ?? 0))
+			.join("  ")
+			.trimEnd();
+	const output = [chalk.dim(layout(titles))];
+	for (const { cells, failed } of entries) {
+		const suffix = failed > 0 ? `  ${chalk.red(`(${failed} failed)`)}` : "";
+		output.push(layout(cells) + suffix);
+	}
+	return `${output.join("\n")}\n`;
+}
+/**
+ * Builds the production runtime: discovers auth storage, initializes settings
+ * for the project directory and creates a model registry on top of the auth
+ * storage. If setup fails after the storage was opened, the storage is closed
+ * before the error is rethrown; otherwise closing is left to the returned
+ * runtime's `close`.
+ */
+async function createDefaultRuntime(): Promise<BenchRuntime> {
+	const authStorage = await discoverAuthStorage();
+	let runtime: BenchRuntime;
+	try {
+		const settings = await Settings.init({ cwd: getProjectDir() });
+		runtime = {
+			modelRegistry: new ModelRegistry(authStorage),
+			settings,
+			close: () => authStorage.close(),
+		};
+	} catch (error) {
+		authStorage.close();
+		throw error;
+	}
+	return runtime;
+}
+/** A selector resolved to a concrete model, plus the thinking level resolved for that model. */
+interface BenchTarget {
+	/** Selector exactly as supplied on the command line. */
+	selector: string;
+	model: Model<Api>;
+	/** Result of `resolveThinkingLevelForModel` for the selector's requested level. */
+	thinking: ResolvedThinkingLevel | undefined;
+}
+/**
+ * Resolves every selector to a model before any request is sent.
+ *
+ * @param selectors - Selectors as supplied by the user.
+ * @param modelRegistry - Registry used for resolution.
+ * @param settings - Optional settings that feed the match preferences.
+ * @param writeStderr - Sink for resolution warnings.
+ * @returns One target per selector, in input order.
+ * @throws Error listing every selector that failed to resolve; nothing is
+ *   returned unless all selectors resolve.
+ */
+function resolveBenchModels(
+	selectors: string[],
+	modelRegistry: BenchModelRegistry,
+	settings: Settings | undefined,
+	writeStderr: (text: string) => void,
+): BenchTarget[] {
+	const matchPreferences = getModelMatchPreferences(settings);
+	const targets: BenchTarget[] = [];
+	const problems: string[] = [];
+	for (const selector of selectors) {
+		const outcome = resolveCliModel({ cliModel: selector, modelRegistry, preferences: matchPreferences });
+		if (outcome.error) {
+			problems.push(`${selector}: ${outcome.error}`);
+		} else if (!outcome.model) {
+			problems.push(`${selector}: model not found`);
+		} else {
+			if (outcome.warning) {
+				writeStderr(`${chalk.yellow(`Warning: ${outcome.warning}`)}\n`);
+			}
+			targets.push({
+				selector,
+				model: outcome.model,
+				thinking: resolveThinkingLevelForModel(outcome.model, outcome.thinkingLevel),
+			});
+		}
+	}
+	if (problems.length === 0) {
+		return targets;
+	}
+	const noun = problems.length === 1 ? "model" : "models";
+	throw new Error(`Could not resolve ${noun}:\n${problems.join("\n")}`);
+}
+/**
+ * Runs the bench command end to end: validates flags, resolves every selector,
+ * issues the configured number of sequential requests per model, reports the
+ * outcome and sets exit code 1 when any run failed.
+ *
+ * Output: in JSON mode only the summary is written to stdout. Otherwise each
+ * model gets a heading and one line per run, followed by a ranked table when
+ * more than one model or more than one run was requested.
+ *
+ * @param command - Selectors and flags.
+ * @param deps - Optional overrides for runtime creation, I/O, clock and streaming.
+ * @returns The summary that was reported.
+ * @throws Error when a numeric flag is not a positive integer, when no selector
+ *   is given, or when a selector cannot be resolved.
+ */
+export async function runBenchCommand(command: BenchCommandArgs, deps: BenchDependencies = {}): Promise<BenchSummary> {
+	const runs = normalizePositiveInteger("runs", command.flags.runs, DEFAULT_RUNS);
+	const maxTokens = normalizePositiveInteger("max-tokens", command.flags.maxTokens, DEFAULT_MAX_TOKENS);
+	// A blank custom prompt falls back to the bundled one.
+	const prompt = command.flags.prompt?.trim() || BENCH_PROMPT;
+	const json = command.flags.json === true;
+	const randomSessionId = deps.randomSessionId ?? (() => Bun.randomUUIDv7());
+	const writeStdout = deps.writeStdout ?? ((text: string) => process.stdout.write(text));
+	const writeStderr = deps.writeStderr ?? ((text: string) => process.stderr.write(text));
+	const setExitCode =
+		deps.setExitCode ??
+		((code: number) => {
+			process.exitCode = code;
+		});
+	const streamFn = deps.streamSimple ?? streamSimple;
+	const now = deps.now ?? (() => performance.now());
+	const interactive = deps.stdoutIsTTY ?? process.stdout.isTTY === true;
+	if (command.models.length === 0) {
+		throw new Error("Pass at least one model selector, e.g. `omp bench opus gpt-5.2`");
+	}
+	const runtime = await (deps.createRuntime ?? createDefaultRuntime)();
+	try {
+		// All selectors are resolved up front, so a bad selector fails before any request is sent.
+		const targets = resolveBenchModels(command.models, runtime.modelRegistry, runtime.settings, writeStderr);
+		const reports: BenchModelReport[] = [];
+		for (const { selector, model, thinking } of targets) {
+			if (!json) {
+				// Show the original selector only when it differs from the resolved `provider/id`.
+				const resolvedNote = selector === formatModelString(model) ? "" : chalk.dim(` (${selector})`);
+				writeStdout(`${chalk.bold(formatModelString(model))}${resolvedNote}\n`);
+			}
+			const results: BenchRunResult[] = [];
+			for (let index = 0; index < runs; index++) {
+				// Each run gets its own session id.
+				const sessionId = randomSessionId();
+				const initialKey = await runtime.modelRegistry.getApiKey(model, sessionId);
+				if (!initialKey) {
+					const failure: BenchRunFailure = {
+						ok: false,
+						error: `No credentials for provider "${model.provider}". Run \`omp\` and use /login, or set the provider API key.`,
+					};
+					results.push(failure);
+					if (!json) writeStdout(`${formatRunLine(failure, index, runs)}\n`);
+					break; // remaining runs would fail identically
+				}
+				if (!json && interactive) {
+					// Transient progress marker (no newline); erased once the run finishes.
+					writeStdout(chalk.dim(`  … run ${index + 1}/${runs} streaming`));
+				}
+				const result = await runBenchRequest(
+					model,
+					{
+						apiKey: runtime.modelRegistry.resolver(model, sessionId),
+						sessionId,
+						prompt,
+						maxTokens,
+						reasoning: toReasoningEffort(thinking),
+						disableReasoning: shouldDisableReasoning(thinking) ? true : undefined,
+					},
+					streamFn,
+					now,
+				);
+				results.push(result);
+				if (!json) {
+					// Carriage return plus the ANSI erase-line sequence removes the progress marker.
+					if (interactive) writeStdout("\r\x1b[2K");
+					writeStdout(`${formatRunLine(result, index, runs)}\n`);
+				}
+			}
+			reports.push(buildModelReport(selector, model, thinking, results));
+		}
+		const failures = reports.reduce((sum, report) => sum + report.results.filter(result => !result.ok).length, 0);
+		const summary: BenchSummary = { runs, maxTokens, models: reports, failures };
+		if (json) {
+			writeStdout(`${JSON.stringify(summary, null, 2)}\n`);
+		} else if (reports.length > 1 || runs > 1) {
+			// The table is skipped for a single model with a single run.
+			writeStdout(`\n${formatBenchTable(summary)}`);
+		}
+		if (failures > 0) setExitCode(1);
+		return summary;
+	} finally {
+		// Release the runtime even when resolution or a request throws.
+		runtime.close?.();
+	}
+}

package/src/cli-commands.ts CHANGED Viewed

@@ -16,6 +16,7 @@ export const commands: CommandEntry[] = [
 	{ name: "auth-broker", load: () => import("./commands/auth-broker").then(m => m.default) },
 	{ name: "auth-gateway", load: () => import("./commands/auth-gateway").then(m => m.default) },
 	{ name: "agents", load: () => import("./commands/agents").then(m => m.default) },
+	{ name: "bench", load: () => import("./commands/bench").then(m => m.default) },
 	{ name: "commit", load: () => import("./commands/commit").then(m => m.default) },
 	{ name: "completions", load: () => import("./commands/completions").then(m => m.default) },
 	{ name: "__complete", load: () => import("./commands/complete").then(m => m.default) },

package/src/commands/bench.ts ADDED Viewed

@@ -0,0 +1,42 @@
+import { Args, Command, Flags } from "@oh-my-pi/pi-utils/cli";
+import { runBenchCommand } from "../cli/bench-cli";
+/**
+ * `bench` CLI command. Declares the command's arguments, flags and examples,
+ * and delegates the actual work to `runBenchCommand`.
+ */
+export default class Bench extends Command {
+	static description =
+		"Benchmark models with the same prompt: time-to-first-token and generation throughput (tokens/s)";
+	static args = {
+		models: Args.string({
+			description: "Model selectors (provider/model or fuzzy id, e.g. opus)",
+			required: true,
+			multiple: true,
+		}),
+	};
+	static flags = {
+		runs: Flags.integer({ description: "Requests per model (results are averaged)", default: 1 }),
+		"max-tokens": Flags.integer({ description: "Max output tokens per request", default: 512 }),
+		prompt: Flags.string({ description: "Custom prompt text (default: bundled bench prompt)" }),
+		json: Flags.boolean({ description: "Output JSON" }),
+	};
+	static examples = [
+		"# Compare two models\n  omp bench anthropic/claude-opus-4-5 openai/gpt-5.2",
+		"# Fuzzy selectors work\n  omp bench opus sonnet",
+		"# Average over 3 runs each\n  omp bench opus gpt-5.2 --runs 3",
+		"# Machine-readable output\n  omp bench opus --json",
+	];
+	/** Parses the command line and forwards selectors and flags to `runBenchCommand`. */
+	async run(): Promise<void> {
+		const { args, flags } = await this.parse(Bench);
+		await runBenchCommand({
+			// Falls back to an empty list when the parser yields no selectors.
+			models: args.models ?? [],
+			flags: {
+				runs: flags.runs,
+				// The kebab-case CLI flag maps onto the camelCase option name.
+				maxTokens: flags["max-tokens"],
+				prompt: flags.prompt,
+				json: flags.json,
+			},
+		});
+	}
+}

package/src/config/model-registry.ts CHANGED Viewed

@@ -20,6 +20,11 @@ import {
 	UNK_CONTEXT_WINDOW,
 	UNK_MAX_TOKENS,
 } from "@oh-my-pi/pi-catalog/provider-models";
+import {
+	collapseBuiltModelVariants,
+	getVariantAliasSources,
+	resolveVariantAlias,
+} from "@oh-my-pi/pi-catalog/variant-collapse";
 // Sentinel for local-only OAuth token (LM Studio, vLLM) — declared inline to avoid loading
 // any provider module at startup. Must match `DEFAULT_LOCAL_TOKEN` in oauth/lm-studio.ts.
@@ -542,7 +547,37 @@ function normalizeSuppressedSelector(selector: string): string {
 	if (!trimmed) return trimmed;
 	const parsed = parseModelString(trimmed);
 	if (!parsed) return trimmed;
-	return `${parsed.provider}/${parsed.id}`;
+	// Retired effort-tier variant ids normalize to their collapsed logical id
+	// so persisted suppressions keyed by raw member ids still bind.
+	const aliasId = resolveVariantAlias(parsed.provider, parsed.id);
+	return `${parsed.provider}/${aliasId ?? parsed.id}`;
+}
+/**
+ * Look up a model's override, falling back to entries keyed by retired
+ * effort-tier variant ids (models.yml authored before collapsing). A raw key
+ * only re-binds when no live model holds that id.
+ *
+ * @param overrides - Overrides for the model's provider, keyed by model id.
+ * @param model - Model whose override is being looked up.
+ * @param hasLiveModel - Reports whether a model with the given provider and id currently exists.
+ * @returns The override keyed by the model's own id, else the first override
+ *   found under an eligible alias source id, else `undefined`.
+ */
+function resolveModelOverrideWithAliases(
+	overrides: Map<string, ModelOverride>,
+	model: Model<Api>,
+	hasLiveModel: (provider: string, id: string) => boolean,
+): ModelOverride | undefined {
+	// An entry keyed by the model's own id always wins.
+	const direct = overrides.get(model.id);
+	if (direct) return direct;
+	for (const rawId of getVariantAliasSources(model.provider, model.id)) {
+		// The raw id still names a live model, so its override stays with that model.
+		if (hasLiveModel(model.provider, rawId)) continue;
+		const remapped = overrides.get(rawId);
+		if (remapped) {
+			logger.debug("model override re-keyed through variant alias", {
+				provider: model.provider,
+				from: rawId,
+				to: model.id,
+			});
+			return remapped;
+		}
+	}
+	return undefined;
 }
 function getDisabledProviderIdsFromSettings(): Set<string> {
@@ -799,7 +834,9 @@ export class ModelRegistry {
 		const withConfigModels = this.#mergeCustomModels(resolvedDefaults, this.#customModelOverlays);
 		// Merge runtime extension models so they survive refresh() cycles
 		const combined = this.#mergeCustomModels(withConfigModels, this.#runtimeModelOverlays);
-		const withModelOverrides = this.#applyModelOverrides(combined, this.#modelOverrides);
+		// Custom/config providers bypass the model-manager merge point —
+		// collapse effort-tier variants here so X/X-thinking twins fold.
+		const withModelOverrides = this.#applyModelOverrides(collapseBuiltModelVariants(combined), this.#modelOverrides);
 		this.#models = this.#applyRuntimeProviderOverrides(withModelOverrides);
 		this.#rebuildCanonicalIndex();
 		this.#lastStaticLoadMtime = this.#modelsConfigFile.getMtimeMs();
@@ -1152,7 +1189,7 @@ export class ModelRegistry {
 		const withConfigModels = this.#mergeCustomModels(resolved, this.#customModelOverlays);
 		// Merge runtime extension models so they survive online discovery completion
 		const combined = this.#mergeCustomModels(withConfigModels, this.#runtimeModelOverlays);
-		const withModelOverrides = this.#applyModelOverrides(combined, this.#modelOverrides);
+		const withModelOverrides = this.#applyModelOverrides(collapseBuiltModelVariants(combined), this.#modelOverrides);
 		this.#models = this.#applyRuntimeProviderOverrides(withModelOverrides);
 		this.#rebuildCanonicalIndex();
 	}
@@ -1398,8 +1435,13 @@ export class ModelRegistry {
 	#applyProviderModelOverrides(provider: string, models: Model<Api>[]): Model<Api>[] {
 		const overrides = this.#modelOverrides.get(provider);
 		if (!overrides || overrides.size === 0) return models;
+		let liveIds: Set<string> | null = null;
+		const hasLiveModel = (_provider: string, id: string) => {
+			liveIds ??= new Set(models.map(m => m.id));
+			return liveIds.has(id);
+		};
 		return models.map(model => {
-			const override = overrides.get(model.id);
+			const override = resolveModelOverrideWithAliases(overrides, model, hasLiveModel);
 			if (!override) return model;
 			return applyModelOverride(model, override);
 		});
@@ -1443,10 +1485,15 @@ export class ModelRegistry {
 	}
 	#applyModelOverrides(models: Model<Api>[], overrides: Map<string, Map<string, ModelOverride>>): Model<Api>[] {
 		if (overrides.size === 0) return models;
+		let liveKeys: Set<string> | null = null;
+		const hasLiveModel = (provider: string, id: string) => {
+			liveKeys ??= new Set(models.map(m => `${m.provider}\u0000${m.id}`));
+			return liveKeys.has(`${provider}\u0000${id}`);
+		};
 		return models.map(model => {
 			const providerOverrides = overrides.get(model.provider);
 			if (!providerOverrides) return model;
-			const override = providerOverrides.get(model.id);
+			const override = resolveModelOverrideWithAliases(providerOverrides, model, hasLiveModel);
 			if (!override) return model;
 			return applyModelOverride(model, override);
 		});

package/src/config/model-resolver.ts CHANGED Viewed

@@ -3,8 +3,9 @@
  *
  * Layering:
  * - `matchModel` is the single matching engine. Order: exact `provider/id`
- *   reference (with OpenRouter routed/date fallbacks) → exact canonical id →
- *   exact bare id → provider-scoped fuzzy → substring with alias-vs-dated pick.
+ *   reference (with variant-alias and OpenRouter routed/date fallbacks) →
+ *   exact canonical id → exact bare id → retired variant alias →
+ *   provider-scoped fuzzy → substring with alias-vs-dated pick.
  * - `parseModelPatternWithContext`/`parseModelPattern` layer the selector
  *   grammar on top: trailing `:level` thinking suffixes (`splitThinkingSuffix`)
  *   and `@upstream` provider routing (`splitUpstreamRouting`).
@@ -19,9 +20,11 @@ import type { Api, Effort, KnownProvider, Model, ModelSpec } from "@oh-my-pi/pi-
 import { buildModel } from "@oh-my-pi/pi-catalog/build";
 import { modelMatchesHost } from "@oh-my-pi/pi-catalog/hosts";
 import { buildModelProviderPriorityRank } from "@oh-my-pi/pi-catalog/identity";
+import { stripThinkingVariantToken } from "@oh-my-pi/pi-catalog/identity/family";
 import { clampThinkingLevelForModel } from "@oh-my-pi/pi-catalog/model-thinking";
 import { modelsAreEqual } from "@oh-my-pi/pi-catalog/models";
 import { DEFAULT_MODEL_PER_PROVIDER } from "@oh-my-pi/pi-catalog/provider-models";
+import { resolveBareVariantAlias, resolveVariantAlias } from "@oh-my-pi/pi-catalog/variant-collapse";
 import { fuzzyMatch } from "@oh-my-pi/pi-tui";
 import { logger } from "@oh-my-pi/pi-utils";
 import chalk from "chalk";
@@ -228,6 +231,18 @@ export function resolveProviderModelReference(
 		return exact;
 	}
+	// Retired effort-tier variant ids resolve to their collapsed logical
+	// model: hand-table aliases first, then the `X-thinking` → `X` grammar
+	// for auto-derived pairs. Exact lookup above always wins while raw is live.
+	const variantAliasId =
+		resolveVariantAlias(normalizedProvider, normalizedModelId) ?? stripThinkingVariantToken(normalizedModelId);
+	if (variantAliasId) {
+		const aliased = index.get(`${normalizedProvider}\u0000${variantAliasId.toLowerCase()}`);
+		if (aliased) {
+			return aliased;
+		}
+	}
 	if (normalizedProvider !== "openrouter") {
 		return undefined;
 	}
@@ -407,11 +422,13 @@ function findExactCanonicalModelMatch(
 /**
  * The single model-matching engine. Tries, in order:
- * 1. exact `provider/id` reference (OpenRouter routed/date fallbacks included),
+ * 1. exact `provider/id` reference (variant-alias and OpenRouter routed/date
+ *    fallbacks included),
  * 2. exact canonical id (coalesces provider variants),
  * 3. exact bare id (preference-ranked),
- * 4. provider-scoped fuzzy match,
- * 5. substring match with the alias-vs-dated pick.
+ * 4. retired effort-tier variant alias (collapsed catalog entries),
+ * 5. provider-scoped fuzzy match,
+ * 6. substring match with the alias-vs-dated pick.
  * Returns the matched model or undefined if no match found.
  */
 function matchModel(
@@ -440,6 +457,20 @@ function matchModel(
 	if (exactMatches.length > 0) {
 		return pickPreferredModel(exactMatches, context);
 	}
+	// Retired effort-tier variant ids (bare, no provider prefix) resolve to
+	// their collapsed logical model; models from the providers whose table
+	// declared the alias win ties. Auto-derived `X-thinking` pairs resolve
+	// through the grammar fallback.
+	const bareAlias = resolveBareVariantAlias(modelPattern);
+	const bareAliasTargetId = bareAlias?.id ?? stripThinkingVariantToken(modelPattern);
+	if (bareAliasTargetId) {
+		const aliasMatches = availableModels.filter(m => m.id.toLowerCase() === bareAliasTargetId.toLowerCase());
+		if (aliasMatches.length > 0) {
+			const preferred = bareAlias ? aliasMatches.filter(m => bareAlias.providers.includes(m.provider)) : [];
+			return pickPreferredModel(preferred.length > 0 ? preferred : aliasMatches, context);
+		}
+	}
 	// Check for provider/modelId format — fuzzy match within provider only.
 	const slashIndex = modelPattern.indexOf("/");
 	if (slashIndex !== -1) {