npm - @oh-my-pi/pi-ai - Versions diffs - 15.1.7 → 15.1.9 - Mend

@oh-my-pi/pi-ai 15.1.7 → 15.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/CHANGELOG.md +19 -0
package/dist/types/provider-models/openai-compat.d.ts +12 -0
package/dist/types/types.d.ts +1 -1
package/dist/types/utils/fireworks-model-id.d.ts +8 -0
package/dist/types/utils/oauth/firepass.d.ts +1 -0
package/dist/types/utils/oauth/types.d.ts +1 -1
package/package.json +2 -2
package/src/auth-storage.ts +6 -0
package/src/model-cache.ts +14 -0
package/src/models.json +27 -0
package/src/provider-models/descriptors.ts +2 -0
package/src/provider-models/openai-compat.ts +36 -4
package/src/providers/anthropic.ts +10 -5
package/src/providers/ollama.ts +26 -1
package/src/providers/openai-chat-server.ts +2 -2
package/src/providers/openai-completions-compat.ts +18 -10
package/src/providers/openai-completions.ts +43 -12
package/src/providers/openai-responses-shared.ts +6 -3
package/src/stream.ts +1 -0
package/src/types.ts +1 -0
package/src/utils/fireworks-model-id.ts +17 -0
package/src/utils/oauth/firepass.ts +24 -0
package/src/utils/oauth/index.ts +11 -1
package/src/utils/oauth/perplexity.ts +10 -4
package/src/utils/oauth/types.ts +1 -0
package/src/utils/schema/zod-decontaminate.ts +11 -2

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,24 @@
 ## [Unreleased]
+## [15.1.9] - 2026-05-21
+### Fixed
+- Fixed Ollama named tool forcing to send only the requested tool when the caller passes a named `toolChoice`, preserving `tool_choice: "required"` while preventing local models from selecting a different tool. ([#1236](https://github.com/can1357/oh-my-pi/issues/1236))
+- Fixed `/btw` (and IRC background replies) returning a `BedrockException` 400 (`The toolConfig field must be defined when using toolUse and toolResult content blocks.`) on LiteLLM → Bedrock once the session has tool-call history. Two source fixes in `buildParams`: (1) `if (context.tools)` → `if (context.tools?.length)` so an explicit `context.tools = []` (the /btw opt-out) never routes through `convertTools` and never emits an empty `"tools"` array; (2) `else if (hasToolHistory(...))` → `else if (context.tools === undefined && hasToolHistory(...))` so the Anthropic-proxy sentinel that injects `tools: []` for tool-history turns is suppressed when the caller explicitly opted out, preventing it from re-introducing the empty array. As defence-in-depth, `tool_choice: "none"` is also dropped when the resolved tools list is missing or empty. ([#1227](https://github.com/can1357/oh-my-pi/issues/1227))
+## [15.1.8] - 2026-05-20
+### Added
+- Added Fireworks Fire Pass as a separate `firepass` provider with API-key login flow, bundled `kimi-k2.6-turbo` model entry (Kimi K2.6 Turbo), and wire-id translation from the friendly catalog id to the `accounts/fireworks/routers/kimi-k2p6-turbo` router endpoint. Fire Pass keys (`fpk_…`) authorize only the dedicated router and reject `/v1/models`, so login validation pings chat completions against the router id directly. Extended the openai-completions Kimi-family safety net so the firepass entry inherits the per-Fireworks-docs "always send `max_tokens`" default ([Kimi K2 guide](https://docs.fireworks.ai/models/kimi-k2)); the router's accepted `reasoning_effort` set includes `xhigh`, so it is forwarded verbatim rather than remapped. See https://docs.fireworks.ai/firepass.
+### Fixed
+- Fixed DeepSeek V4 direct API requests with tools to keep documented thinking mode instead of dropping reasoning: lower OMP efforts now map to DeepSeek's supported `high`, `tool_choice` is omitted, `thinking: { type: "enabled" }` and `max_tokens` are sent, and partial user `reasoningEffortMap` overrides merge with DeepSeek defaults. ([#1207](https://github.com/can1357/oh-my-pi/issues/1207))
+- Fixed model cache schema v2 databases so offline refreshes preserve cached provider discoveries after upgrading to schema v3 and subsequent online refreshes can overwrite the cache. ([#1219](https://github.com/can1357/oh-my-pi/issues/1219))
+- Fixed Perplexity OAuth credentials being treated as expired one hour after login. `getJwtExpiry` was fabricating `expires = now + 1h` whenever the JWT had no `exp` claim (the common case — Perplexity sessions are server-side). Once the hour elapsed, `getOAuthApiKey` would mark the cred expired and the search provider's loader would silently skip it, surfacing as "logged out". Logins with no `exp` now persist a far-future sentinel; `getOAuthApiKey` also normalizes any stale `expires` written by older builds.
 ## [15.1.7] - 2026-05-19
 ### Added
@@ -11,6 +29,7 @@
 ### Fixed
 - Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
+- Fixed MiniMax Coding Plan CN streaming `<think>...</think>` reasoning as visible assistant text. The OpenAI-compatible stream parser now enables the existing MiniMax tag parser for both `minimax-code` and `minimax-code-cn`, so CN responses become structured `thinking` blocks instead of raw text. ([#1203](https://github.com/can1357/oh-my-pi/issues/1203))
 ## [15.1.6] - 2026-05-19

package/dist/types/provider-models/openai-compat.d.ts CHANGED Viewed

@@ -63,6 +63,18 @@ export interface FireworksModelManagerConfig {
     baseUrl?: string;
 }
 export declare function fireworksModelManagerOptions(config?: FireworksModelManagerConfig): ModelManagerOptions<"openai-completions">;
+export interface FirepassModelManagerConfig {
+    apiKey?: string;
+    baseUrl?: string;
+}
+/**
+ * Fire Pass is a Fireworks subscription product that exposes a single router
+ * model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
+ * The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
+ * never performs dynamic discovery — the bundled catalog entry is canonical.
+ * See https://docs.fireworks.ai/firepass.
+ */
+export declare function firepassModelManagerOptions(_config?: FirepassModelManagerConfig): ModelManagerOptions<"openai-completions">;
 export interface MistralModelManagerConfig {
     apiKey?: string;
     baseUrl?: string;

package/dist/types/types.d.ts CHANGED Viewed

@@ -48,7 +48,7 @@ export interface ThinkingConfig {
     /** Provider-specific transport used to encode the selected effort. */
     mode: ThinkingControlMode;
 }
-export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
+export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
 export type Provider = KnownProvider | string;
 import type { Effort } from "./model-thinking";
 /** Token budgets for each thinking level (token-based providers only) */

package/dist/types/utils/fireworks-model-id.d.ts CHANGED Viewed

@@ -1,2 +1,10 @@
 export declare function toFireworksPublicModelId(modelId: string): string;
 export declare function toFireworksWireModelId(modelId: string): string;
+/**
+ * Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
+ * endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
+ * We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
+ * to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time.
+ */
+export declare function toFirepassPublicModelId(modelId: string): string;
+export declare function toFirepassWireModelId(modelId: string): string;

package/dist/types/utils/oauth/firepass.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare const loginFirepass: (options: import("./types").OAuthController) => Promise<string>;

package/dist/types/utils/oauth/types.d.ts CHANGED Viewed

@@ -7,7 +7,7 @@ export type OAuthCredentials = {
     email?: string;
     accountId?: string;
 };
-export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
+export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
 export type OAuthProviderId = OAuthProvider | (string & {});
 export type OAuthPrompt = {
     message: string;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "15.1.7",
+	"version": "15.1.9",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -43,7 +43,7 @@
 	"dependencies": {
 		"@anthropic-ai/sdk": "^0.94.0",
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "15.1.7",
+		"@oh-my-pi/pi-utils": "15.1.9",
 		"openai": "^6.36.0",
 		"partial-json": "^0.1.7",
 		"zod": "4.4.3"

package/src/auth-storage.ts CHANGED Viewed

@@ -1344,6 +1344,12 @@ export class AuthStorage {
 				await saveApiKeyCredential(apiKey);
 				return;
 			}
+			case "firepass": {
+				const { loginFirepass } = await import("./utils/oauth/firepass");
+				const apiKey = await loginFirepass(ctrl);
+				await saveApiKeyCredential(apiKey);
+				return;
+			}
 			case "zai": {
 				const { loginZai } = await import("./utils/oauth/zai");
 				const apiKey = await loginZai(ctrl);

package/src/model-cache.ts CHANGED Viewed

@@ -17,6 +17,10 @@ interface CacheRow {
 	models: string;
 }
+interface TableInfoRow {
+	name: string;
+}
 interface CacheEntry<TApi extends Api = Api> {
 	models: Model<TApi>[];
 	fresh: boolean;
@@ -55,11 +59,21 @@ function getDb(dbPath?: string): Database {
 			models TEXT NOT NULL
 		)
 	`);
+	migrateCacheSchema(db);
 	sharedDb = db;
 	sharedDbPath = resolvedPath;
 	return db;
 }
+function migrateCacheSchema(db: Database): void {
+	const columns = db.prepare("PRAGMA table_info(model_cache)").all() as TableInfoRow[];
+	if (!columns.some(column => column.name === "static_fingerprint")) {
+		db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
+	}
+	db.run("UPDATE model_cache SET version = ? WHERE version = 2", [CACHE_SCHEMA_VERSION]);
+}
 export function readModelCache<TApi extends Api>(
 	providerId: string,
 	ttlMs: number,

package/src/models.json CHANGED Viewed

@@ -5027,6 +5027,33 @@
 			}
 		}
 	},
+	"firepass": {
+		"kimi-k2.6-turbo": {
+			"id": "kimi-k2.6-turbo",
+			"name": "Kimi K2.6 Turbo (Fire Pass)",
+			"api": "openai-completions",
+			"provider": "firepass",
+			"baseUrl": "https://api.fireworks.ai/inference/v1",
+			"reasoning": true,
+			"input": [
+				"text",
+				"image"
+			],
+			"cost": {
+				"input": 0,
+				"output": 0,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 262144,
+			"maxTokens": 65536,
+			"thinking": {
+				"mode": "effort",
+				"minLevel": "minimal",
+				"maxLevel": "xhigh"
+			}
+		}
+	},
 	"fireworks": {
 		"deepseek-v4-pro": {
 			"id": "deepseek-v4-pro",

package/src/provider-models/descriptors.ts CHANGED Viewed

@@ -14,6 +14,7 @@ import {
 	cerebrasModelManagerOptions,
 	cloudflareAiGatewayModelManagerOptions,
 	deepseekModelManagerOptions,
+	firepassModelManagerOptions,
 	fireworksModelManagerOptions,
 	githubCopilotModelManagerOptions,
 	groqModelManagerOptions,
@@ -152,6 +153,7 @@ export const PROVIDER_DESCRIPTORS: readonly ProviderDescriptor[] = [
 		config => fireworksModelManagerOptions(config),
 		catalog("Fireworks", ["FIREWORKS_API_KEY"]),
 	),
+	descriptor("firepass", "kimi-k2.6-turbo", config => firepassModelManagerOptions(config)),
 	descriptor("xai", "grok-4-fast-non-reasoning", config => xaiModelManagerOptions(config)),
 	catalogDescriptor(
 		"deepseek",

package/src/provider-models/openai-compat.ts CHANGED Viewed

@@ -692,6 +692,30 @@ export function fireworksModelManagerOptions(
 	};
 }
+// ---------------------------------------------------------------------------
+// 7.6 Fire Pass (Fireworks Kimi K2.6 Turbo subscription)
+// ---------------------------------------------------------------------------
+export interface FirepassModelManagerConfig {
+	apiKey?: string;
+	baseUrl?: string;
+}
+/**
+ * Fire Pass is a Fireworks subscription product that exposes a single router
+ * model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
+ * The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
+ * never performs dynamic discovery — the bundled catalog entry is canonical.
+ * See https://docs.fireworks.ai/firepass.
+ */
+export function firepassModelManagerOptions(
+	_config?: FirepassModelManagerConfig,
+): ModelManagerOptions<"openai-completions"> {
+	return {
+		providerId: "firepass",
+	};
+}
 // ---------------------------------------------------------------------------
 // 7. Mistral
 // ---------------------------------------------------------------------------
@@ -2083,18 +2107,26 @@ const MODELS_DEV_PROVIDER_DESCRIPTORS_CORE: readonly ModelsDevProviderDescriptor
 		// ids are kept off the catalog until the issue thread asks for them.
 		filterModel: (id, m) => m.tool_call === true && id.startsWith("deepseek-v4"),
 		compat: {
-			// xhigh maps to DeepSeek's `max` reasoning_effort (#830 thread).
+			// DeepSeek V4 only accepts `high`/`max`; map lower OMP levels upward so
+			// subagent "minimal" turns stay in documented thinking mode instead of
+			// sending unsupported effort strings.
+			supportsDeveloperRole: false,
 			supportsReasoningEffort: true,
-			reasoningEffortMap: { xhigh: "max" },
-			// `tool_choice` returns 400 against DeepSeek when reasoning_effort is set
-			// (per the issue thread). Tool calls still work without the parameter.
+			reasoningEffortMap: { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" },
+			maxTokensField: "max_tokens",
+			// DeepSeek V4 thinking mode rejects the `tool_choice` control parameter.
+			// Tool calls still work without it; the API defaults to auto when tools exist.
 			supportsToolChoice: false,
+			// DeepSeek V4's OpenAI format docs enable thinking with both the toggle and
+			// reasoning_effort. Keep the toggle explicit for built-in models.
+			extraBody: { thinking: { type: "enabled" } },
 			// DeepSeek emits chain-of-thought via `reasoning_content` and requires it
 			// to round-trip on assistant tool-call messages so the model can resume
 			// from prior thinking (interleaved.field=reasoning_content on models.dev,
 			// matches the kimi/openrouter handling already in detectCompat).
 			reasoningContentField: "reasoning_content",
 			requiresReasoningContentForToolCalls: true,
+			requiresAssistantContentForToolCalls: true,
 		},
 	}),
 ];

package/src/providers/anthropic.ts CHANGED Viewed

@@ -1060,16 +1060,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
 			const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
 				let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
-				const replacementPayload = await options?.onPayload?.(nextParams, model);
-				if (replacementPayload !== undefined) {
-					nextParams = replacementPayload as typeof nextParams;
-				}
 				if (disableStrictTools) {
 					dropAnthropicStrictTools(nextParams);
 				}
 				if (dropFastMode) {
 					dropAnthropicFastMode(nextParams);
 				}
+				const replacementPayload = await options?.onPayload?.(nextParams, model);
+				if (replacementPayload !== undefined) {
+					nextParams = replacementPayload as typeof nextParams;
+				}
 				rawRequestDump = {
 					provider: model.provider,
 					api: output.api,
@@ -2388,7 +2388,12 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
 		result.properties = normalizedProperties;
 	}
 	if (isRecord(result.additionalProperties)) {
-		result.additionalProperties = normalizeAnthropicToolSchema(result.additionalProperties);
+		const normalized = normalizeAnthropicToolSchema(result.additionalProperties);
+		if (isRecord(normalized) && Object.keys(normalized).length === 0) {
+			result.additionalProperties = true;
+		} else {
+			result.additionalProperties = normalized;
+		}
 	}
 	if (Array.isArray(result.items)) {
 		result.items = result.items.map(item => normalizeAnthropicToolSchema(item));

package/src/providers/ollama.ts CHANGED Viewed

@@ -116,6 +116,29 @@ function mapToolChoice(toolChoice: ToolChoice | undefined): "auto" | "none" | "r
 	return undefined;
 }
+function getNamedToolChoiceName(toolChoice: ToolChoice | undefined): string | undefined {
+	if (!toolChoice || typeof toolChoice === "string") {
+		return undefined;
+	}
+	if ("function" in toolChoice) {
+		return toolChoice.function.name;
+	}
+	return toolChoice.name;
+}
+function selectToolsForToolChoice(tools: Tool[] | undefined, toolChoice: ToolChoice | undefined): Tool[] | undefined {
+	const toolName = getNamedToolChoiceName(toolChoice);
+	if (!toolName || !tools) {
+		return tools;
+	}
+	for (const tool of tools) {
+		if (tool.name === toolName) {
+			return [tool];
+		}
+	}
+	return [];
+}
 function toPlainContent(content: string | Array<{ type: "text" | "image"; text?: string; data?: string }>): {
 	content: string;
 	images?: string[];
@@ -231,10 +254,12 @@ function convertTools(tools: Tool[] | undefined): OllamaFunctionTool[] | undefin
 function createChatBody(model: Model<"ollama-chat">, context: Context, options: OllamaChatOptions | undefined) {
 	const think = mapReasoning(options?.reasoning);
 	const toolChoice = mapToolChoice(options?.toolChoice);
+	const selectedTools = selectToolsForToolChoice(context.tools, options?.toolChoice);
+	const tools = convertTools(selectedTools);
 	return {
 		model: model.id,
 		messages: convertMessages(model, context),
-		...(convertTools(context.tools) ? { tools: convertTools(context.tools) } : {}),
+		...(tools ? { tools } : {}),
 		...(think !== undefined ? { think } : {}),
 		...(toolChoice !== undefined ? { tool_choice: toolChoice } : {}),
 		...(options?.maxTokens !== undefined ? { options: { num_predict: options.maxTokens } } : {}),

package/src/providers/openai-chat-server.ts CHANGED Viewed

@@ -11,7 +11,7 @@ import type {
 	Context,
 	ImageContent,
 	Message,
-	ServiceTier,
+	ResolvedServiceTier,
 	StopReason,
 	TextContent,
 	Tool,
@@ -36,7 +36,7 @@ function isReasoningEffort(value: unknown): value is ReasoningEffort {
 	return value === "minimal" || value === "low" || value === "medium" || value === "high" || value === "xhigh";
 }
-function isServiceTier(value: unknown): value is ServiceTier {
+function isServiceTier(value: unknown): value is ResolvedServiceTier {
 	return value === "auto" || value === "default" || value === "flex" || value === "scale" || value === "priority";
 }

package/src/providers/openai-completions-compat.ts CHANGED Viewed

@@ -52,7 +52,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 	const isCerebras = provider === "cerebras" || baseUrl.includes("cerebras.ai");
 	const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
 	const isKilo = provider === "kilo" || baseUrl.includes("api.kilo.ai");
-	const isKimiModel = model.id.includes("moonshotai/kimi") || /^kimi[-.]/i.test(model.id);
+	const isKimiModel = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
 	const isMoonshotKimi =
 		isKimiModel &&
 		(provider === "moonshot" ||
@@ -79,7 +79,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 		baseUrl.includes("deepseek.com") ||
 		lowerId.includes("deepseek") ||
 		lowerName.includes("deepseek");
+	const isDirectDeepseekApi = provider === "deepseek" || baseUrl.includes("api.deepseek.com");
+	const isDirectDeepseekReasoning = isDirectDeepseekApi && isDeepseekFamily && Boolean(model.reasoning);
 	const isNonStandard =
 		isCerebras ||
 		provider === "xai" ||
@@ -102,7 +103,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 		provider === "mistral" ||
 		baseUrl.includes("mistral.ai") ||
 		baseUrl.includes("chutes.ai") ||
-		baseUrl.includes("fireworks.ai");
+		baseUrl.includes("fireworks.ai") ||
+		isDirectDeepseekApi;
 	const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
 	const isMistral = provider === "mistral" || baseUrl.includes("mistral.ai");
@@ -162,7 +164,13 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 					xhigh: "default",
 				} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
 			: isDeepseekFamily && model.reasoning
-				? { xhigh: "max" }
+				? ({
+						minimal: "high",
+						low: "high",
+						medium: "high",
+						high: "high",
+						xhigh: "max",
+					} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
 				: {};
 	return {
@@ -173,8 +181,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 		reasoningEffortMap,
 		supportsUsageInStreaming: !isCerebras,
 		disableReasoningOnForcedToolChoice: isKimiModel || isAnthropicModel,
-		disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning),
-		supportsToolChoice: true,
+		disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning) && !isOpenRouter,
+		supportsToolChoice: !isDirectDeepseekReasoning,
 		maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
 		requiresToolResultName: isMistral,
 		requiresAssistantAfterToolResult: false,
@@ -204,11 +212,11 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 		// DeepSeek V4 rejects synthetic reasoning_content placeholders (".") on tool-call turns.
 		// Kimi and OpenRouter accept them when actual reasoning is unavailable.
 		allowsSyntheticReasoningContentForToolCalls: !isDeepseekFamily || !model.reasoning,
-		requiresAssistantContentForToolCalls: isKimiModel,
+		requiresAssistantContentForToolCalls: isKimiModel || isDirectDeepseekReasoning,
 		openRouterRouting: undefined,
 		vercelGatewayRouting: undefined,
 		supportsStrictMode: detectStrictModeSupport(provider, baseUrl),
-		extraBody: undefined,
+		extraBody: isDirectDeepseekReasoning ? { thinking: { type: "enabled" } } : undefined,
 		toolStrictMode: isCerebras ? "all_strict" : "mixed",
 	};
 }
@@ -235,7 +243,7 @@ export function resolveOpenAICompat(
 		supportsMultipleSystemMessages:
 			model.compat.supportsMultipleSystemMessages ?? detected.supportsMultipleSystemMessages,
 		supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
-		reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
+		reasoningEffortMap: { ...detected.reasoningEffortMap, ...(model.compat.reasoningEffortMap ?? {}) },
 		supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
 		supportsToolChoice: model.compat.supportsToolChoice ?? detected.supportsToolChoice,
 		maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
@@ -259,7 +267,7 @@ export function resolveOpenAICompat(
 		openRouterRouting: model.compat.openRouterRouting ?? detected.openRouterRouting,
 		vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
 		supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
-		extraBody: model.compat.extraBody,
+		extraBody: model.compat.extraBody ?? detected.extraBody,
 		toolStrictMode: model.compat.toolStrictMode ?? detected.toolStrictMode,
 	};
 }

package/src/providers/openai-completions.ts CHANGED Viewed

@@ -27,6 +27,7 @@ import {
 	type StopReason,
 	type StreamFunction,
 	type StreamOptions,
+	shouldSendServiceTier,
 	type TextContent,
 	type ThinkingContent,
 	type Tool,
@@ -37,7 +38,7 @@ import {
 import { normalizeSystemPrompts } from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
-import { toFireworksWireModelId } from "../utils/fireworks-model-id";
+import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
 import {
 	type CapturedHttpErrorResponse,
 	finalizeErrorMessage,
@@ -486,7 +487,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 			}
 			stream.push({ type: "start", partial: output });
-			const parseMiniMaxThinkTags = model.provider === "minimax-code";
+			const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
 			// Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
 			// native API) leak chat-template tool-call markers in `delta.content` even
 			// though tool calls are also surfaced structurally. Strip the leaked markers
@@ -1037,13 +1038,23 @@ function buildParams(
 	maybeAddOpenRouterAnthropicCacheControl(model, messages);
 	const supportsReasoningParams = model.provider !== "github-copilot";
-	// Kimi (including via OpenRouter) calculates TPM rate limits based on max_tokens, not actual output.
-	// Always send max_tokens to avoid their high default causing rate limit issues.
+	// Kimi (including via OpenRouter and Fireworks router-form IDs such as
+	// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
+	// max_tokens, not actual output. The official Kimi K2 model guidance
+	// (https://docs.fireworks.ai/models/kimi-k2) also requires `max_tokens` for
+	// every call since the family can otherwise emit very long reasoning traces
+	// before the final answer. Always send max_tokens — match the same
+	// Kimi-family regex used by the compat detector.
 	// Note: Direct kimi-code provider is handled by the dedicated Kimi provider in kimi.ts.
-	const isKimi = model.id.includes("moonshotai/kimi");
+	const isKimi = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
 	const effectiveMaxTokens = options?.maxTokens ?? (isKimi ? model.maxTokens : undefined);
-	const requestModelId = model.provider === "fireworks" ? toFireworksWireModelId(model.id) : model.id;
+	const requestModelId =
+		model.provider === "fireworks"
+			? toFireworksWireModelId(model.id)
+			: model.provider === "firepass"
+				? toFirepassWireModelId(model.id)
+				: model.id;
 	const params: OpenAICompletionsParams = {
 		model: requestModelId,
 		messages,
@@ -1092,17 +1103,25 @@ function buildParams(
 	if (options?.frequencyPenalty !== undefined) {
 		params.frequency_penalty = options.frequencyPenalty;
 	}
-	const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
-	if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
-		params.service_tier = resolvedServiceTier;
+	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
+		const resolved = resolveServiceTier(options?.serviceTier, model.provider);
+		if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
+			params.service_tier = resolved;
+		}
 	}
-	if (context.tools) {
+	if (context.tools?.length) {
 		const builtTools = convertTools(context.tools, compat, toolStrictModeOverride);
 		params.tools = builtTools.tools;
 		toolStrictMode = builtTools.toolStrictMode;
-	} else if (hasToolHistory(context.messages)) {
-		// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
+	} else if (context.tools === undefined && hasToolHistory(context.messages)) {
+		// Anthropic (via LiteLLM/proxy) requires the `tools` param when the conversation
+		// contains tool_calls/tool_results, even when no tools are offered this turn.
+		// Only inject the sentinel when the caller passed `context.tools = undefined`
+		// (i.e. tools were not specified at all). An explicit `context.tools = []` means
+		// the caller opted out of tools for this turn (as /btw and IRC background replies
+		// do via AgentSession.runEphemeralTurn) — honour that intent and emit nothing,
+		// so LiteLLM → Bedrock never sees an empty `toolConfig` block.
 		params.tools = [];
 	}
@@ -1110,6 +1129,18 @@ function buildParams(
 		params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
 	}
+	if (params.tool_choice === "none" && (!Array.isArray(params.tools) || params.tools.length === 0)) {
+		// `tool_choice: "none"` with no tools to gate is redundant and also
+		// trips LiteLLM → Bedrock: the proxy serializes the directive into a
+		// `toolConfig` block, and Bedrock requires `toolConfig.tools` to be
+		// non-empty whenever the conversation already holds `toolUse`/`toolResult`
+		// content. Drop it whenever the resolved tools list is missing or empty.
+		// Side-channel turns hit this: `/btw` and IRC background replies route
+		// through `AgentSession.runEphemeralTurn`, which sets `context.tools = []`
+		// and `toolChoice: "none"` (see packages/coding-agent/src/session/agent-session.ts).
+		delete params.tool_choice;
+	}
 	if (supportsReasoningParams && compat.thinkingFormat === "zai" && model.reasoning) {
 		// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
 		// Must explicitly disable since z.ai defaults to thinking enabled.

package/src/providers/openai-responses-shared.ts CHANGED Viewed

@@ -21,6 +21,7 @@ import {
 	type ServiceTier,
 	type StopReason,
 	type StreamOptions,
+	shouldSendServiceTier,
 	type TextContent,
 	type TextSignatureV1,
 	type ThinkingContent,
@@ -650,9 +651,11 @@ export function applyCommonResponsesSamplingParams<P extends CommonResponsesPara
 	if (options?.minP !== undefined) params.min_p = options.minP;
 	if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
 	if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
-	const resolvedServiceTier = resolveServiceTier(options?.serviceTier, provider);
-	if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
-		params.service_tier = resolvedServiceTier;
+	if (shouldSendServiceTier(options?.serviceTier, provider)) {
+		const resolved = resolveServiceTier(options?.serviceTier, provider);
+		if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
+			params.service_tier = resolved;
+		}
 	}
 }

package/src/stream.ts CHANGED Viewed

@@ -83,6 +83,7 @@ const serviceProviderMap: Record<string, KeyResolver> = {
 	cerebras: "CEREBRAS_API_KEY",
 	xai: "XAI_API_KEY",
 	fireworks: "FIREWORKS_API_KEY",
+	firepass: "FIREPASS_API_KEY",
 	openrouter: "OPENROUTER_API_KEY",
 	kilo: "KILO_API_KEY",
 	"vercel-ai-gateway": "AI_GATEWAY_API_KEY",

package/src/types.ts CHANGED Viewed

@@ -110,6 +110,7 @@ export type KnownProvider =
 	| "minimax-code-cn"
 	| "github-copilot"
 	| "fireworks"
+	| "firepass"
 	| "gitlab-duo"
 	| "cursor"
 	| "deepseek"

package/src/utils/fireworks-model-id.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 const FIREWORKS_WIRE_PREFIX = "accounts/fireworks/models/";
+const FIREPASS_WIRE_PREFIX = "accounts/fireworks/routers/";
 const VERSION_SEPARATOR_PATTERN = /(?<=\d)p(?=\d)/g;
 const VERSION_DOT_PATTERN = /(?<=\d)\.(?=\d)/g;
@@ -11,3 +12,19 @@ export function toFireworksWireModelId(modelId: string): string {
 	const stripped = modelId.startsWith(FIREWORKS_WIRE_PREFIX) ? modelId.slice(FIREWORKS_WIRE_PREFIX.length) : modelId;
 	return `${FIREWORKS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
 }
+/**
+ * Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
+ * endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
+ * We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
+ * to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time.
+ *
+ * This function is the inverse direction (wire → public): it strips the router
+ * prefix when present and turns a `p` between two digits back into `.`.
+ */
+export function toFirepassPublicModelId(modelId: string): string {
+	const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
+	return stripped.replace(VERSION_SEPARATOR_PATTERN, ".");
+}
+export function toFirepassWireModelId(modelId: string): string {
+	const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
+	return `${FIREPASS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
+}

package/src/utils/oauth/firepass.ts ADDED Viewed

@@ -0,0 +1,24 @@
+/**
+ * Fire Pass login flow.
+ *
+ * Fire Pass is a Fireworks subscription product whose dedicated `fpk_…` API
+ * keys are scoped to the `accounts/fireworks/routers/kimi-k2p6-turbo` router
+ * (Kimi K2.6 Turbo). The key does NOT authorize `/v1/models`, so validation
+ * pings the chat completions endpoint with the router id directly.
+ * See https://docs.fireworks.ai/firepass.
+ */
+import { createApiKeyLogin } from "./api-key-login";
+export const loginFirepass = createApiKeyLogin({
+	providerLabel: "Fire Pass",
+	authUrl: "https://app.fireworks.ai/settings/users/api-keys",
+	instructions: "Create a dedicated Fire Pass API key in the Fireworks dashboard",
+	promptMessage: "Paste your Fire Pass API key",
+	placeholder: "fpk_...",
+	validation: {
+		kind: "chat-completions",
+		provider: "Fire Pass",
+		baseUrl: "https://api.fireworks.ai/inference/v1",
+		model: "accounts/fireworks/routers/kimi-k2p6-turbo",
+	},
+});

package/src/utils/oauth/index.ts CHANGED Viewed

@@ -55,6 +55,11 @@ const builtInOAuthProviders: OAuthProviderInfo[] = [
 		name: "Fireworks",
 		available: true,
 	},
+	{
+		id: "firepass",
+		name: "Fire Pass (Fireworks Kimi K2.6 Turbo subscription)",
+		available: true,
+	},
 	{
 		id: "github-copilot",
 		name: "GitHub Copilot",
@@ -301,6 +306,7 @@ export async function refreshOAuthToken(
 		case "opencode-go":
 		case "cerebras":
 		case "fireworks":
+		case "firepass":
 		case "nvidia":
 		case "nanogpt":
 		case "synthetic":
@@ -363,10 +369,14 @@ export async function getOAuthApiKey(
 	}
 	if (provider === "perplexity") {
+		// Perplexity JWTs usually omit `exp` (server-side sessions). Trust the JWT
+		// claim when present; otherwise treat the credential as non-expiring rather
+		// than honoring a stale stored `expires` (older logins wrote loginTime+1h).
+		const NEVER_EXPIRES = 8.64e15;
 		const normalizedExpires =
 			creds.expires > 0 && creds.expires < 10_000_000_000 ? creds.expires * 1000 : creds.expires;
 		const jwtExpiry = getPerplexityJwtExpiryMs(creds.access);
-		const expires = jwtExpiry && jwtExpiry > normalizedExpires ? jwtExpiry : normalizedExpires;
+		const expires = jwtExpiry ?? Math.max(normalizedExpires, NEVER_EXPIRES);
 		if (expires !== creds.expires) {
 			creds = { ...creds, expires };
 		}

package/src/utils/oauth/perplexity.ts CHANGED Viewed

@@ -24,20 +24,26 @@ const APP_USER_AGENT = "Perplexity/641 CFNetwork/1568 Darwin/25.2.0";
 // JWT helpers
 // ---------------------------------------------------------------------------
-/** Extract expiry from a JWT. Falls back to 1 hour from now. Subtracts 5 min safety margin. */
+const NEVER_EXPIRES = 8.64e15; // largest time value a JS Date can hold (ms since epoch)
+/**
+ * Extract expiry (in ms since epoch) from a JWT. Perplexity tokens generally lack
+ * an `exp` claim (their sessions are server-side and effectively non-expiring from
+ * the client's point of view), so we return the far-future `NEVER_EXPIRES` sentinel
+ * when the token is malformed, cannot be decoded, or carries no numeric `exp`.
+ * When `exp` IS present, subtract a 5-minute safety margin.
+ */
 function getJwtExpiry(token: string): number {
 	try {
 		const parts = token.split(".");
-		if (parts.length !== 3) return Date.now() + 3600_000;
+		if (parts.length !== 3) return NEVER_EXPIRES;
 		const payload = parts[1] ?? "";
 		const decoded = JSON.parse(atob(payload.replace(/-/g, "+").replace(/_/g, "/")));
-		if (decoded?.exp && typeof decoded.exp === "number") {
+		if (typeof decoded?.exp === "number" && Number.isFinite(decoded.exp)) {
 			return decoded.exp * 1000 - 5 * 60_000;
 		}
 	} catch {
 		// Ignore decode errors
 	}
-	return Date.now() + 3600_000;
+	return NEVER_EXPIRES;
 }
 /** Build OAuthCredentials from a Perplexity JWT string. */

package/src/utils/oauth/types.ts CHANGED Viewed

@@ -15,6 +15,7 @@ export type OAuthProvider =
 	| "cloudflare-ai-gateway"
 	| "cursor"
 	| "fireworks"
+	| "firepass"
 	| "github-copilot"
 	| "google-gemini-cli"
 	| "google-antigravity"

package/src/utils/schema/zod-decontaminate.ts CHANGED Viewed

@@ -243,8 +243,17 @@ function rewriteZodNode(node: JsonObject, seen: WeakSet<object>): unknown {
 		case "pipe":
 		case "transform": {
 			const inner = walk(unwrapInnerSchema(def), seen);
-			if (kind === "nullable" && isJsonObject(inner) && typeof inner.type === "string") {
-				return { ...inner, type: [inner.type, "null"] };
+			if (kind === "nullable" && isJsonObject(inner)) {
+				if (typeof inner.type === "string") {
+					return { ...inner, type: [inner.type, "null"] };
+				}
+				if (Array.isArray(inner.type)) {
+					return (inner.type as string[]).includes("null")
+						? inner
+						: { ...inner, type: [...(inner.type as string[]), "null"] };
+				}
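+				// No top-level `type` keyword to extend (e.g. anyOf / allOf / $ref shapes):
+				// express nullability by wrapping the schema in `anyOf` with a null branch.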
+				return { anyOf: [inner, { type: "null" }] };
 			}
 			return inner;
 		}