npm - @oh-my-pi/pi-ai - Versions diffs - 15.1.6 → 15.1.8 - Mend

@oh-my-pi/pi-ai 15.1.6 → 15.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/CHANGELOG.md +22 -0
package/dist/types/provider-models/openai-compat.d.ts +12 -0
package/dist/types/providers/anthropic.d.ts +20 -1
package/dist/types/types.d.ts +47 -10
package/dist/types/utils/fireworks-model-id.d.ts +8 -0
package/dist/types/utils/oauth/firepass.d.ts +1 -0
package/dist/types/utils/oauth/types.d.ts +1 -1
package/package.json +2 -2
package/src/auth-storage.ts +6 -0
package/src/model-cache.ts +14 -0
package/src/models.json +27 -0
package/src/provider-models/descriptors.ts +2 -0
package/src/provider-models/openai-compat.ts +36 -4
package/src/providers/anthropic.ts +118 -5
package/src/providers/openai-chat-server.ts +2 -2
package/src/providers/openai-codex-responses.ts +4 -3
package/src/providers/openai-completions-compat.ts +18 -10
package/src/providers/openai-completions.ts +21 -7
package/src/providers/openai-responses-shared.ts +5 -1
package/src/stream.ts +7 -0
package/src/types.ts +74 -16
package/src/utils/fireworks-model-id.ts +17 -0
package/src/utils/oauth/firepass.ts +24 -0
package/src/utils/oauth/index.ts +11 -1
package/src/utils/oauth/perplexity.ts +10 -4
package/src/utils/oauth/types.ts +1 -0
package/src/utils/schema/zod-decontaminate.ts +11 -2

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,28 @@
 ## [Unreleased]
+## [15.1.8] - 2026-05-20
+### Added
+- Added Fireworks Fire Pass as a separate `firepass` provider with API-key login flow, bundled `kimi-k2.6-turbo` model entry (Kimi K2.6 Turbo), and wire-id translation from the friendly catalog id to the `accounts/fireworks/routers/kimi-k2p6-turbo` router endpoint. Fire Pass keys (`fpk_…`) authorize only the dedicated router and reject `/v1/models`, so login validation pings chat completions against the router id directly. Extended the openai-completions Kimi-family safety net so the firepass entry inherits the per-Fireworks-docs "always send `max_tokens`" default ([Kimi K2 guide](https://docs.fireworks.ai/models/kimi-k2)); the router's accepted `reasoning_effort` set includes `xhigh`, so it is forwarded verbatim rather than remapped. See https://docs.fireworks.ai/firepass.
+### Fixed
+- Fixed DeepSeek V4 direct API requests with tools to keep documented thinking mode instead of dropping reasoning: lower OMP efforts now map to DeepSeek's supported `high`, `tool_choice` is omitted, `thinking: { type: "enabled" }` and `max_tokens` are sent, and partial user `reasoningEffortMap` overrides merge with DeepSeek defaults. ([#1207](https://github.com/can1357/oh-my-pi/issues/1207))
+- Fixed model cache schema v2 databases so offline refreshes preserve cached provider discoveries after upgrading to schema v3 and subsequent online refreshes can overwrite the cache. ([#1219](https://github.com/can1357/oh-my-pi/issues/1219))
+- Fixed Perplexity OAuth credentials being treated as expired one hour after login. `getJwtExpiry` was fabricating `expires = now + 1h` whenever the JWT had no `exp` claim (the common case — Perplexity sessions are server-side). Once the hour elapsed, `getOAuthApiKey` would mark the cred expired and the search provider's loader would silently skip it, surfacing as "logged out". Logins with no `exp` now persist a far-future sentinel; `getOAuthApiKey` also normalizes any stale `expires` written by older builds.
+## [15.1.7] - 2026-05-19
+### Added
+- Added Anthropic realization of `serviceTier: "priority"`. The anthropic-messages provider now sets `speed: "fast"` on the request and appends the `fast-mode-2026-02-01` beta to `Anthropic-Beta` whenever the caller passes `serviceTier: "priority"`. When the server rejects an unsupported model with `invalid_request_error`, the provider transparently retries the same turn without the fast-mode signal (mirroring the strict-tools fallback pattern), persists the disable via a new `providerSessionState.fastModeDisabled` flag so subsequent requests in the session skip the field, and surfaces the action via the new `AssistantMessage.disabledFeatures` array (id `"priority"`) so callers can sync user-facing toggles. A new `clearAnthropicFastModeFallback(providerSessionState)` helper lets callers re-arm priority after the auto-fallback fired.
+- Added scoped `ServiceTier` values: `"openai-only"` (priority on `openai`/`openai-codex`, ignored elsewhere) and `"claude-only"` (priority on direct `anthropic`, ignored on Bedrock/Vertex Claude and elsewhere). A new `resolveServiceTier(serviceTier, provider)` helper computes the effective tier for the provider; existing OpenAI/Anthropic provider code routes through it, so `service_tier` and Anthropic fast-mode emission both respect scope. `getPriorityPremiumRequests` now counts Anthropic+priority as one premium request (previously zero) and continues to ignore providers that drop the field on the wire.
+### Fixed
+- Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
+- Fixed MiniMax Coding Plan CN streaming `<think>...</think>` reasoning as visible assistant text. The OpenAI-compatible stream parser now enables the existing MiniMax tag parser for both `minimax-code` and `minimax-code-cn`, so CN responses become structured `thinking` blocks instead of raw text. ([#1203](https://github.com/can1357/oh-my-pi/issues/1203))
 ## [15.1.6] - 2026-05-19
 ### Fixed

package/dist/types/provider-models/openai-compat.d.ts CHANGED Viewed

@@ -63,6 +63,18 @@ export interface FireworksModelManagerConfig {
     baseUrl?: string;
 }
 export declare function fireworksModelManagerOptions(config?: FireworksModelManagerConfig): ModelManagerOptions<"openai-completions">;
+export interface FirepassModelManagerConfig {
+    apiKey?: string;
+    baseUrl?: string;
+}
+/**
+ * Fire Pass is a Fireworks subscription product that exposes a single router
+ * model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
+ * The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
+ * never performs dynamic discovery — the bundled catalog entry is canonical.
+ * See https://docs.fireworks.ai/firepass.
+ */
+export declare function firepassModelManagerOptions(_config?: FirepassModelManagerConfig): ModelManagerOptions<"openai-completions">;
 export interface MistralModelManagerConfig {
     apiKey?: string;
     baseUrl?: string;

package/dist/types/providers/anthropic.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
 import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
-import type { FetchImpl, Message, Model, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
+import type { FetchImpl, Message, Model, ProviderSessionState, ServiceTier, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
 export type AnthropicHeaderOptions = {
     apiKey: string;
     baseUrl?: string;
@@ -17,6 +17,15 @@ type AnthropicCacheControl = {
     type: "ephemeral";
     ttl?: "1h" | "5m";
 };
+/**
+ * Clears the in-session "server rejected fast mode" sticky flag. Call when the
+ * caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
+ * `/fast on` after a previous turn auto-disabled it) so the next request
+ * actually carries `speed: "fast"` again. No-op when the map or state entry
+ * hasn't been materialized yet.
+ */
+export declare function clearAnthropicFastModeFallback(providerSessionState: Map<string, ProviderSessionState> | undefined): void;
+export declare function isAnthropicFastModeUnsupportedError(error: unknown): boolean;
 export declare const claudeCodeVersion = "2.1.63";
 export declare const claudeToolPrefix: string;
 export declare const claudeCodeSystemInstruction = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
@@ -77,6 +86,16 @@ export interface AnthropicOptions extends StreamOptions {
         name: string;
     };
     betas?: string[] | string;
+    /**
+     * Realization of `serviceTier: "priority"` on Anthropic models. When the
+     * tier resolves to `"priority"` (including `"claude-only"` on direct
+     * `anthropic`), sets `speed: "fast"` on the request and appends the
+     * `fast-mode-2026-02-01` beta header. If Anthropic rejects fast mode (400
+     * `invalid_request_error` or 429 `rate_limit_error`), the provider retries
+     * once without `speed` (see `fastModeDisabled` provider state).
+     * Tiers that do not resolve to `"priority"` are ignored on this provider.
+     */
+    serviceTier?: ServiceTier;
     /** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
     isOAuth?: boolean;
     /**

package/dist/types/types.d.ts CHANGED Viewed

@@ -48,7 +48,7 @@ export interface ThinkingConfig {
     /** Provider-specific transport used to encode the selected effort. */
     mode: ThinkingControlMode;
 }
-export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
+export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
 export type Provider = KnownProvider | string;
 import type { Effort } from "./model-thinking";
 /** Token budgets for each thinking level (token-based providers only) */
@@ -69,18 +69,47 @@ export type ToolChoice = "auto" | "none" | "any" | "required" | {
     name: string;
 };
 export type CacheRetention = "none" | "short" | "long";
-/** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
-export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
-export declare function shouldSendServiceTier(serviceTier?: ServiceTier | null, provider?: Provider): serviceTier is "flex" | "scale" | "priority";
 /**
- * Premium-request weight contributed by sending a `priority` service tier to
- * a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
- * accounting so the "premium requests" stat aggregates priority traffic too.
+ * Service tier hint for processing priority / cost control.
  *
- * Returns 1 per priority request, 0 otherwise. Non-priority tiers (`flex`,
- * `scale`) and providers that ignore `service_tier` always return 0.
+ * The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
+ * `"priority"`) are passed through to providers that understand them
+ * (OpenAI's `service_tier` field directly; Anthropic translates
+ * `"priority"` into `speed: "fast"` on supported Opus models).
+ *
+ * The scoped values target a specific provider family and behave as the
+ * unscoped value on the matching provider, or `undefined` everywhere else.
+ * They let users opt into priority on one family without paying premium
+ * costs on the other when switching models mid-session.
+ *
+ * - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
+ * - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
+ */
+export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
+/** Resolved tier — one of the values that providers actually consume on the wire. */
+export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
+/**
+ * Resolves a possibly scoped `ServiceTier` to the effective tier for the
+ * given provider. Scoped values match their target family and otherwise
+ * collapse to `undefined`; unscoped values pass through unchanged.
+ */
+export declare function resolveServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): ResolvedServiceTier | undefined;
+/**
+ * True when the (possibly scoped) tier should be sent as OpenAI's
+ * `service_tier` request field for the given provider. Non-OpenAI
+ * providers, unsupported tiers (`"auto"`, `"default"`), and scope
+ * mismatches all return false.
  */
-export declare function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number;
+export declare function shouldSendServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): boolean;
+/**
+ * Premium-request weight contributed by sending priority to a provider
+ * that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
+ * so the "premium requests" stat aggregates priority traffic across the
+ * OpenAI family and Anthropic fast-mode realizations.
+ *
+ * Returns 1 per resolved priority request, 0 otherwise.
+ */
+export declare function getPriorityPremiumRequests(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): number;
 export interface ProviderSessionState {
     close(): void;
 }
@@ -371,6 +400,14 @@ export interface AssistantMessage {
     errorMessage?: string;
     /** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
     errorStatus?: number;
+    /**
+     * Stable identifiers for request features the provider silently dropped
+     * for this turn (e.g. `"priority"`). Set when a server-side rejection
+     * triggered an in-provider fallback retry without the feature, or when an
+     * earlier rejection in the same session left it disabled. Callers can use
+     * this to sync user-facing toggles back to the server's actual state.
+     */
+    disabledFeatures?: string[];
     /** Provider-specific opaque payload used to reconstruct transport-native history. */
     providerPayload?: ProviderPayload;
     timestamp: number;

package/dist/types/utils/fireworks-model-id.d.ts CHANGED Viewed

@@ -1,2 +1,10 @@
 export declare function toFireworksPublicModelId(modelId: string): string;
 export declare function toFireworksWireModelId(modelId: string): string;
+/**
+ * Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
+ * endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
+ * We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
+ * to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time.
+ */
+export declare function toFirepassPublicModelId(modelId: string): string;
+export declare function toFirepassWireModelId(modelId: string): string;

package/dist/types/utils/oauth/firepass.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare const loginFirepass: (options: import("./types").OAuthController) => Promise<string>;

package/dist/types/utils/oauth/types.d.ts CHANGED Viewed

@@ -7,7 +7,7 @@ export type OAuthCredentials = {
     email?: string;
     accountId?: string;
 };
-export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
+export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
 export type OAuthProviderId = OAuthProvider | (string & {});
 export type OAuthPrompt = {
     message: string;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "15.1.6",
+	"version": "15.1.8",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -43,7 +43,7 @@
 	"dependencies": {
 		"@anthropic-ai/sdk": "^0.94.0",
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "15.1.6",
+		"@oh-my-pi/pi-utils": "15.1.8",
 		"openai": "^6.36.0",
 		"partial-json": "^0.1.7",
 		"zod": "4.4.3"

package/src/auth-storage.ts CHANGED Viewed

@@ -1344,6 +1344,12 @@ export class AuthStorage {
 				await saveApiKeyCredential(apiKey);
 				return;
 			}
+			case "firepass": {
+				const { loginFirepass } = await import("./utils/oauth/firepass");
+				const apiKey = await loginFirepass(ctrl);
+				await saveApiKeyCredential(apiKey);
+				return;
+			}
 			case "zai": {
 				const { loginZai } = await import("./utils/oauth/zai");
 				const apiKey = await loginZai(ctrl);

package/src/model-cache.ts CHANGED Viewed

@@ -17,6 +17,10 @@ interface CacheRow {
 	models: string;
 }
+interface TableInfoRow {
+	name: string;
+}
 interface CacheEntry<TApi extends Api = Api> {
 	models: Model<TApi>[];
 	fresh: boolean;
@@ -55,11 +59,21 @@ function getDb(dbPath?: string): Database {
 			models TEXT NOT NULL
 		)
 	`);
+	migrateCacheSchema(db);
 	sharedDb = db;
 	sharedDbPath = resolvedPath;
 	return db;
 }
+function migrateCacheSchema(db: Database): void {
+	const columns = db.prepare("PRAGMA table_info(model_cache)").all() as TableInfoRow[];
+	if (!columns.some(column => column.name === "static_fingerprint")) {
+		db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
+	}
+	db.run("UPDATE model_cache SET version = ? WHERE version = 2", [CACHE_SCHEMA_VERSION]);
+}
 export function readModelCache<TApi extends Api>(
 	providerId: string,
 	ttlMs: number,

package/src/models.json CHANGED Viewed

@@ -5027,6 +5027,33 @@
 			}
 		}
 	},
+	"firepass": {
+		"kimi-k2.6-turbo": {
+			"id": "kimi-k2.6-turbo",
+			"name": "Kimi K2.6 Turbo (Fire Pass)",
+			"api": "openai-completions",
+			"provider": "firepass",
+			"baseUrl": "https://api.fireworks.ai/inference/v1",
+			"reasoning": true,
+			"input": [
+				"text",
+				"image"
+			],
+			"cost": {
+				"input": 0,
+				"output": 0,
+				"cacheRead": 0,
+				"cacheWrite": 0
+			},
+			"contextWindow": 262144,
+			"maxTokens": 65536,
+			"thinking": {
+				"mode": "effort",
+				"minLevel": "minimal",
+				"maxLevel": "xhigh"
+			}
+		}
+	},
 	"fireworks": {
 		"deepseek-v4-pro": {
 			"id": "deepseek-v4-pro",

package/src/provider-models/descriptors.ts CHANGED Viewed

@@ -14,6 +14,7 @@ import {
 	cerebrasModelManagerOptions,
 	cloudflareAiGatewayModelManagerOptions,
 	deepseekModelManagerOptions,
+	firepassModelManagerOptions,
 	fireworksModelManagerOptions,
 	githubCopilotModelManagerOptions,
 	groqModelManagerOptions,
@@ -152,6 +153,7 @@ export const PROVIDER_DESCRIPTORS: readonly ProviderDescriptor[] = [
 		config => fireworksModelManagerOptions(config),
 		catalog("Fireworks", ["FIREWORKS_API_KEY"]),
 	),
+	descriptor("firepass", "kimi-k2.6-turbo", config => firepassModelManagerOptions(config)),
 	descriptor("xai", "grok-4-fast-non-reasoning", config => xaiModelManagerOptions(config)),
 	catalogDescriptor(
 		"deepseek",

package/src/provider-models/openai-compat.ts CHANGED Viewed

@@ -692,6 +692,30 @@ export function fireworksModelManagerOptions(
 	};
 }
+// ---------------------------------------------------------------------------
+// 7.6 Fire Pass (Fireworks Kimi K2.6 Turbo subscription)
+// ---------------------------------------------------------------------------
+export interface FirepassModelManagerConfig {
+	apiKey?: string;
+	baseUrl?: string;
+}
+/**
+ * Fire Pass is a Fireworks subscription product that exposes a single router
+ * model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
+ * The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
+ * never performs dynamic discovery — the bundled catalog entry is canonical.
+ * See https://docs.fireworks.ai/firepass.
+ */
+export function firepassModelManagerOptions(
+	_config?: FirepassModelManagerConfig,
+): ModelManagerOptions<"openai-completions"> {
+	return {
+		providerId: "firepass",
+	};
+}
 // ---------------------------------------------------------------------------
 // 7. Mistral
 // ---------------------------------------------------------------------------
@@ -2083,18 +2107,26 @@ const MODELS_DEV_PROVIDER_DESCRIPTORS_CORE: readonly ModelsDevProviderDescriptor
 		// ids are kept off the catalog until the issue thread asks for them.
 		filterModel: (id, m) => m.tool_call === true && id.startsWith("deepseek-v4"),
 		compat: {
-			// xhigh maps to DeepSeek's `max` reasoning_effort (#830 thread).
+			// DeepSeek V4 only accepts `high`/`max`; map lower OMP levels upward so
+			// subagent "minimal" turns stay in documented thinking mode instead of
+			// sending unsupported effort strings.
+			supportsDeveloperRole: false,
 			supportsReasoningEffort: true,
-			reasoningEffortMap: { xhigh: "max" },
-			// `tool_choice` returns 400 against DeepSeek when reasoning_effort is set
-			// (per the issue thread). Tool calls still work without the parameter.
+			reasoningEffortMap: { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" },
+			maxTokensField: "max_tokens",
+			// DeepSeek V4 thinking mode rejects the `tool_choice` control parameter.
+			// Tool calls still work without it; the API defaults to auto when tools exist.
 			supportsToolChoice: false,
+			// DeepSeek V4's OpenAI format docs enable thinking with both the toggle and
+			// reasoning_effort. Keep the toggle explicit for built-in models.
+			extraBody: { thinking: { type: "enabled" } },
 			// DeepSeek emits chain-of-thought via `reasoning_content` and requires it
 			// to round-trip on assistant tool-call messages so the model can resume
 			// from prior thinking (interleaved.field=reasoning_content on models.dev,
 			// matches the kimi/openrouter handling already in detectCompat).
 			reasoningContentField: "reasoning_content",
 			requiresReasoningContentForToolCalls: true,
+			requiresAssistantContentForToolCalls: true,
 		},
 	}),
 ];

package/src/providers/anthropic.ts CHANGED Viewed

@@ -32,6 +32,7 @@ import type {
 	Model,
 	ProviderSessionState,
 	RedactedThinkingContent,
+	ServiceTier,
 	SimpleStreamOptions,
 	StopReason,
 	StreamFunction,
@@ -43,6 +44,7 @@ import type {
 	ToolResultMessage,
 	Usage,
 } from "../types";
+import { resolveServiceTier } from "../types";
 import {
 	isAnthropicOAuthToken,
 	isRecord,
@@ -111,6 +113,7 @@ const claudeCodeBetaDefaults = [
 ];
 const fineGrainedToolStreamingBeta = "fine-grained-tool-streaming-2025-05-14";
 const interleavedThinkingBeta = "interleaved-thinking-2025-05-14";
+const fastModeBeta = "fast-mode-2026-02-01";
 function getHeaderCaseInsensitive(headers: Record<string, string> | undefined, headerName: string): string | undefined {
 	if (!headers) return undefined;
@@ -224,13 +227,16 @@ const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";
 type AnthropicProviderSessionState = ProviderSessionState & {
 	strictToolsDisabled: boolean;
+	fastModeDisabled: boolean;
 };
 function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
 	const state: AnthropicProviderSessionState = {
 		strictToolsDisabled: false,
+		fastModeDisabled: false,
 		close: () => {
 			state.strictToolsDisabled = false;
+			state.fastModeDisabled = false;
 		},
 	};
 	return state;
@@ -249,6 +255,23 @@ function getAnthropicProviderSessionState(
 	return created;
 }
+/**
+ * Clears the in-session "server rejected fast mode" sticky flag. Call when the
+ * caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
+ * `/fast on` after a previous turn auto-disabled it) so the next request
+ * actually carries `speed: "fast"` again. No-op when the map or state entry
+ * hasn't been materialized yet.
+ */
+export function clearAnthropicFastModeFallback(
+	providerSessionState: Map<string, ProviderSessionState> | undefined,
+): void {
+	if (!providerSessionState) return;
+	const state = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
+		| AnthropicProviderSessionState
+		| undefined;
+	if (state) state.fastModeDisabled = false;
+}
 function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
 	if (extractHttpStatusFromError(error) !== 400) return false;
 	const message = error instanceof Error ? error.message : String(error);
@@ -258,11 +281,45 @@ function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
 	return /invalid_request_error/i.test(message) && (isStrictGrammarTooLarge || isSchemaCompilationTooComplex);
 }
+export function isAnthropicFastModeUnsupportedError(error: unknown): boolean {
+	const status = extractHttpStatusFromError(error);
+	if (status !== 400 && status !== 429) return false;
+	const message = error instanceof Error ? error.message : String(error);
+	// 400 invalid_request_error — model doesn't accept `speed` at all.
+	// Observed: "'claude-opus-4-5-20251101' does not support the `speed` parameter."
+	// Stay tolerant of phrasing drift ("is not supported", quoted vs backticked field).
+	if (
+		status === 400 &&
+		/invalid_request_error/i.test(message) &&
+		/\bspeed\b/i.test(message) &&
+		/not support/i.test(message)
+	) {
+		return true;
+	}
+	// 429 rate_limit_error — account lacks the extra-usage entitlement fast mode requires.
+	// Observed: "Extra usage is required for fast mode."
+	if (status === 429 && /rate_limit_error/i.test(message) && /fast mode/i.test(message)) {
+		return true;
+	}
+	return false;
+}
 function hasStrictAnthropicTools(params: MessageCreateParamsStreaming): boolean {
 	const tools = params.tools as Array<{ strict?: unknown }> | undefined;
 	return tools?.some(tool => tool.strict === true) ?? false;
 }
+/**
+ * `speed` lives on `BetaMessageCreateParams` (client.beta.messages) but this
+ * provider posts via `client.messages.create`, whose param type doesn't
+ * include it. This alias narrows the cast to one place.
+ */
+type ParamsWithSpeed = MessageCreateParamsStreaming & { speed?: "fast" };
+function dropAnthropicFastMode(params: MessageCreateParamsStreaming): void {
+	delete (params as ParamsWithSpeed).speed;
+}
 function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
 	const tools = params.tools as Array<{ strict?: unknown }> | undefined;
 	if (!tools) return;
@@ -526,6 +583,16 @@ export interface AnthropicOptions extends StreamOptions {
 	interleavedThinking?: boolean;
 	toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
 	betas?: string[] | string;
+	/**
+	 * Realization of `serviceTier: "priority"` on Anthropic models. When the
+	 * tier resolves to `"priority"` (including `"claude-only"` on direct
+	 * `anthropic`), sets `speed: "fast"` on the request and appends the
+	 * `fast-mode-2026-02-01` beta header. If Anthropic rejects fast mode (400
+	 * `invalid_request_error` or 429 `rate_limit_error`), the provider retries
+	 * once without `speed` (see `fastModeDisabled` provider state).
+	 * Tiers that do not resolve to `"priority"` are ignored on this provider.
+	 */
+	serviceTier?: ServiceTier;
 	/** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
 	isOAuth?: boolean;
 	/**
@@ -961,10 +1028,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			} else {
 				const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
+				const extraBetas = normalizeExtraBetas(options?.betas);
+				const wantsAnthropicPriority = resolveServiceTier(options?.serviceTier, model.provider) === "priority";
+				if (wantsAnthropicPriority && !extraBetas.includes(fastModeBeta)) {
+					extraBetas.push(fastModeBeta);
+				}
 				const created = createClient(model, {
 					model,
 					apiKey,
-					extraBetas: normalizeExtraBetas(options?.betas),
+					extraBetas,
 					stream: true,
 					interleavedThinking: options?.interleavedThinking ?? true,
 					headers: options?.headers,
@@ -984,15 +1057,19 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			let disableStrictTools =
 				(providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
 			let strictFallbackErrorMessage: string | undefined;
+			let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
 			const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
 				let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
+				if (disableStrictTools) {
+					dropAnthropicStrictTools(nextParams);
+				}
+				if (dropFastMode) {
+					dropAnthropicFastMode(nextParams);
+				}
 				const replacementPayload = await options?.onPayload?.(nextParams, model);
 				if (replacementPayload !== undefined) {
 					nextParams = replacementPayload as typeof nextParams;
 				}
-				if (disableStrictTools) {
-					dropAnthropicStrictTools(nextParams);
-				}
 				rawRequestDump = {
 					provider: model.provider,
 					api: output.api,
@@ -1284,6 +1361,30 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 						firstTokenTime = undefined;
 						continue;
 					}
+					if (
+						!dropFastMode &&
+						resolveServiceTier(options?.serviceTier, model.provider) === "priority" &&
+						firstTokenTime === undefined &&
+						isAnthropicFastModeUnsupportedError(streamFailure)
+					) {
+						logger.debug("anthropic: fast mode unsupported, retrying without speed", {
+							model: model.id,
+							error: streamFailure instanceof Error ? streamFailure.message : String(streamFailure),
+						});
+						if (providerSessionState) {
+							providerSessionState.fastModeDisabled = true;
+						}
+						dropFastMode = true;
+						params = await prepareParams();
+						providerRetryAttempt = 0;
+						output.content.length = 0;
+						output.responseId = undefined;
+						output.providerPayload = undefined;
+						output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
+						output.stopReason = "stop";
+						firstTokenTime = undefined;
+						continue;
+					}
 					const isTransientEnvelopeFailure =
 						isTransientStreamParseError(streamFailure) || isTransientStreamEnvelopeError(streamFailure);
 					const canRetryTransientEnvelopeFailure = isTransientEnvelopeFailure && !streamedReplayUnsafeContent;
@@ -1315,6 +1416,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			output.duration = Date.now() - startTime;
 			if (firstTokenTime) output.ttft = firstTokenTime - startTime;
+			if (dropFastMode && resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
+				output.disabledFeatures = [...(output.disabledFeatures ?? []), "priority"];
+			}
 			stream.push({ type: "done", reason: output.stopReason, message: output });
 			stream.end();
 		} catch (error) {
@@ -1862,6 +1966,10 @@ function buildParams(
 		params.metadata = { user_id: metadataUserId };
 	}
+	if (resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
+		(params as ParamsWithSpeed).speed = "fast";
+	}
 	if (options?.toolChoice) {
 		if (typeof options.toolChoice === "string") {
 			params.tool_choice = { type: options.toolChoice };
@@ -2280,7 +2388,12 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
 		result.properties = normalizedProperties;
 	}
 	if (isRecord(result.additionalProperties)) {
-		result.additionalProperties = normalizeAnthropicToolSchema(result.additionalProperties);
+		const normalized = normalizeAnthropicToolSchema(result.additionalProperties);
+		if (isRecord(normalized) && Object.keys(normalized).length === 0) {
+			result.additionalProperties = true;
+		} else {
+			result.additionalProperties = normalized;
+		}
 	}
 	if (Array.isArray(result.items)) {
 		result.items = result.items.map(item => normalizeAnthropicToolSchema(item));

package/src/providers/openai-chat-server.ts CHANGED Viewed

@@ -11,7 +11,7 @@ import type {
 	Context,
 	ImageContent,
 	Message,
-	ServiceTier,
+	ResolvedServiceTier,
 	StopReason,
 	TextContent,
 	Tool,
@@ -36,7 +36,7 @@ function isReasoningEffort(value: unknown): value is ReasoningEffort {
 	return value === "minimal" || value === "low" || value === "medium" || value === "high" || value === "xhigh";
 }
-function isServiceTier(value: unknown): value is ServiceTier {
+function isServiceTier(value: unknown): value is ResolvedServiceTier {
 	return value === "auto" || value === "default" || value === "flex" || value === "scale" || value === "priority";
 }

package/src/providers/openai-codex-responses.ts CHANGED Viewed

@@ -29,10 +29,10 @@ import {
 	type FetchImpl,
 	type Model,
 	type ProviderSessionState,
+	resolveServiceTier,
 	type ServiceTier,
 	type StreamFunction,
 	type StreamOptions,
-	shouldSendServiceTier,
 	type TextContent,
 	type ThinkingContent,
 	type Tool,
@@ -590,8 +590,9 @@ async function buildTransformedCodexRequestBody(
 	if (options?.repetitionPenalty !== undefined) {
 		params.repetition_penalty = options.repetitionPenalty;
 	}
-	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
-		params.service_tier = options.serviceTier;
+	const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
+	if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
+		params.service_tier = resolvedServiceTier;
 	}
 	if (context.tools && context.tools.length > 0) {
 		params.tools = convertOpenAICodexResponsesTools(context.tools, model);

package/src/providers/openai-completions-compat.ts CHANGED Viewed

@@ -52,7 +52,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 	const isCerebras = provider === "cerebras" || baseUrl.includes("cerebras.ai");
 	const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
 	const isKilo = provider === "kilo" || baseUrl.includes("api.kilo.ai");
-	const isKimiModel = model.id.includes("moonshotai/kimi") || /^kimi[-.]/i.test(model.id);
+	const isKimiModel = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
 	const isMoonshotKimi =
 		isKimiModel &&
 		(provider === "moonshot" ||
@@ -79,7 +79,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 		baseUrl.includes("deepseek.com") ||
 		lowerId.includes("deepseek") ||
 		lowerName.includes("deepseek");
+	const isDirectDeepseekApi = provider === "deepseek" || baseUrl.includes("api.deepseek.com");
+	const isDirectDeepseekReasoning = isDirectDeepseekApi && isDeepseekFamily && Boolean(model.reasoning);
 	const isNonStandard =
 		isCerebras ||
 		provider === "xai" ||
@@ -102,7 +103,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 		provider === "mistral" ||
 		baseUrl.includes("mistral.ai") ||
 		baseUrl.includes("chutes.ai") ||
-		baseUrl.includes("fireworks.ai");
+		baseUrl.includes("fireworks.ai") ||
+		isDirectDeepseekApi;
 	const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
 	const isMistral = provider === "mistral" || baseUrl.includes("mistral.ai");
@@ -162,7 +164,13 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 					xhigh: "default",
 				} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
 			: isDeepseekFamily && model.reasoning
-				? { xhigh: "max" }
+				? ({
+						minimal: "high",
+						low: "high",
+						medium: "high",
+						high: "high",
+						xhigh: "max",
+					} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
 				: {};
 	return {
@@ -173,8 +181,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 		reasoningEffortMap,
 		supportsUsageInStreaming: !isCerebras,
 		disableReasoningOnForcedToolChoice: isKimiModel || isAnthropicModel,
-		disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning),
-		supportsToolChoice: true,
+		disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning) && !isOpenRouter,
+		supportsToolChoice: !isDirectDeepseekReasoning,
 		maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
 		requiresToolResultName: isMistral,
 		requiresAssistantAfterToolResult: false,
@@ -204,11 +212,11 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
 		// DeepSeek V4 rejects synthetic reasoning_content placeholders (".") on tool-call turns.
 		// Kimi and OpenRouter accept them when actual reasoning is unavailable.
 		allowsSyntheticReasoningContentForToolCalls: !isDeepseekFamily || !model.reasoning,
-		requiresAssistantContentForToolCalls: isKimiModel,
+		requiresAssistantContentForToolCalls: isKimiModel || isDirectDeepseekReasoning,
 		openRouterRouting: undefined,
 		vercelGatewayRouting: undefined,
 		supportsStrictMode: detectStrictModeSupport(provider, baseUrl),
-		extraBody: undefined,
+		extraBody: isDirectDeepseekReasoning ? { thinking: { type: "enabled" } } : undefined,
 		toolStrictMode: isCerebras ? "all_strict" : "mixed",
 	};
 }
@@ -235,7 +243,7 @@ export function resolveOpenAICompat(
 		supportsMultipleSystemMessages:
 			model.compat.supportsMultipleSystemMessages ?? detected.supportsMultipleSystemMessages,
 		supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
-		reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
+		reasoningEffortMap: { ...detected.reasoningEffortMap, ...(model.compat.reasoningEffortMap ?? {}) },
 		supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
 		supportsToolChoice: model.compat.supportsToolChoice ?? detected.supportsToolChoice,
 		maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
@@ -259,7 +267,7 @@ export function resolveOpenAICompat(
 		openRouterRouting: model.compat.openRouterRouting ?? detected.openRouterRouting,
 		vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
 		supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
-		extraBody: model.compat.extraBody,
+		extraBody: model.compat.extraBody ?? detected.extraBody,
 		toolStrictMode: model.compat.toolStrictMode ?? detected.toolStrictMode,
 	};
 }

package/src/providers/openai-completions.ts CHANGED Viewed

@@ -22,6 +22,7 @@ import {
 	type Model,
 	type OpenAICompat,
 	type ProviderSessionState,
+	resolveServiceTier,
 	type ServiceTier,
 	type StopReason,
 	type StreamFunction,
@@ -37,7 +38,7 @@ import {
 import { normalizeSystemPrompts } from "../utils";
 import { createAbortSourceTracker } from "../utils/abort";
 import { AssistantMessageEventStream } from "../utils/event-stream";
-import { toFireworksWireModelId } from "../utils/fireworks-model-id";
+import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
 import {
 	type CapturedHttpErrorResponse,
 	finalizeErrorMessage,
@@ -486,7 +487,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 			}
 			stream.push({ type: "start", partial: output });
-			const parseMiniMaxThinkTags = model.provider === "minimax-code";
+			const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
 			// Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
 			// native API) leak chat-template tool-call markers in `delta.content` even
 			// though tool calls are also surfaced structurally. Strip the leaked markers
@@ -1037,13 +1038,23 @@ function buildParams(
 	maybeAddOpenRouterAnthropicCacheControl(model, messages);
 	const supportsReasoningParams = model.provider !== "github-copilot";
-	// Kimi (including via OpenRouter) calculates TPM rate limits based on max_tokens, not actual output.
-	// Always send max_tokens to avoid their high default causing rate limit issues.
+	// Kimi (including via OpenRouter and Fireworks router-form IDs such as
+	// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
+	// max_tokens, not actual output. The official Kimi K2 model guidance
+	// (https://docs.fireworks.ai/models/kimi-k2) also requires `max_tokens` for
+	// every call since the family can otherwise emit very long reasoning traces
+	// before the final answer. Always send max_tokens; the family is detected
+	// with the same Kimi regex the compat detector uses.
 	// Note: Direct kimi-code provider is handled by the dedicated Kimi provider in kimi.ts.
-	const isKimi = model.id.includes("moonshotai/kimi");
+	const isKimi = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
 	const effectiveMaxTokens = options?.maxTokens ?? (isKimi ? model.maxTokens : undefined);
-	const requestModelId = model.provider === "fireworks" ? toFireworksWireModelId(model.id) : model.id;
+	const requestModelId =
+		model.provider === "fireworks"
+			? toFireworksWireModelId(model.id)
+			: model.provider === "firepass"
+				? toFirepassWireModelId(model.id)
+				: model.id;
 	const params: OpenAICompletionsParams = {
 		model: requestModelId,
 		messages,
@@ -1093,7 +1104,10 @@ function buildParams(
 		params.frequency_penalty = options.frequencyPenalty;
 	}
 	if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
-		params.service_tier = options.serviceTier;
+		const resolved = resolveServiceTier(options?.serviceTier, model.provider);
+		if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
+			params.service_tier = resolved;
+		}
 	}
 	if (context.tools) {

package/src/providers/openai-responses-shared.ts CHANGED Viewed

@@ -17,6 +17,7 @@ import {
 	type AssistantMessage,
 	type ImageContent,
 	type Model,
+	resolveServiceTier,
 	type ServiceTier,
 	type StopReason,
 	type StreamOptions,
@@ -651,7 +652,10 @@ export function applyCommonResponsesSamplingParams<P extends CommonResponsesPara
 	if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
 	if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
 	if (shouldSendServiceTier(options?.serviceTier, provider)) {
-		params.service_tier = options.serviceTier;
+		const resolved = resolveServiceTier(options?.serviceTier, provider);
+		if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
+			params.service_tier = resolved;
+		}
 	}
 }

package/src/stream.ts CHANGED Viewed

@@ -83,6 +83,7 @@ const serviceProviderMap: Record<string, KeyResolver> = {
 	cerebras: "CEREBRAS_API_KEY",
 	xai: "XAI_API_KEY",
 	fireworks: "FIREWORKS_API_KEY",
+	firepass: "FIREPASS_API_KEY",
 	openrouter: "OPENROUTER_API_KEY",
 	kilo: "KILO_API_KEY",
 	"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
@@ -580,6 +581,7 @@ function mapOptionsForApi<TApi extends Api>(
 					thinkingEnabled: false,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			}
@@ -590,6 +592,7 @@ function mapOptionsForApi<TApi extends Api>(
 					thinkingEnabled: false,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			}
@@ -603,6 +606,7 @@ function mapOptionsForApi<TApi extends Api>(
 					effort,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			}
@@ -613,6 +617,7 @@ function mapOptionsForApi<TApi extends Api>(
 					thinkingBudgetTokens: thinkingBudget,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			}
@@ -631,6 +636,7 @@ function mapOptionsForApi<TApi extends Api>(
 					thinkingEnabled: false,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			} else {
 				return castApi<"anthropic-messages">({
@@ -640,6 +646,7 @@ function mapOptionsForApi<TApi extends Api>(
 					thinkingBudgetTokens: thinkingBudget,
 					toolChoice: mapAnthropicToolChoice(options?.toolChoice),
 					thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
+					serviceTier: options?.serviceTier,
 				});
 			}
 		}

package/src/types.ts CHANGED Viewed

@@ -110,6 +110,7 @@ export type KnownProvider =
 	| "minimax-code-cn"
 	| "github-copilot"
 	| "fireworks"
+	| "firepass"
 	| "gitlab-duo"
 	| "cursor"
 	| "deepseek"
@@ -162,29 +163,78 @@ export type ToolChoice =
 // Base options all providers share
 export type CacheRetention = "none" | "short" | "long";
-/** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
-export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
+/**
+ * Service tier hint for processing priority / cost control.
+ *
+ * The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
+ * `"priority"`) are passed through to providers that understand them
+ * (OpenAI's `service_tier` field directly; Anthropic translates
+ * `"priority"` into `speed: "fast"` on supported Opus models).
+ *
+ * The scoped values target a specific provider family: each resolves to
+ * `"priority"` on the matching provider and to `undefined` everywhere else.
+ * They let users opt into priority on one family without paying premium
+ * costs on the other when switching models mid-session.
+ *
+ * - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
+ * - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude); ignored elsewhere.
+ */
+export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
-export function shouldSendServiceTier(
-	serviceTier?: ServiceTier | null,
-	provider?: Provider,
-): serviceTier is "flex" | "scale" | "priority" {
-	if (provider !== "openai" && provider !== "openai-codex") {
-		return false;
+/** Resolved tier — one of the values that providers actually consume on the wire. */
+export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
+/**
+ * Resolves a possibly scoped `ServiceTier` to the effective tier for the
+ * given provider. Scoped values match their target family and otherwise
+ * collapse to `undefined`; unscoped values pass through unchanged.
+ */
+export function resolveServiceTier(
+	serviceTier: ServiceTier | null | undefined,
+	provider: Provider | undefined,
+): ResolvedServiceTier | undefined {
+	if (!serviceTier) return undefined;
+	switch (serviceTier) {
+		case "openai-only":
+			return provider === "openai" || provider === "openai-codex" ? "priority" : undefined;
+		case "claude-only":
+			return provider === "anthropic" ? "priority" : undefined;
+		default:
+			return serviceTier;
 	}
-	return serviceTier === "flex" || serviceTier === "scale" || serviceTier === "priority";
 }
 /**
- * Premium-request weight contributed by sending a `priority` service tier to
- * a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
- * accounting so the "premium requests" stat aggregates priority traffic too.
+ * True when the (possibly scoped) tier should be sent as OpenAI's
+ * `service_tier` request field for the given provider. Non-OpenAI
+ * providers, unsupported tiers (`"auto"`, `"default"`), and scope
+ * mismatches all return false.
+ */
+export function shouldSendServiceTier(
+	serviceTier: ServiceTier | null | undefined,
+	provider: Provider | undefined,
+): boolean {
+	if (provider !== "openai" && provider !== "openai-codex") return false;
+	const resolved = resolveServiceTier(serviceTier, provider);
+	return resolved === "flex" || resolved === "scale" || resolved === "priority";
+}
+/**
+ * Premium-request weight contributed by sending priority to a provider
+ * that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
+ * so the "premium requests" stat aggregates priority traffic across the
+ * OpenAI family and Anthropic fast-mode realizations.
  *
- * Returns 1 per priority request, 0 otherwise. Non-priority tiers (`flex`,
- * `scale`) and providers that ignore `service_tier` always return 0.
+ * Returns 1 per resolved priority request, 0 otherwise.
  */
-export function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number {
-	return shouldSendServiceTier(serviceTier, provider) && serviceTier === "priority" ? 1 : 0;
+export function getPriorityPremiumRequests(
+	serviceTier: ServiceTier | null | undefined,
+	provider: Provider | undefined,
+): number {
+	if (resolveServiceTier(serviceTier, provider) !== "priority") return 0;
+	// Only providers that realize `priority` on the wire bill the user.
+	// Everywhere else, the field is silently dropped and nothing is charged.
+	return provider === "openai" || provider === "openai-codex" || provider === "anthropic" ? 1 : 0;
 }
 export interface ProviderSessionState {
@@ -502,6 +552,14 @@ export interface AssistantMessage {
 	errorMessage?: string;
 	/** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
 	errorStatus?: number;
+	/**
+	 * Stable identifiers for request features the provider silently dropped
+	 * during this turn (e.g. `"priority"`). Set when a server-side rejection
+	 * triggered an in-provider fallback retry that succeeded without the
+	 * feature. Callers can use this to sync user-facing toggles back to the
+	 * server's actual state.
+	 */
+	disabledFeatures?: string[];
 	/** Provider-specific opaque payload used to reconstruct transport-native history. */
 	providerPayload?: ProviderPayload;
 	timestamp: number; // Unix timestamp in milliseconds

package/src/utils/fireworks-model-id.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 const FIREWORKS_WIRE_PREFIX = "accounts/fireworks/models/";
+const FIREPASS_WIRE_PREFIX = "accounts/fireworks/routers/";
 const VERSION_SEPARATOR_PATTERN = /(?<=\d)p(?=\d)/g;
 const VERSION_DOT_PATTERN = /(?<=\d)\.(?=\d)/g;
@@ -11,3 +12,19 @@ export function toFireworksWireModelId(modelId: string): string {
 	const stripped = modelId.startsWith(FIREWORKS_WIRE_PREFIX) ? modelId.slice(FIREWORKS_WIRE_PREFIX.length) : modelId;
 	return `${FIREWORKS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
 }
+/**
+ * Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
+ * endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
+ * We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog; `toFirepassWireModelId` translates it
+ * to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time and this function reverses that.
+ */
+export function toFirepassPublicModelId(modelId: string): string {
+	const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
+	return stripped.replace(VERSION_SEPARATOR_PATTERN, ".");
+}
+export function toFirepassWireModelId(modelId: string): string {
+	const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
+	return `${FIREPASS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
+}

package/src/utils/oauth/firepass.ts ADDED Viewed

@@ -0,0 +1,24 @@
+/**
+ * Fire Pass login flow.
+ *
+ * Fire Pass is a Fireworks subscription product whose dedicated `fpk_…` API
+ * keys are scoped to the `accounts/fireworks/routers/kimi-k2p6-turbo` router
+ * (Kimi K2.6 Turbo). The key does NOT authorize `/v1/models`, so validation
+ * pings the chat completions endpoint with the router id directly.
+ * See https://docs.fireworks.ai/firepass.
+ */
+import { createApiKeyLogin } from "./api-key-login";
+export const loginFirepass = createApiKeyLogin({
+	providerLabel: "Fire Pass",
+	authUrl: "https://app.fireworks.ai/settings/users/api-keys",
+	instructions: "Create a dedicated Fire Pass API key in the Fireworks dashboard",
+	promptMessage: "Paste your Fire Pass API key",
+	placeholder: "fpk_...",
+	validation: {
+		kind: "chat-completions",
+		provider: "Fire Pass",
+		baseUrl: "https://api.fireworks.ai/inference/v1",
+		model: "accounts/fireworks/routers/kimi-k2p6-turbo",
+	},
+});

package/src/utils/oauth/index.ts CHANGED Viewed

@@ -55,6 +55,11 @@ const builtInOAuthProviders: OAuthProviderInfo[] = [
 		name: "Fireworks",
 		available: true,
 	},
+	{
+		id: "firepass",
+		name: "Fire Pass (Fireworks Kimi K2.6 Turbo subscription)",
+		available: true,
+	},
 	{
 		id: "github-copilot",
 		name: "GitHub Copilot",
@@ -301,6 +306,7 @@ export async function refreshOAuthToken(
 		case "opencode-go":
 		case "cerebras":
 		case "fireworks":
+		case "firepass":
 		case "nvidia":
 		case "nanogpt":
 		case "synthetic":
@@ -363,10 +369,14 @@ export async function getOAuthApiKey(
 	}
 	if (provider === "perplexity") {
+		// Perplexity JWTs usually omit `exp` (server-side sessions). Trust the JWT
+		// claim when present; otherwise treat the credential as non-expiring rather
+		// than honoring a stale stored `expires` (older logins wrote loginTime+1h).
+		const NEVER_EXPIRES = 8.64e15;
 		const normalizedExpires =
 			creds.expires > 0 && creds.expires < 10_000_000_000 ? creds.expires * 1000 : creds.expires;
 		const jwtExpiry = getPerplexityJwtExpiryMs(creds.access);
-		const expires = jwtExpiry && jwtExpiry > normalizedExpires ? jwtExpiry : normalizedExpires;
+		const expires = jwtExpiry ?? Math.max(normalizedExpires, NEVER_EXPIRES);
 		if (expires !== creds.expires) {
 			creds = { ...creds, expires };
 		}

package/src/utils/oauth/perplexity.ts CHANGED Viewed

@@ -24,20 +24,26 @@ const APP_USER_AGENT = "Perplexity/641 CFNetwork/1568 Darwin/25.2.0";
 // JWT helpers
 // ---------------------------------------------------------------------------
-/** Extract expiry from a JWT. Falls back to 1 hour from now. Subtracts 5 min safety margin. */
+/**
+ * Extract expiry from a JWT. Perplexity tokens generally lack an `exp` claim
+ * (their sessions are server-side and effectively non-expiring from the client's
+ * point of view), so we return a far-future sentinel when no `exp` is present.
+ * When `exp` IS present, subtract a 5-minute safety margin.
+ */
+const NEVER_EXPIRES = 8.64e15; // largest time value (ms since epoch) a JS Date can represent
 function getJwtExpiry(token: string): number {
 	try {
 		const parts = token.split(".");
-		if (parts.length !== 3) return Date.now() + 3600_000;
+		if (parts.length !== 3) return NEVER_EXPIRES;
 		const payload = parts[1] ?? "";
 		const decoded = JSON.parse(atob(payload.replace(/-/g, "+").replace(/_/g, "/")));
-		if (decoded?.exp && typeof decoded.exp === "number") {
+		if (typeof decoded?.exp === "number" && Number.isFinite(decoded.exp)) {
 			return decoded.exp * 1000 - 5 * 60_000;
 		}
 	} catch {
 		// Ignore decode errors
 	}
-	return Date.now() + 3600_000;
+	return NEVER_EXPIRES;
 }
 /** Build OAuthCredentials from a Perplexity JWT string. */

package/src/utils/oauth/types.ts CHANGED Viewed

@@ -15,6 +15,7 @@ export type OAuthProvider =
 	| "cloudflare-ai-gateway"
 	| "cursor"
 	| "fireworks"
+	| "firepass"
 	| "github-copilot"
 	| "google-gemini-cli"
 	| "google-antigravity"

package/src/utils/schema/zod-decontaminate.ts CHANGED Viewed

@@ -243,8 +243,17 @@ function rewriteZodNode(node: JsonObject, seen: WeakSet<object>): unknown {
 		case "pipe":
 		case "transform": {
 			const inner = walk(unwrapInnerSchema(def), seen);
-			if (kind === "nullable" && isJsonObject(inner) && typeof inner.type === "string") {
-				return { ...inner, type: [inner.type, "null"] };
+			if (kind === "nullable" && isJsonObject(inner)) {
+				if (typeof inner.type === "string") {
+					return { ...inner, type: [inner.type, "null"] };
+				}
+				if (Array.isArray(inner.type)) {
+					return (inner.type as string[]).includes("null")
+						? inner
+						: { ...inner, type: [...(inner.type as string[]), "null"] };
+				}
+				// anyOf / allOf / $ref shapes — no scalar `type` field
+				return { anyOf: [inner, { type: "null" }] };
 			}
 			return inner;
 		}