@oh-my-pi/pi-ai 15.1.6 → 15.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,28 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.1.8] - 2026-05-20
6
+ ### Added
7
+
8
+ - Added Fireworks Fire Pass as a separate `firepass` provider with an API-key login flow, a bundled `kimi-k2.6-turbo` model entry (Kimi K2.6 Turbo), and wire-id translation from the friendly catalog id to the `accounts/fireworks/routers/kimi-k2p6-turbo` router endpoint. Fire Pass keys (`fpk_…`) authorize only the dedicated router and do not authorize `/v1/models`, so login validation pings chat completions against the router id directly. Extended the openai-completions Kimi-family safety net so the firepass entry inherits the "always send `max_tokens`" default recommended by the Fireworks docs ([Kimi K2 guide](https://docs.fireworks.ai/models/kimi-k2)); the router's accepted `reasoning_effort` set includes `xhigh`, so it is forwarded verbatim rather than remapped. See https://docs.fireworks.ai/firepass.
9
+
10
+ ### Fixed
11
+
12
+ - Fixed DeepSeek V4 direct API requests with tools to keep documented thinking mode instead of dropping reasoning: lower OMP efforts now map to DeepSeek's supported `high`, `tool_choice` is omitted, `thinking: { type: "enabled" }` and `max_tokens` are sent, and partial user `reasoningEffortMap` overrides merge with DeepSeek defaults. ([#1207](https://github.com/can1357/oh-my-pi/issues/1207))
13
+ - Fixed model cache schema v2 databases so offline refreshes preserve cached provider discoveries after upgrading to schema v3 and subsequent online refreshes can overwrite the cache. ([#1219](https://github.com/can1357/oh-my-pi/issues/1219))
14
+ - Fixed Perplexity OAuth credentials being treated as expired one hour after login. `getJwtExpiry` was fabricating `expires = now + 1h` whenever the JWT had no `exp` claim (the common case — Perplexity sessions are server-side). Once the hour elapsed, `getOAuthApiKey` would mark the credential as expired and the search provider's loader would silently skip it, which surfaced to the user as "logged out". Logins with no `exp` now persist a far-future sentinel; `getOAuthApiKey` also normalizes any stale `expires` written by older builds.
15
+
16
+ ## [15.1.7] - 2026-05-19
17
+ ### Added
18
+
19
+ - Added Anthropic realization of `serviceTier: "priority"`. The anthropic-messages provider now sets `speed: "fast"` on the request and appends the `fast-mode-2026-02-01` beta to `Anthropic-Beta` whenever the caller passes `serviceTier: "priority"`. When the server rejects an unsupported model with `invalid_request_error`, the provider transparently retries the same turn without the fast-mode signal (mirroring the strict-tools fallback pattern), persists the disable via a new `providerSessionState.fastModeDisabled` flag so subsequent requests in the session skip the field, and surfaces the action via the new `AssistantMessage.disabledFeatures` array (id `"priority"`) so callers can sync user-facing toggles. A new `clearAnthropicFastModeFallback(providerSessionState)` helper lets callers re-arm priority after the auto-fallback fired.
20
+ - Added scoped `ServiceTier` values: `"openai-only"` (priority on `openai`/`openai-codex`, ignored elsewhere) and `"claude-only"` (priority on direct `anthropic`, ignored on Bedrock/Vertex Claude and elsewhere). A new `resolveServiceTier(serviceTier, provider)` helper computes the effective tier for the provider; existing OpenAI/Anthropic provider code routes through it, so `service_tier` and Anthropic fast-mode emission both respect scope. `getPriorityPremiumRequests` now counts Anthropic+priority as one premium request (previously zero) and continues to ignore providers that drop the field on the wire.
21
+
22
+ ### Fixed
23
+
24
+ - Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
25
+ - Fixed MiniMax Coding Plan CN streaming `<think>...</think>` reasoning as visible assistant text. The OpenAI-compatible stream parser now enables the existing MiniMax tag parser for both `minimax-code` and `minimax-code-cn`, so CN responses become structured `thinking` blocks instead of raw text. ([#1203](https://github.com/can1357/oh-my-pi/issues/1203))
26
+
5
27
  ## [15.1.6] - 2026-05-19
6
28
 
7
29
  ### Fixed
@@ -63,6 +63,18 @@ export interface FireworksModelManagerConfig {
63
63
  baseUrl?: string;
64
64
  }
65
65
  export declare function fireworksModelManagerOptions(config?: FireworksModelManagerConfig): ModelManagerOptions<"openai-completions">;
66
+ export interface FirepassModelManagerConfig {
67
+ apiKey?: string;
68
+ baseUrl?: string;
69
+ }
70
+ /**
71
+ * Fire Pass is a Fireworks subscription product that exposes a single router
72
+ * model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
73
+ * The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
74
+ * never performs dynamic discovery — the bundled catalog entry is canonical.
75
+ * See https://docs.fireworks.ai/firepass.
76
+ */
77
+ export declare function firepassModelManagerOptions(_config?: FirepassModelManagerConfig): ModelManagerOptions<"openai-completions">;
66
78
  export interface MistralModelManagerConfig {
67
79
  apiKey?: string;
68
80
  baseUrl?: string;
@@ -1,6 +1,6 @@
1
1
  import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
2
2
  import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
3
- import type { FetchImpl, Message, Model, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
3
+ import type { FetchImpl, Message, Model, ProviderSessionState, ServiceTier, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
4
4
  export type AnthropicHeaderOptions = {
5
5
  apiKey: string;
6
6
  baseUrl?: string;
@@ -17,6 +17,15 @@ type AnthropicCacheControl = {
17
17
  type: "ephemeral";
18
18
  ttl?: "1h" | "5m";
19
19
  };
20
+ /**
21
+ * Clears the in-session "server rejected fast mode" sticky flag. Call when the
22
+ * caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
23
+ * `/fast on` after a previous turn auto-disabled it) so the next request
24
+ * actually carries `speed: "fast"` again. No-op when the map or state entry
25
+ * hasn't been materialized yet.
26
+ */
27
+ export declare function clearAnthropicFastModeFallback(providerSessionState: Map<string, ProviderSessionState> | undefined): void;
28
+ export declare function isAnthropicFastModeUnsupportedError(error: unknown): boolean;
20
29
  export declare const claudeCodeVersion = "2.1.63";
21
30
  export declare const claudeToolPrefix: string;
22
31
  export declare const claudeCodeSystemInstruction = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
@@ -77,6 +86,16 @@ export interface AnthropicOptions extends StreamOptions {
77
86
  name: string;
78
87
  };
79
88
  betas?: string[] | string;
89
+ /**
90
+ * Realization of `serviceTier: "priority"` on Anthropic models. When
91
+ * `"priority"`, sets `speed: "fast"` on the request and appends the
92
+ * `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
93
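+ * with `invalid_request_error` (or a 429 `rate_limit_error` when the account lacks the extra-usage entitlement fast mode requires), which triggers an in-provider one-shot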
94
+ * fallback (see `fastModeDisabled` provider state).
95
+ *
96
+ * Tiers that do not resolve to `"priority"` for the active provider (see `resolveServiceTier`) are currently ignored on this provider.
97
+ */
98
+ serviceTier?: ServiceTier;
80
99
  /** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
81
100
  isOAuth?: boolean;
82
101
  /**
@@ -48,7 +48,7 @@ export interface ThinkingConfig {
48
48
  /** Provider-specific transport used to encode the selected effort. */
49
49
  mode: ThinkingControlMode;
50
50
  }
51
- export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
51
+ export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
52
52
  export type Provider = KnownProvider | string;
53
53
  import type { Effort } from "./model-thinking";
54
54
  /** Token budgets for each thinking level (token-based providers only) */
@@ -69,18 +69,47 @@ export type ToolChoice = "auto" | "none" | "any" | "required" | {
69
69
  name: string;
70
70
  };
71
71
  export type CacheRetention = "none" | "short" | "long";
72
- /** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
73
- export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
74
- export declare function shouldSendServiceTier(serviceTier?: ServiceTier | null, provider?: Provider): serviceTier is "flex" | "scale" | "priority";
75
72
  /**
76
- * Premium-request weight contributed by sending a `priority` service tier to
77
- * a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
78
- * accounting so the "premium requests" stat aggregates priority traffic too.
73
+ * Service tier hint for processing priority / cost control.
79
74
  *
80
- * Returns 1 per priority request, 0 otherwise. Non-priority tiers (`flex`,
81
- * `scale`) and providers that ignore `service_tier` always return 0.
75
+ * The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
76
+ * `"priority"`) are passed through to providers that understand them
77
+ * (OpenAI's `service_tier` field directly; Anthropic translates
78
+ * `"priority"` into `speed: "fast"` on supported Opus models).
79
+ *
80
+ * The scoped values target a specific provider family and behave as the
81
+ * unscoped value on the matching provider, or `undefined` everywhere else.
82
+ * They let users opt into priority on one family without paying premium
83
+ * costs on the other when switching models mid-session.
84
+ *
85
+ * - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
86
+ * - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
87
+ */
88
+ export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
89
+ /** Resolved tier — one of the values that providers actually consume on the wire. */
90
+ export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
91
+ /**
92
+ * Resolves a possibly scoped `ServiceTier` to the effective tier for the
93
+ * given provider. Scoped values match their target family and otherwise
94
+ * collapse to `undefined`; unscoped values pass through unchanged.
95
+ */
96
+ export declare function resolveServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): ResolvedServiceTier | undefined;
97
+ /**
98
+ * True when the (possibly scoped) tier should be sent as OpenAI's
99
+ * `service_tier` request field for the given provider. Non-OpenAI
100
+ * providers, unsupported tiers (`"auto"`, `"default"`), and scope
101
+ * mismatches all return false.
82
102
  */
83
- export declare function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number;
103
+ export declare function shouldSendServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): boolean;
104
+ /**
105
+ * Premium-request weight contributed by sending priority to a provider
106
+ * that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
107
+ * so the "premium requests" stat aggregates priority traffic across the
108
+ * OpenAI family and Anthropic fast-mode realizations.
109
+ *
110
+ * Returns 1 per resolved priority request, 0 otherwise.
111
+ */
112
+ export declare function getPriorityPremiumRequests(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): number;
84
113
  export interface ProviderSessionState {
85
114
  close(): void;
86
115
  }
@@ -371,6 +400,14 @@ export interface AssistantMessage {
371
400
  errorMessage?: string;
372
401
  /** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
373
402
  errorStatus?: number;
403
+ /**
404
+ * Stable identifiers for request features the provider silently dropped
405
+ * during this turn (e.g. `"priority"`). Set when a server-side rejection
406
+ * triggered an in-provider fallback retry that succeeded without the
407
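+ * feature (the Anthropic provider also reports it on later turns while its session-level fallback flag remains set). Callers can use this to sync user-facing toggles back to the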
408
+ * server's actual state.
409
+ */
410
+ disabledFeatures?: string[];
374
411
  /** Provider-specific opaque payload used to reconstruct transport-native history. */
375
412
  providerPayload?: ProviderPayload;
376
413
  timestamp: number;
@@ -1,2 +1,10 @@
1
1
  export declare function toFireworksPublicModelId(modelId: string): string;
2
2
  export declare function toFireworksWireModelId(modelId: string): string;
3
+ /**
4
+ * Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
5
+ * endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
6
+ * We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
7
+ * to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time.
8
+ */
9
+ export declare function toFirepassPublicModelId(modelId: string): string;
10
+ export declare function toFirepassWireModelId(modelId: string): string;
@@ -0,0 +1 @@
1
+ export declare const loginFirepass: (options: import("./types").OAuthController) => Promise<string>;
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
7
7
  email?: string;
8
8
  accountId?: string;
9
9
  };
10
- export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
10
+ export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
11
11
  export type OAuthProviderId = OAuthProvider | (string & {});
12
12
  export type OAuthPrompt = {
13
13
  message: string;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.1.6",
4
+ "version": "15.1.8",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -43,7 +43,7 @@
43
43
  "dependencies": {
44
44
  "@anthropic-ai/sdk": "^0.94.0",
45
45
  "@bufbuild/protobuf": "^2.12.0",
46
- "@oh-my-pi/pi-utils": "15.1.6",
46
+ "@oh-my-pi/pi-utils": "15.1.8",
47
47
  "openai": "^6.36.0",
48
48
  "partial-json": "^0.1.7",
49
49
  "zod": "4.4.3"
@@ -1344,6 +1344,12 @@ export class AuthStorage {
1344
1344
  await saveApiKeyCredential(apiKey);
1345
1345
  return;
1346
1346
  }
1347
+ case "firepass": {
1348
+ const { loginFirepass } = await import("./utils/oauth/firepass");
1349
+ const apiKey = await loginFirepass(ctrl);
1350
+ await saveApiKeyCredential(apiKey);
1351
+ return;
1352
+ }
1347
1353
  case "zai": {
1348
1354
  const { loginZai } = await import("./utils/oauth/zai");
1349
1355
  const apiKey = await loginZai(ctrl);
@@ -17,6 +17,10 @@ interface CacheRow {
17
17
  models: string;
18
18
  }
19
19
 
20
+ interface TableInfoRow {
21
+ name: string;
22
+ }
23
+
20
24
  interface CacheEntry<TApi extends Api = Api> {
21
25
  models: Model<TApi>[];
22
26
  fresh: boolean;
@@ -55,11 +59,21 @@ function getDb(dbPath?: string): Database {
55
59
  models TEXT NOT NULL
56
60
  )
57
61
  `);
62
+ migrateCacheSchema(db);
63
+
58
64
  sharedDb = db;
59
65
  sharedDbPath = resolvedPath;
60
66
  return db;
61
67
  }
62
68
 
69
+ function migrateCacheSchema(db: Database): void {
70
+ const columns = db.prepare("PRAGMA table_info(model_cache)").all() as TableInfoRow[];
71
+ if (!columns.some(column => column.name === "static_fingerprint")) {
72
+ db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
73
+ }
74
+ db.run("UPDATE model_cache SET version = ? WHERE version = 2", [CACHE_SCHEMA_VERSION]);
75
+ }
76
+
63
77
  export function readModelCache<TApi extends Api>(
64
78
  providerId: string,
65
79
  ttlMs: number,
package/src/models.json CHANGED
@@ -5027,6 +5027,33 @@
5027
5027
  }
5028
5028
  }
5029
5029
  },
5030
+ "firepass": {
5031
+ "kimi-k2.6-turbo": {
5032
+ "id": "kimi-k2.6-turbo",
5033
+ "name": "Kimi K2.6 Turbo (Fire Pass)",
5034
+ "api": "openai-completions",
5035
+ "provider": "firepass",
5036
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
5037
+ "reasoning": true,
5038
+ "input": [
5039
+ "text",
5040
+ "image"
5041
+ ],
5042
+ "cost": {
5043
+ "input": 0,
5044
+ "output": 0,
5045
+ "cacheRead": 0,
5046
+ "cacheWrite": 0
5047
+ },
5048
+ "contextWindow": 262144,
5049
+ "maxTokens": 65536,
5050
+ "thinking": {
5051
+ "mode": "effort",
5052
+ "minLevel": "minimal",
5053
+ "maxLevel": "xhigh"
5054
+ }
5055
+ }
5056
+ },
5030
5057
  "fireworks": {
5031
5058
  "deepseek-v4-pro": {
5032
5059
  "id": "deepseek-v4-pro",
@@ -14,6 +14,7 @@ import {
14
14
  cerebrasModelManagerOptions,
15
15
  cloudflareAiGatewayModelManagerOptions,
16
16
  deepseekModelManagerOptions,
17
+ firepassModelManagerOptions,
17
18
  fireworksModelManagerOptions,
18
19
  githubCopilotModelManagerOptions,
19
20
  groqModelManagerOptions,
@@ -152,6 +153,7 @@ export const PROVIDER_DESCRIPTORS: readonly ProviderDescriptor[] = [
152
153
  config => fireworksModelManagerOptions(config),
153
154
  catalog("Fireworks", ["FIREWORKS_API_KEY"]),
154
155
  ),
156
+ descriptor("firepass", "kimi-k2.6-turbo", config => firepassModelManagerOptions(config)),
155
157
  descriptor("xai", "grok-4-fast-non-reasoning", config => xaiModelManagerOptions(config)),
156
158
  catalogDescriptor(
157
159
  "deepseek",
@@ -692,6 +692,30 @@ export function fireworksModelManagerOptions(
692
692
  };
693
693
  }
694
694
 
695
+ // ---------------------------------------------------------------------------
696
+ // 7.6 Fire Pass (Fireworks Kimi K2.6 Turbo subscription)
697
+ // ---------------------------------------------------------------------------
698
+
699
+ export interface FirepassModelManagerConfig {
700
+ apiKey?: string;
701
+ baseUrl?: string;
702
+ }
703
+
704
+ /**
705
+ * Fire Pass is a Fireworks subscription product that exposes a single router
706
+ * model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
707
+ * The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
708
+ * never performs dynamic discovery — the bundled catalog entry is canonical.
709
+ * See https://docs.fireworks.ai/firepass.
710
+ */
711
+ export function firepassModelManagerOptions(
712
+ _config?: FirepassModelManagerConfig,
713
+ ): ModelManagerOptions<"openai-completions"> {
714
+ return {
715
+ providerId: "firepass",
716
+ };
717
+ }
718
+
695
719
  // ---------------------------------------------------------------------------
696
720
  // 7. Mistral
697
721
  // ---------------------------------------------------------------------------
@@ -2083,18 +2107,26 @@ const MODELS_DEV_PROVIDER_DESCRIPTORS_CORE: readonly ModelsDevProviderDescriptor
2083
2107
  // ids are kept off the catalog until the issue thread asks for them.
2084
2108
  filterModel: (id, m) => m.tool_call === true && id.startsWith("deepseek-v4"),
2085
2109
  compat: {
2086
- // xhigh maps to DeepSeek's `max` reasoning_effort (#830 thread).
2110
+ // DeepSeek V4 only accepts `high`/`max`; map lower OMP levels upward so
2111
+ // subagent "minimal" turns stay in documented thinking mode instead of
2112
+ // sending unsupported effort strings.
2113
+ supportsDeveloperRole: false,
2087
2114
  supportsReasoningEffort: true,
2088
- reasoningEffortMap: { xhigh: "max" },
2089
- // `tool_choice` returns 400 against DeepSeek when reasoning_effort is set
2090
- // (per the issue thread). Tool calls still work without the parameter.
2115
+ reasoningEffortMap: { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" },
2116
+ maxTokensField: "max_tokens",
2117
+ // DeepSeek V4 thinking mode rejects the `tool_choice` control parameter.
2118
+ // Tool calls still work without it; the API defaults to auto when tools exist.
2091
2119
  supportsToolChoice: false,
2120
+ // DeepSeek V4's OpenAI format docs enable thinking with both the toggle and
2121
+ // reasoning_effort. Keep the toggle explicit for built-in models.
2122
+ extraBody: { thinking: { type: "enabled" } },
2092
2123
  // DeepSeek emits chain-of-thought via `reasoning_content` and requires it
2093
2124
  // to round-trip on assistant tool-call messages so the model can resume
2094
2125
  // from prior thinking (interleaved.field=reasoning_content on models.dev,
2095
2126
  // matches the kimi/openrouter handling already in detectCompat).
2096
2127
  reasoningContentField: "reasoning_content",
2097
2128
  requiresReasoningContentForToolCalls: true,
2129
+ requiresAssistantContentForToolCalls: true,
2098
2130
  },
2099
2131
  }),
2100
2132
  ];
@@ -32,6 +32,7 @@ import type {
32
32
  Model,
33
33
  ProviderSessionState,
34
34
  RedactedThinkingContent,
35
+ ServiceTier,
35
36
  SimpleStreamOptions,
36
37
  StopReason,
37
38
  StreamFunction,
@@ -43,6 +44,7 @@ import type {
43
44
  ToolResultMessage,
44
45
  Usage,
45
46
  } from "../types";
47
+ import { resolveServiceTier } from "../types";
46
48
  import {
47
49
  isAnthropicOAuthToken,
48
50
  isRecord,
@@ -111,6 +113,7 @@ const claudeCodeBetaDefaults = [
111
113
  ];
112
114
  const fineGrainedToolStreamingBeta = "fine-grained-tool-streaming-2025-05-14";
113
115
  const interleavedThinkingBeta = "interleaved-thinking-2025-05-14";
116
+ const fastModeBeta = "fast-mode-2026-02-01";
114
117
 
115
118
  function getHeaderCaseInsensitive(headers: Record<string, string> | undefined, headerName: string): string | undefined {
116
119
  if (!headers) return undefined;
@@ -224,13 +227,16 @@ const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";
224
227
 
225
228
  type AnthropicProviderSessionState = ProviderSessionState & {
226
229
  strictToolsDisabled: boolean;
230
+ fastModeDisabled: boolean;
227
231
  };
228
232
 
229
233
  function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
230
234
  const state: AnthropicProviderSessionState = {
231
235
  strictToolsDisabled: false,
236
+ fastModeDisabled: false,
232
237
  close: () => {
233
238
  state.strictToolsDisabled = false;
239
+ state.fastModeDisabled = false;
234
240
  },
235
241
  };
236
242
  return state;
@@ -249,6 +255,23 @@ function getAnthropicProviderSessionState(
249
255
  return created;
250
256
  }
251
257
 
258
+ /**
259
+ * Clears the in-session "server rejected fast mode" sticky flag. Call when the
260
+ * caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
261
+ * `/fast on` after a previous turn auto-disabled it) so the next request
262
+ * actually carries `speed: "fast"` again. No-op when the map or state entry
263
+ * hasn't been materialized yet.
264
+ */
265
+ export function clearAnthropicFastModeFallback(
266
+ providerSessionState: Map<string, ProviderSessionState> | undefined,
267
+ ): void {
268
+ if (!providerSessionState) return;
269
+ const state = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
270
+ | AnthropicProviderSessionState
271
+ | undefined;
272
+ if (state) state.fastModeDisabled = false;
273
+ }
274
+
252
275
  function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
253
276
  if (extractHttpStatusFromError(error) !== 400) return false;
254
277
  const message = error instanceof Error ? error.message : String(error);
@@ -258,11 +281,45 @@ function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
258
281
  return /invalid_request_error/i.test(message) && (isStrictGrammarTooLarge || isSchemaCompilationTooComplex);
259
282
  }
260
283
 
284
+ export function isAnthropicFastModeUnsupportedError(error: unknown): boolean {
285
+ const status = extractHttpStatusFromError(error);
286
+ if (status !== 400 && status !== 429) return false;
287
+ const message = error instanceof Error ? error.message : String(error);
288
+ // 400 invalid_request_error — model doesn't accept `speed` at all.
289
+ // Observed: "'claude-opus-4-5-20251101' does not support the `speed` parameter."
290
+ // Stay tolerant of phrasing drift ("is not supported", quoted vs backticked field).
291
+ if (
292
+ status === 400 &&
293
+ /invalid_request_error/i.test(message) &&
294
+ /\bspeed\b/i.test(message) &&
295
+ /not support/i.test(message)
296
+ ) {
297
+ return true;
298
+ }
299
+ // 429 rate_limit_error — account lacks the extra-usage entitlement fast mode requires.
300
+ // Observed: "Extra usage is required for fast mode."
301
+ if (status === 429 && /rate_limit_error/i.test(message) && /fast mode/i.test(message)) {
302
+ return true;
303
+ }
304
+ return false;
305
+ }
306
+
261
307
  function hasStrictAnthropicTools(params: MessageCreateParamsStreaming): boolean {
262
308
  const tools = params.tools as Array<{ strict?: unknown }> | undefined;
263
309
  return tools?.some(tool => tool.strict === true) ?? false;
264
310
  }
265
311
 
312
+ /**
313
+ * `speed` lives on `BetaMessageCreateParams` (client.beta.messages) but this
314
+ * provider posts via `client.messages.create`, whose param type doesn't
315
+ * include it. This alias narrows the cast to one place.
316
+ */
317
+ type ParamsWithSpeed = MessageCreateParamsStreaming & { speed?: "fast" };
318
+
319
+ function dropAnthropicFastMode(params: MessageCreateParamsStreaming): void {
320
+ delete (params as ParamsWithSpeed).speed;
321
+ }
322
+
266
323
  function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
267
324
  const tools = params.tools as Array<{ strict?: unknown }> | undefined;
268
325
  if (!tools) return;
@@ -526,6 +583,16 @@ export interface AnthropicOptions extends StreamOptions {
526
583
  interleavedThinking?: boolean;
527
584
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
528
585
  betas?: string[] | string;
586
+ /**
587
+ * Realization of `serviceTier: "priority"` on Anthropic models. When
588
+ * `"priority"`, sets `speed: "fast"` on the request and appends the
589
+ * `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
590
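+ * with `invalid_request_error` (or a 429 `rate_limit_error` when the account lacks the extra-usage entitlement fast mode requires), which triggers an in-provider one-shot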
591
+ * fallback (see `fastModeDisabled` provider state).
592
+ *
593
+ * Tiers that do not resolve to `"priority"` for the active provider (see `resolveServiceTier`) are currently ignored on this provider.
594
+ */
595
+ serviceTier?: ServiceTier;
529
596
  /** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
530
597
  isOAuth?: boolean;
531
598
  /**
@@ -961,10 +1028,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
961
1028
  } else {
962
1029
  const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
963
1030
 
1031
+ const extraBetas = normalizeExtraBetas(options?.betas);
1032
+ const wantsAnthropicPriority = resolveServiceTier(options?.serviceTier, model.provider) === "priority";
1033
+ if (wantsAnthropicPriority && !extraBetas.includes(fastModeBeta)) {
1034
+ extraBetas.push(fastModeBeta);
1035
+ }
1036
+
964
1037
  const created = createClient(model, {
965
1038
  model,
966
1039
  apiKey,
967
- extraBetas: normalizeExtraBetas(options?.betas),
1040
+ extraBetas,
968
1041
  stream: true,
969
1042
  interleavedThinking: options?.interleavedThinking ?? true,
970
1043
  headers: options?.headers,
@@ -984,15 +1057,19 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
984
1057
  let disableStrictTools =
985
1058
  (providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
986
1059
  let strictFallbackErrorMessage: string | undefined;
1060
+ let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
987
1061
  const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
988
1062
  let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
1063
+ if (disableStrictTools) {
1064
+ dropAnthropicStrictTools(nextParams);
1065
+ }
1066
+ if (dropFastMode) {
1067
+ dropAnthropicFastMode(nextParams);
1068
+ }
989
1069
  const replacementPayload = await options?.onPayload?.(nextParams, model);
990
1070
  if (replacementPayload !== undefined) {
991
1071
  nextParams = replacementPayload as typeof nextParams;
992
1072
  }
993
- if (disableStrictTools) {
994
- dropAnthropicStrictTools(nextParams);
995
- }
996
1073
  rawRequestDump = {
997
1074
  provider: model.provider,
998
1075
  api: output.api,
@@ -1284,6 +1361,30 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1284
1361
  firstTokenTime = undefined;
1285
1362
  continue;
1286
1363
  }
1364
+ if (
1365
+ !dropFastMode &&
1366
+ resolveServiceTier(options?.serviceTier, model.provider) === "priority" &&
1367
+ firstTokenTime === undefined &&
1368
+ isAnthropicFastModeUnsupportedError(streamFailure)
1369
+ ) {
1370
+ logger.debug("anthropic: fast mode unsupported, retrying without speed", {
1371
+ model: model.id,
1372
+ error: streamFailure instanceof Error ? streamFailure.message : String(streamFailure),
1373
+ });
1374
+ if (providerSessionState) {
1375
+ providerSessionState.fastModeDisabled = true;
1376
+ }
1377
+ dropFastMode = true;
1378
+ params = await prepareParams();
1379
+ providerRetryAttempt = 0;
1380
+ output.content.length = 0;
1381
+ output.responseId = undefined;
1382
+ output.providerPayload = undefined;
1383
+ output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
1384
+ output.stopReason = "stop";
1385
+ firstTokenTime = undefined;
1386
+ continue;
1387
+ }
1287
1388
  const isTransientEnvelopeFailure =
1288
1389
  isTransientStreamParseError(streamFailure) || isTransientStreamEnvelopeError(streamFailure);
1289
1390
  const canRetryTransientEnvelopeFailure = isTransientEnvelopeFailure && !streamedReplayUnsafeContent;
@@ -1315,6 +1416,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1315
1416
 
1316
1417
  output.duration = Date.now() - startTime;
1317
1418
  if (firstTokenTime) output.ttft = firstTokenTime - startTime;
1419
+ if (dropFastMode && resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
1420
+ output.disabledFeatures = [...(output.disabledFeatures ?? []), "priority"];
1421
+ }
1318
1422
  stream.push({ type: "done", reason: output.stopReason, message: output });
1319
1423
  stream.end();
1320
1424
  } catch (error) {
@@ -1862,6 +1966,10 @@ function buildParams(
1862
1966
  params.metadata = { user_id: metadataUserId };
1863
1967
  }
1864
1968
 
1969
+ if (resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
1970
+ (params as ParamsWithSpeed).speed = "fast";
1971
+ }
1972
+
1865
1973
  if (options?.toolChoice) {
1866
1974
  if (typeof options.toolChoice === "string") {
1867
1975
  params.tool_choice = { type: options.toolChoice };
@@ -2280,7 +2388,12 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
2280
2388
  result.properties = normalizedProperties;
2281
2389
  }
2282
2390
  if (isRecord(result.additionalProperties)) {
2283
- result.additionalProperties = normalizeAnthropicToolSchema(result.additionalProperties);
2391
+ const normalized = normalizeAnthropicToolSchema(result.additionalProperties);
2392
+ if (isRecord(normalized) && Object.keys(normalized).length === 0) {
2393
+ result.additionalProperties = true;
2394
+ } else {
2395
+ result.additionalProperties = normalized;
2396
+ }
2284
2397
  }
2285
2398
  if (Array.isArray(result.items)) {
2286
2399
  result.items = result.items.map(item => normalizeAnthropicToolSchema(item));
@@ -11,7 +11,7 @@ import type {
11
11
  Context,
12
12
  ImageContent,
13
13
  Message,
14
- ServiceTier,
14
+ ResolvedServiceTier,
15
15
  StopReason,
16
16
  TextContent,
17
17
  Tool,
@@ -36,7 +36,7 @@ function isReasoningEffort(value: unknown): value is ReasoningEffort {
36
36
  return value === "minimal" || value === "low" || value === "medium" || value === "high" || value === "xhigh";
37
37
  }
38
38
 
39
- function isServiceTier(value: unknown): value is ServiceTier {
39
+ function isServiceTier(value: unknown): value is ResolvedServiceTier {
40
40
  return value === "auto" || value === "default" || value === "flex" || value === "scale" || value === "priority";
41
41
  }
42
42
 
@@ -29,10 +29,10 @@ import {
29
29
  type FetchImpl,
30
30
  type Model,
31
31
  type ProviderSessionState,
32
+ resolveServiceTier,
32
33
  type ServiceTier,
33
34
  type StreamFunction,
34
35
  type StreamOptions,
35
- shouldSendServiceTier,
36
36
  type TextContent,
37
37
  type ThinkingContent,
38
38
  type Tool,
@@ -590,8 +590,9 @@ async function buildTransformedCodexRequestBody(
590
590
  if (options?.repetitionPenalty !== undefined) {
591
591
  params.repetition_penalty = options.repetitionPenalty;
592
592
  }
593
- if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
594
- params.service_tier = options.serviceTier;
593
+ const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
594
+ if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
595
+ params.service_tier = resolvedServiceTier;
595
596
  }
596
597
  if (context.tools && context.tools.length > 0) {
597
598
  params.tools = convertOpenAICodexResponsesTools(context.tools, model);
@@ -52,7 +52,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
52
52
  const isCerebras = provider === "cerebras" || baseUrl.includes("cerebras.ai");
53
53
  const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
54
54
  const isKilo = provider === "kilo" || baseUrl.includes("api.kilo.ai");
55
- const isKimiModel = model.id.includes("moonshotai/kimi") || /^kimi[-.]/i.test(model.id);
55
+ const isKimiModel = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
56
56
  const isMoonshotKimi =
57
57
  isKimiModel &&
58
58
  (provider === "moonshot" ||
@@ -79,7 +79,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
79
79
  baseUrl.includes("deepseek.com") ||
80
80
  lowerId.includes("deepseek") ||
81
81
  lowerName.includes("deepseek");
82
-
82
+ const isDirectDeepseekApi = provider === "deepseek" || baseUrl.includes("api.deepseek.com");
83
+ const isDirectDeepseekReasoning = isDirectDeepseekApi && isDeepseekFamily && Boolean(model.reasoning);
83
84
  const isNonStandard =
84
85
  isCerebras ||
85
86
  provider === "xai" ||
@@ -102,7 +103,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
102
103
  provider === "mistral" ||
103
104
  baseUrl.includes("mistral.ai") ||
104
105
  baseUrl.includes("chutes.ai") ||
105
- baseUrl.includes("fireworks.ai");
106
+ baseUrl.includes("fireworks.ai") ||
107
+ isDirectDeepseekApi;
106
108
  const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
107
109
  const isMistral = provider === "mistral" || baseUrl.includes("mistral.ai");
108
110
 
@@ -162,7 +164,13 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
162
164
  xhigh: "default",
163
165
  } satisfies Partial<Record<OpenAIReasoningEffort, string>>)
164
166
  : isDeepseekFamily && model.reasoning
165
- ? { xhigh: "max" }
167
+ ? ({
168
+ minimal: "high",
169
+ low: "high",
170
+ medium: "high",
171
+ high: "high",
172
+ xhigh: "max",
173
+ } satisfies Partial<Record<OpenAIReasoningEffort, string>>)
166
174
  : {};
167
175
 
168
176
  return {
@@ -173,8 +181,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
173
181
  reasoningEffortMap,
174
182
  supportsUsageInStreaming: !isCerebras,
175
183
  disableReasoningOnForcedToolChoice: isKimiModel || isAnthropicModel,
176
- disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning),
177
- supportsToolChoice: true,
184
+ disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning) && !isOpenRouter,
185
+ supportsToolChoice: !isDirectDeepseekReasoning,
178
186
  maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
179
187
  requiresToolResultName: isMistral,
180
188
  requiresAssistantAfterToolResult: false,
@@ -204,11 +212,11 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
204
212
  // DeepSeek V4 rejects synthetic reasoning_content placeholders (".") on tool-call turns.
205
213
  // Kimi and OpenRouter accept them when actual reasoning is unavailable.
206
214
  allowsSyntheticReasoningContentForToolCalls: !isDeepseekFamily || !model.reasoning,
207
- requiresAssistantContentForToolCalls: isKimiModel,
215
+ requiresAssistantContentForToolCalls: isKimiModel || isDirectDeepseekReasoning,
208
216
  openRouterRouting: undefined,
209
217
  vercelGatewayRouting: undefined,
210
218
  supportsStrictMode: detectStrictModeSupport(provider, baseUrl),
211
- extraBody: undefined,
219
+ extraBody: isDirectDeepseekReasoning ? { thinking: { type: "enabled" } } : undefined,
212
220
  toolStrictMode: isCerebras ? "all_strict" : "mixed",
213
221
  };
214
222
  }
@@ -235,7 +243,7 @@ export function resolveOpenAICompat(
235
243
  supportsMultipleSystemMessages:
236
244
  model.compat.supportsMultipleSystemMessages ?? detected.supportsMultipleSystemMessages,
237
245
  supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
238
- reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
246
+ reasoningEffortMap: { ...detected.reasoningEffortMap, ...(model.compat.reasoningEffortMap ?? {}) },
239
247
  supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
240
248
  supportsToolChoice: model.compat.supportsToolChoice ?? detected.supportsToolChoice,
241
249
  maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
@@ -259,7 +267,7 @@ export function resolveOpenAICompat(
259
267
  openRouterRouting: model.compat.openRouterRouting ?? detected.openRouterRouting,
260
268
  vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
261
269
  supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
262
- extraBody: model.compat.extraBody,
270
+ extraBody: model.compat.extraBody ?? detected.extraBody,
263
271
  toolStrictMode: model.compat.toolStrictMode ?? detected.toolStrictMode,
264
272
  };
265
273
  }
@@ -22,6 +22,7 @@ import {
22
22
  type Model,
23
23
  type OpenAICompat,
24
24
  type ProviderSessionState,
25
+ resolveServiceTier,
25
26
  type ServiceTier,
26
27
  type StopReason,
27
28
  type StreamFunction,
@@ -37,7 +38,7 @@ import {
37
38
  import { normalizeSystemPrompts } from "../utils";
38
39
  import { createAbortSourceTracker } from "../utils/abort";
39
40
  import { AssistantMessageEventStream } from "../utils/event-stream";
40
- import { toFireworksWireModelId } from "../utils/fireworks-model-id";
41
+ import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
41
42
  import {
42
43
  type CapturedHttpErrorResponse,
43
44
  finalizeErrorMessage,
@@ -486,7 +487,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
486
487
  }
487
488
  stream.push({ type: "start", partial: output });
488
489
 
489
- const parseMiniMaxThinkTags = model.provider === "minimax-code";
490
+ const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
490
491
  // Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
491
492
  // native API) leak chat-template tool-call markers in `delta.content` even
492
493
  // though tool calls are also surfaced structurally. Strip the leaked markers
@@ -1037,13 +1038,23 @@ function buildParams(
1037
1038
  maybeAddOpenRouterAnthropicCacheControl(model, messages);
1038
1039
  const supportsReasoningParams = model.provider !== "github-copilot";
1039
1040
 
1040
- // Kimi (including via OpenRouter) calculates TPM rate limits based on max_tokens, not actual output.
1041
- // Always send max_tokens to avoid their high default causing rate limit issues.
1041
+ // Kimi (including via OpenRouter and Fireworks router-form IDs such as
1042
+ // `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
1043
+ // max_tokens, not actual output. The official Kimi K2 model guidance
1044
+ // (https://docs.fireworks.ai/models/kimi-k2) also requires `max_tokens` for
1045
+ // every call since the family can otherwise emit very long reasoning traces
1046
+ // before the final answer. Always send max_tokens — match the same
1047
+ // Kimi-family regex used by the compat detector.
1042
1048
  // Note: Direct kimi-code provider is handled by the dedicated Kimi provider in kimi.ts.
1043
- const isKimi = model.id.includes("moonshotai/kimi");
1049
+ const isKimi = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
1044
1050
  const effectiveMaxTokens = options?.maxTokens ?? (isKimi ? model.maxTokens : undefined);
1045
1051
 
1046
- const requestModelId = model.provider === "fireworks" ? toFireworksWireModelId(model.id) : model.id;
1052
+ const requestModelId =
1053
+ model.provider === "fireworks"
1054
+ ? toFireworksWireModelId(model.id)
1055
+ : model.provider === "firepass"
1056
+ ? toFirepassWireModelId(model.id)
1057
+ : model.id;
1047
1058
  const params: OpenAICompletionsParams = {
1048
1059
  model: requestModelId,
1049
1060
  messages,
@@ -1093,7 +1104,10 @@ function buildParams(
1093
1104
  params.frequency_penalty = options.frequencyPenalty;
1094
1105
  }
1095
1106
  if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
1096
- params.service_tier = options.serviceTier;
1107
+ const resolved = resolveServiceTier(options?.serviceTier, model.provider);
1108
+ if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
1109
+ params.service_tier = resolved;
1110
+ }
1097
1111
  }
1098
1112
 
1099
1113
  if (context.tools) {
@@ -17,6 +17,7 @@ import {
17
17
  type AssistantMessage,
18
18
  type ImageContent,
19
19
  type Model,
20
+ resolveServiceTier,
20
21
  type ServiceTier,
21
22
  type StopReason,
22
23
  type StreamOptions,
@@ -651,7 +652,10 @@ export function applyCommonResponsesSamplingParams<P extends CommonResponsesPara
651
652
  if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
652
653
  if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
653
654
  if (shouldSendServiceTier(options?.serviceTier, provider)) {
654
- params.service_tier = options.serviceTier;
655
+ const resolved = resolveServiceTier(options?.serviceTier, provider);
656
+ if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
657
+ params.service_tier = resolved;
658
+ }
655
659
  }
656
660
  }
657
661
 
package/src/stream.ts CHANGED
@@ -83,6 +83,7 @@ const serviceProviderMap: Record<string, KeyResolver> = {
83
83
  cerebras: "CEREBRAS_API_KEY",
84
84
  xai: "XAI_API_KEY",
85
85
  fireworks: "FIREWORKS_API_KEY",
86
+ firepass: "FIREPASS_API_KEY",
86
87
  openrouter: "OPENROUTER_API_KEY",
87
88
  kilo: "KILO_API_KEY",
88
89
  "vercel-ai-gateway": "AI_GATEWAY_API_KEY",
@@ -580,6 +581,7 @@ function mapOptionsForApi<TApi extends Api>(
580
581
  thinkingEnabled: false,
581
582
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
582
583
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
584
+ serviceTier: options?.serviceTier,
583
585
  });
584
586
  }
585
587
 
@@ -590,6 +592,7 @@ function mapOptionsForApi<TApi extends Api>(
590
592
  thinkingEnabled: false,
591
593
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
592
594
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
595
+ serviceTier: options?.serviceTier,
593
596
  });
594
597
  }
595
598
 
@@ -603,6 +606,7 @@ function mapOptionsForApi<TApi extends Api>(
603
606
  effort,
604
607
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
605
608
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
609
+ serviceTier: options?.serviceTier,
606
610
  });
607
611
  }
608
612
 
@@ -613,6 +617,7 @@ function mapOptionsForApi<TApi extends Api>(
613
617
  thinkingBudgetTokens: thinkingBudget,
614
618
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
615
619
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
620
+ serviceTier: options?.serviceTier,
616
621
  });
617
622
  }
618
623
 
@@ -631,6 +636,7 @@ function mapOptionsForApi<TApi extends Api>(
631
636
  thinkingEnabled: false,
632
637
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
633
638
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
639
+ serviceTier: options?.serviceTier,
634
640
  });
635
641
  } else {
636
642
  return castApi<"anthropic-messages">({
@@ -640,6 +646,7 @@ function mapOptionsForApi<TApi extends Api>(
640
646
  thinkingBudgetTokens: thinkingBudget,
641
647
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
642
648
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
649
+ serviceTier: options?.serviceTier,
643
650
  });
644
651
  }
645
652
  }
package/src/types.ts CHANGED
@@ -110,6 +110,7 @@ export type KnownProvider =
110
110
  | "minimax-code-cn"
111
111
  | "github-copilot"
112
112
  | "fireworks"
113
+ | "firepass"
113
114
  | "gitlab-duo"
114
115
  | "cursor"
115
116
  | "deepseek"
@@ -162,29 +163,78 @@ export type ToolChoice =
162
163
  // Base options all providers share
163
164
  export type CacheRetention = "none" | "short" | "long";
164
165
 
165
- /** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
166
- export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
166
+ /**
167
+ * Service tier hint for processing priority / cost control.
168
+ *
169
+ * The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
170
+ * `"priority"`) are passed through to providers that understand them
171
+ * (OpenAI's `service_tier` field directly; Anthropic translates
172
+ * `"priority"` into `speed: "fast"` on supported Opus models).
173
+ *
174
+ * The scoped values target a specific provider family and behave as the
175
+ * unscoped value on the matching provider, or `undefined` everywhere else.
176
+ * They let users opt into priority on one family without paying premium
177
+ * costs on the other when switching models mid-session.
178
+ *
179
+ * - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
180
+ * - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
181
+ */
182
+ export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
167
183
 
168
- export function shouldSendServiceTier(
169
- serviceTier?: ServiceTier | null,
170
- provider?: Provider,
171
- ): serviceTier is "flex" | "scale" | "priority" {
172
- if (provider !== "openai" && provider !== "openai-codex") {
173
- return false;
184
+ /** Resolved tier — one of the values that providers actually consume on the wire. */
185
+ export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
186
+
187
+ /**
188
+ * Resolves a possibly scoped `ServiceTier` to the effective tier for the
189
+ * given provider. Scoped values match their target family and otherwise
190
+ * collapse to `undefined`; unscoped values pass through unchanged.
191
+ */
192
+ export function resolveServiceTier(
193
+ serviceTier: ServiceTier | null | undefined,
194
+ provider: Provider | undefined,
195
+ ): ResolvedServiceTier | undefined {
196
+ if (!serviceTier) return undefined;
197
+ switch (serviceTier) {
198
+ case "openai-only":
199
+ return provider === "openai" || provider === "openai-codex" ? "priority" : undefined;
200
+ case "claude-only":
201
+ return provider === "anthropic" ? "priority" : undefined;
202
+ default:
203
+ return serviceTier;
174
204
  }
175
- return serviceTier === "flex" || serviceTier === "scale" || serviceTier === "priority";
176
205
  }
177
206
 
178
207
  /**
179
- * Premium-request weight contributed by sending a `priority` service tier to
180
- * a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
181
- * accounting so the "premium requests" stat aggregates priority traffic too.
208
+ * True when the (possibly scoped) tier should be sent as OpenAI's
209
+ * `service_tier` request field for the given provider. Non-OpenAI
210
+ * providers, unsupported tiers (`"auto"`, `"default"`), and scope
211
+ * mismatches all return false.
212
+ */
213
+ export function shouldSendServiceTier(
214
+ serviceTier: ServiceTier | null | undefined,
215
+ provider: Provider | undefined,
216
+ ): boolean {
217
+ if (provider !== "openai" && provider !== "openai-codex") return false;
218
+ const resolved = resolveServiceTier(serviceTier, provider);
219
+ return resolved === "flex" || resolved === "scale" || resolved === "priority";
220
+ }
221
+
222
+ /**
223
+ * Premium-request weight contributed by sending priority to a provider
224
+ * that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
225
+ * so the "premium requests" stat aggregates priority traffic across the
226
+ * OpenAI family and Anthropic fast-mode realizations.
182
227
  *
183
- * Returns 1 per priority request, 0 otherwise. Non-priority tiers (`flex`,
184
- * `scale`) and providers that ignore `service_tier` always return 0.
228
+ * Returns 1 per resolved priority request, 0 otherwise.
185
229
  */
186
- export function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number {
187
- return shouldSendServiceTier(serviceTier, provider) && serviceTier === "priority" ? 1 : 0;
230
+ export function getPriorityPremiumRequests(
231
+ serviceTier: ServiceTier | null | undefined,
232
+ provider: Provider | undefined,
233
+ ): number {
234
+ if (resolveServiceTier(serviceTier, provider) !== "priority") return 0;
235
+ // Only providers that realize `priority` on the wire bill the user.
236
+ // Everywhere else, the field is silently dropped and nothing is charged.
237
+ return provider === "openai" || provider === "openai-codex" || provider === "anthropic" ? 1 : 0;
188
238
  }
189
239
 
190
240
  export interface ProviderSessionState {
@@ -502,6 +552,14 @@ export interface AssistantMessage {
502
552
  errorMessage?: string;
503
553
  /** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
504
554
  errorStatus?: number;
555
+ /**
556
+ * Stable identifiers for request features the provider silently dropped
557
+ * during this turn (e.g. `"priority"`). Set when a server-side rejection
558
+ * triggered an in-provider fallback retry that succeeded without the
559
+ * feature. Callers can use this to sync user-facing toggles back to the
560
+ * server's actual state.
561
+ */
562
+ disabledFeatures?: string[];
505
563
  /** Provider-specific opaque payload used to reconstruct transport-native history. */
506
564
  providerPayload?: ProviderPayload;
507
565
  timestamp: number; // Unix timestamp in milliseconds
@@ -1,4 +1,5 @@
1
1
  const FIREWORKS_WIRE_PREFIX = "accounts/fireworks/models/";
2
+ const FIREPASS_WIRE_PREFIX = "accounts/fireworks/routers/";
2
3
  const VERSION_SEPARATOR_PATTERN = /(?<=\d)p(?=\d)/g;
3
4
  const VERSION_DOT_PATTERN = /(?<=\d)\.(?=\d)/g;
4
5
 
@@ -11,3 +12,19 @@ export function toFireworksWireModelId(modelId: string): string {
11
12
  const stripped = modelId.startsWith(FIREWORKS_WIRE_PREFIX) ? modelId.slice(FIREWORKS_WIRE_PREFIX.length) : modelId;
12
13
  return `${FIREWORKS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
13
14
  }
15
+
16
+ /**
17
+ * Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
18
+ * endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
19
+ * We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
20
+ * to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time.
21
+ */
22
+ export function toFirepassPublicModelId(modelId: string): string {
23
+ const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
24
+ return stripped.replace(VERSION_SEPARATOR_PATTERN, ".");
25
+ }
26
+
27
+ export function toFirepassWireModelId(modelId: string): string {
28
+ const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
29
+ return `${FIREPASS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
30
+ }
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Fire Pass login flow.
3
+ *
4
+ * Fire Pass is a Fireworks subscription product whose dedicated `fpk_…` API
5
+ * keys are scoped to the `accounts/fireworks/routers/kimi-k2p6-turbo` router
6
+ * (Kimi K2.6 Turbo). The key does NOT authorize `/v1/models`, so validation
7
+ * pings the chat completions endpoint with the router id directly.
8
+ * See https://docs.fireworks.ai/firepass.
9
+ */
10
+ import { createApiKeyLogin } from "./api-key-login";
11
+
12
+ export const loginFirepass = createApiKeyLogin({
13
+ providerLabel: "Fire Pass",
14
+ authUrl: "https://app.fireworks.ai/settings/users/api-keys",
15
+ instructions: "Create a dedicated Fire Pass API key in the Fireworks dashboard",
16
+ promptMessage: "Paste your Fire Pass API key",
17
+ placeholder: "fpk_...",
18
+ validation: {
19
+ kind: "chat-completions",
20
+ provider: "Fire Pass",
21
+ baseUrl: "https://api.fireworks.ai/inference/v1",
22
+ model: "accounts/fireworks/routers/kimi-k2p6-turbo",
23
+ },
24
+ });
@@ -55,6 +55,11 @@ const builtInOAuthProviders: OAuthProviderInfo[] = [
55
55
  name: "Fireworks",
56
56
  available: true,
57
57
  },
58
+ {
59
+ id: "firepass",
60
+ name: "Fire Pass (Fireworks Kimi K2.6 Turbo subscription)",
61
+ available: true,
62
+ },
58
63
  {
59
64
  id: "github-copilot",
60
65
  name: "GitHub Copilot",
@@ -301,6 +306,7 @@ export async function refreshOAuthToken(
301
306
  case "opencode-go":
302
307
  case "cerebras":
303
308
  case "fireworks":
309
+ case "firepass":
304
310
  case "nvidia":
305
311
  case "nanogpt":
306
312
  case "synthetic":
@@ -363,10 +369,14 @@ export async function getOAuthApiKey(
363
369
  }
364
370
 
365
371
  if (provider === "perplexity") {
372
+ // Perplexity JWTs usually omit `exp` (server-side sessions). Trust the JWT
373
+ // claim when present; otherwise treat the credential as non-expiring rather
374
+ // than honoring a stale stored `expires` (older logins wrote loginTime+1h).
375
+ const NEVER_EXPIRES = 8.64e15;
366
376
  const normalizedExpires =
367
377
  creds.expires > 0 && creds.expires < 10_000_000_000 ? creds.expires * 1000 : creds.expires;
368
378
  const jwtExpiry = getPerplexityJwtExpiryMs(creds.access);
369
- const expires = jwtExpiry && jwtExpiry > normalizedExpires ? jwtExpiry : normalizedExpires;
379
+ const expires = jwtExpiry ?? Math.max(normalizedExpires, NEVER_EXPIRES);
370
380
  if (expires !== creds.expires) {
371
381
  creds = { ...creds, expires };
372
382
  }
@@ -24,20 +24,26 @@ const APP_USER_AGENT = "Perplexity/641 CFNetwork/1568 Darwin/25.2.0";
24
24
  // JWT helpers
25
25
  // ---------------------------------------------------------------------------
26
26
 
27
- /** Extract expiry from a JWT. Falls back to 1 hour from now. Subtracts 5 min safety margin. */
27
+ /**
28
+ * Extract expiry from a JWT. Perplexity tokens generally lack an `exp` claim
29
+ * (their sessions are server-side and effectively non-expiring from the client's
30
+ * point of view), so we return a far-future sentinel when no `exp` is present.
31
+ * When `exp` IS present, subtract a 5-minute safety margin.
32
+ */
33
+ const NEVER_EXPIRES = 8.64e15; // max safe Date value
28
34
  function getJwtExpiry(token: string): number {
29
35
  try {
30
36
  const parts = token.split(".");
31
- if (parts.length !== 3) return Date.now() + 3600_000;
37
+ if (parts.length !== 3) return NEVER_EXPIRES;
32
38
  const payload = parts[1] ?? "";
33
39
  const decoded = JSON.parse(atob(payload.replace(/-/g, "+").replace(/_/g, "/")));
34
- if (decoded?.exp && typeof decoded.exp === "number") {
40
+ if (typeof decoded?.exp === "number" && Number.isFinite(decoded.exp)) {
35
41
  return decoded.exp * 1000 - 5 * 60_000;
36
42
  }
37
43
  } catch {
38
44
  // Ignore decode errors
39
45
  }
40
- return Date.now() + 3600_000;
46
+ return NEVER_EXPIRES;
41
47
  }
42
48
 
43
49
  /** Build OAuthCredentials from a Perplexity JWT string. */
@@ -15,6 +15,7 @@ export type OAuthProvider =
15
15
  | "cloudflare-ai-gateway"
16
16
  | "cursor"
17
17
  | "fireworks"
18
+ | "firepass"
18
19
  | "github-copilot"
19
20
  | "google-gemini-cli"
20
21
  | "google-antigravity"
@@ -243,8 +243,17 @@ function rewriteZodNode(node: JsonObject, seen: WeakSet<object>): unknown {
243
243
  case "pipe":
244
244
  case "transform": {
245
245
  const inner = walk(unwrapInnerSchema(def), seen);
246
- if (kind === "nullable" && isJsonObject(inner) && typeof inner.type === "string") {
247
- return { ...inner, type: [inner.type, "null"] };
246
+ if (kind === "nullable" && isJsonObject(inner)) {
247
+ if (typeof inner.type === "string") {
248
+ return { ...inner, type: [inner.type, "null"] };
249
+ }
250
+ if (Array.isArray(inner.type)) {
251
+ return (inner.type as string[]).includes("null")
252
+ ? inner
253
+ : { ...inner, type: [...(inner.type as string[]), "null"] };
254
+ }
255
+ // anyOf / allOf / $ref shapes — no scalar `type` field
256
+ return { anyOf: [inner, { type: "null" }] };
248
257
  }
249
258
  return inner;
250
259
  }