@oh-my-pi/pi-ai 15.1.7 → 15.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,24 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.1.9] - 2026-05-21
6
+
7
+ ### Fixed
8
+
9
+ - Fixed Ollama named-tool forcing: when the caller passes a named `toolChoice`, the request now sends only the requested tool, preserving `tool_choice: "required"` while preventing local models from selecting a different tool. ([#1236](https://github.com/can1357/oh-my-pi/issues/1236))
10
+ - Fixed `/btw` (and IRC background replies) returning a `BedrockException` 400 (`The toolConfig field must be defined when using toolUse and toolResult content blocks.`) on LiteLLM → Bedrock once the session has tool-call history. Two source fixes in `buildParams`: (1) `if (context.tools)` → `if (context.tools?.length)` so an explicit `context.tools = []` (the /btw opt-out) never routes through `convertTools` and never emits an empty `"tools"` array; (2) `else if (hasToolHistory(...))` → `else if (context.tools === undefined && hasToolHistory(...))` so the Anthropic-proxy sentinel that injects `tools: []` for tool-history turns is suppressed when the caller explicitly opted out, preventing it from re-introducing the empty array. As defence-in-depth, `tool_choice: "none"` is also dropped when the resolved tools list is missing or empty. ([#1227](https://github.com/can1357/oh-my-pi/issues/1227))
11
+
12
+ ## [15.1.8] - 2026-05-20
13
+ ### Added
14
+
15
+ - Added Fireworks Fire Pass as a separate `firepass` provider with API-key login flow, bundled `kimi-k2.6-turbo` model entry (Kimi K2.6 Turbo), and wire-id translation from the friendly catalog id to the `accounts/fireworks/routers/kimi-k2p6-turbo` router endpoint. Fire Pass keys (`fpk_…`) authorize only the dedicated router, not `/v1/models`, so login validation pings chat completions against the router id directly. Extended the openai-completions Kimi-family safety net so the firepass entry inherits the "always send `max_tokens`" default recommended by the Fireworks docs ([Kimi K2 guide](https://docs.fireworks.ai/models/kimi-k2)); the router's accepted `reasoning_effort` set includes `xhigh`, so it is forwarded verbatim rather than remapped. See https://docs.fireworks.ai/firepass.
16
+
17
+ ### Fixed
18
+
19
+ - Fixed DeepSeek V4 direct API requests with tools to keep documented thinking mode instead of dropping reasoning: lower OMP efforts now map to DeepSeek's supported `high`, `tool_choice` is omitted, `thinking: { type: "enabled" }` and `max_tokens` are sent, and partial user `reasoningEffortMap` overrides merge with DeepSeek defaults. ([#1207](https://github.com/can1357/oh-my-pi/issues/1207))
20
+ - Fixed model cache schema v2 databases so offline refreshes preserve cached provider discoveries after upgrading to schema v3 and subsequent online refreshes can overwrite the cache. ([#1219](https://github.com/can1357/oh-my-pi/issues/1219))
21
+ - Fixed Perplexity OAuth credentials being treated as expired one hour after login. `getJwtExpiry` was fabricating `expires = now + 1h` whenever the JWT had no `exp` claim (the common case — Perplexity sessions are server-side). Once the hour elapsed, `getOAuthApiKey` would mark the credential expired and the search provider's loader would silently skip it, surfacing as "logged out". Logins with no `exp` now persist a far-future sentinel; `getOAuthApiKey` also normalizes any stale `expires` written by older builds.
22
+
5
23
  ## [15.1.7] - 2026-05-19
6
24
  ### Added
7
25
 
@@ -11,6 +29,7 @@
11
29
  ### Fixed
12
30
 
13
31
  - Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
32
+ - Fixed MiniMax Coding Plan CN streaming `<think>...</think>` reasoning as visible assistant text. The OpenAI-compatible stream parser now enables the existing MiniMax tag parser for both `minimax-code` and `minimax-code-cn`, so CN responses become structured `thinking` blocks instead of raw text. ([#1203](https://github.com/can1357/oh-my-pi/issues/1203))
14
33
 
15
34
  ## [15.1.6] - 2026-05-19
16
35
 
@@ -63,6 +63,18 @@ export interface FireworksModelManagerConfig {
63
63
  baseUrl?: string;
64
64
  }
65
65
  export declare function fireworksModelManagerOptions(config?: FireworksModelManagerConfig): ModelManagerOptions<"openai-completions">;
66
+ export interface FirepassModelManagerConfig {
67
+ apiKey?: string;
68
+ baseUrl?: string;
69
+ }
70
+ /**
71
+ * Fire Pass is a Fireworks subscription product that exposes a single router
72
+ * model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
73
+ * The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
74
+ * never performs dynamic discovery — the bundled catalog entry is canonical.
75
+ * See https://docs.fireworks.ai/firepass.
76
+ */
77
+ export declare function firepassModelManagerOptions(_config?: FirepassModelManagerConfig): ModelManagerOptions<"openai-completions">;
66
78
  export interface MistralModelManagerConfig {
67
79
  apiKey?: string;
68
80
  baseUrl?: string;
@@ -48,7 +48,7 @@ export interface ThinkingConfig {
48
48
  /** Provider-specific transport used to encode the selected effort. */
49
49
  mode: ThinkingControlMode;
50
50
  }
51
- export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
51
+ export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
52
52
  export type Provider = KnownProvider | string;
53
53
  import type { Effort } from "./model-thinking";
54
54
  /** Token budgets for each thinking level (token-based providers only) */
@@ -1,2 +1,10 @@
1
1
  export declare function toFireworksPublicModelId(modelId: string): string;
2
2
  export declare function toFireworksWireModelId(modelId: string): string;
3
+ /**
4
+ * Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
5
+ * endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
6
+ * We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
7
+ * to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time.
8
+ */
9
+ export declare function toFirepassPublicModelId(modelId: string): string;
10
+ export declare function toFirepassWireModelId(modelId: string): string;
@@ -0,0 +1 @@
1
+ export declare const loginFirepass: (options: import("./types").OAuthController) => Promise<string>;
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
7
7
  email?: string;
8
8
  accountId?: string;
9
9
  };
10
- export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
10
+ export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
11
11
  export type OAuthProviderId = OAuthProvider | (string & {});
12
12
  export type OAuthPrompt = {
13
13
  message: string;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.1.7",
4
+ "version": "15.1.9",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -43,7 +43,7 @@
43
43
  "dependencies": {
44
44
  "@anthropic-ai/sdk": "^0.94.0",
45
45
  "@bufbuild/protobuf": "^2.12.0",
46
- "@oh-my-pi/pi-utils": "15.1.7",
46
+ "@oh-my-pi/pi-utils": "15.1.9",
47
47
  "openai": "^6.36.0",
48
48
  "partial-json": "^0.1.7",
49
49
  "zod": "4.4.3"
@@ -1344,6 +1344,12 @@ export class AuthStorage {
1344
1344
  await saveApiKeyCredential(apiKey);
1345
1345
  return;
1346
1346
  }
1347
+ case "firepass": {
1348
+ const { loginFirepass } = await import("./utils/oauth/firepass");
1349
+ const apiKey = await loginFirepass(ctrl);
1350
+ await saveApiKeyCredential(apiKey);
1351
+ return;
1352
+ }
1347
1353
  case "zai": {
1348
1354
  const { loginZai } = await import("./utils/oauth/zai");
1349
1355
  const apiKey = await loginZai(ctrl);
@@ -17,6 +17,10 @@ interface CacheRow {
17
17
  models: string;
18
18
  }
19
19
 
20
+ interface TableInfoRow {
21
+ name: string;
22
+ }
23
+
20
24
  interface CacheEntry<TApi extends Api = Api> {
21
25
  models: Model<TApi>[];
22
26
  fresh: boolean;
@@ -55,11 +59,21 @@ function getDb(dbPath?: string): Database {
55
59
  models TEXT NOT NULL
56
60
  )
57
61
  `);
62
+ migrateCacheSchema(db);
63
+
58
64
  sharedDb = db;
59
65
  sharedDbPath = resolvedPath;
60
66
  return db;
61
67
  }
62
68
 
69
+ function migrateCacheSchema(db: Database): void {
70
+ const columns = db.prepare("PRAGMA table_info(model_cache)").all() as TableInfoRow[];
71
+ if (!columns.some(column => column.name === "static_fingerprint")) {
72
+ db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
73
+ }
74
+ db.run("UPDATE model_cache SET version = ? WHERE version = 2", [CACHE_SCHEMA_VERSION]);
75
+ }
76
+
63
77
  export function readModelCache<TApi extends Api>(
64
78
  providerId: string,
65
79
  ttlMs: number,
package/src/models.json CHANGED
@@ -5027,6 +5027,33 @@
5027
5027
  }
5028
5028
  }
5029
5029
  },
5030
+ "firepass": {
5031
+ "kimi-k2.6-turbo": {
5032
+ "id": "kimi-k2.6-turbo",
5033
+ "name": "Kimi K2.6 Turbo (Fire Pass)",
5034
+ "api": "openai-completions",
5035
+ "provider": "firepass",
5036
+ "baseUrl": "https://api.fireworks.ai/inference/v1",
5037
+ "reasoning": true,
5038
+ "input": [
5039
+ "text",
5040
+ "image"
5041
+ ],
5042
+ "cost": {
5043
+ "input": 0,
5044
+ "output": 0,
5045
+ "cacheRead": 0,
5046
+ "cacheWrite": 0
5047
+ },
5048
+ "contextWindow": 262144,
5049
+ "maxTokens": 65536,
5050
+ "thinking": {
5051
+ "mode": "effort",
5052
+ "minLevel": "minimal",
5053
+ "maxLevel": "xhigh"
5054
+ }
5055
+ }
5056
+ },
5030
5057
  "fireworks": {
5031
5058
  "deepseek-v4-pro": {
5032
5059
  "id": "deepseek-v4-pro",
@@ -14,6 +14,7 @@ import {
14
14
  cerebrasModelManagerOptions,
15
15
  cloudflareAiGatewayModelManagerOptions,
16
16
  deepseekModelManagerOptions,
17
+ firepassModelManagerOptions,
17
18
  fireworksModelManagerOptions,
18
19
  githubCopilotModelManagerOptions,
19
20
  groqModelManagerOptions,
@@ -152,6 +153,7 @@ export const PROVIDER_DESCRIPTORS: readonly ProviderDescriptor[] = [
152
153
  config => fireworksModelManagerOptions(config),
153
154
  catalog("Fireworks", ["FIREWORKS_API_KEY"]),
154
155
  ),
156
+ descriptor("firepass", "kimi-k2.6-turbo", config => firepassModelManagerOptions(config)),
155
157
  descriptor("xai", "grok-4-fast-non-reasoning", config => xaiModelManagerOptions(config)),
156
158
  catalogDescriptor(
157
159
  "deepseek",
@@ -692,6 +692,30 @@ export function fireworksModelManagerOptions(
692
692
  };
693
693
  }
694
694
 
695
+ // ---------------------------------------------------------------------------
696
+ // 7.6 Fire Pass (Fireworks Kimi K2.6 Turbo subscription)
697
+ // ---------------------------------------------------------------------------
698
+
699
+ export interface FirepassModelManagerConfig {
700
+ apiKey?: string;
701
+ baseUrl?: string;
702
+ }
703
+
704
+ /**
705
+ * Fire Pass is a Fireworks subscription product that exposes a single router
706
+ * model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
707
+ * The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
708
+ * never performs dynamic discovery — the bundled catalog entry is canonical.
709
+ * See https://docs.fireworks.ai/firepass.
710
+ */
711
+ export function firepassModelManagerOptions(
712
+ _config?: FirepassModelManagerConfig,
713
+ ): ModelManagerOptions<"openai-completions"> {
714
+ return {
715
+ providerId: "firepass",
716
+ };
717
+ }
718
+
695
719
  // ---------------------------------------------------------------------------
696
720
  // 7. Mistral
697
721
  // ---------------------------------------------------------------------------
@@ -2083,18 +2107,26 @@ const MODELS_DEV_PROVIDER_DESCRIPTORS_CORE: readonly ModelsDevProviderDescriptor
2083
2107
  // ids are kept off the catalog until the issue thread asks for them.
2084
2108
  filterModel: (id, m) => m.tool_call === true && id.startsWith("deepseek-v4"),
2085
2109
  compat: {
2086
- // xhigh maps to DeepSeek's `max` reasoning_effort (#830 thread).
2110
+ // DeepSeek V4 only accepts `high`/`max`; map lower OMP levels upward so
2111
+ // subagent "minimal" turns stay in documented thinking mode instead of
2112
+ // sending unsupported effort strings.
2113
+ supportsDeveloperRole: false,
2087
2114
  supportsReasoningEffort: true,
2088
- reasoningEffortMap: { xhigh: "max" },
2089
- // `tool_choice` returns 400 against DeepSeek when reasoning_effort is set
2090
- // (per the issue thread). Tool calls still work without the parameter.
2115
+ reasoningEffortMap: { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" },
2116
+ maxTokensField: "max_tokens",
2117
+ // DeepSeek V4 thinking mode rejects the `tool_choice` control parameter.
2118
+ // Tool calls still work without it; the API defaults to auto when tools exist.
2091
2119
  supportsToolChoice: false,
2120
+ // DeepSeek V4's OpenAI format docs enable thinking with both the toggle and
2121
+ // reasoning_effort. Keep the toggle explicit for built-in models.
2122
+ extraBody: { thinking: { type: "enabled" } },
2092
2123
  // DeepSeek emits chain-of-thought via `reasoning_content` and requires it
2093
2124
  // to round-trip on assistant tool-call messages so the model can resume
2094
2125
  // from prior thinking (interleaved.field=reasoning_content on models.dev,
2095
2126
  // matches the kimi/openrouter handling already in detectCompat).
2096
2127
  reasoningContentField: "reasoning_content",
2097
2128
  requiresReasoningContentForToolCalls: true,
2129
+ requiresAssistantContentForToolCalls: true,
2098
2130
  },
2099
2131
  }),
2100
2132
  ];
@@ -1060,16 +1060,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1060
1060
  let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
1061
1061
  const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
1062
1062
  let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
1063
- const replacementPayload = await options?.onPayload?.(nextParams, model);
1064
- if (replacementPayload !== undefined) {
1065
- nextParams = replacementPayload as typeof nextParams;
1066
- }
1067
1063
  if (disableStrictTools) {
1068
1064
  dropAnthropicStrictTools(nextParams);
1069
1065
  }
1070
1066
  if (dropFastMode) {
1071
1067
  dropAnthropicFastMode(nextParams);
1072
1068
  }
1069
+ const replacementPayload = await options?.onPayload?.(nextParams, model);
1070
+ if (replacementPayload !== undefined) {
1071
+ nextParams = replacementPayload as typeof nextParams;
1072
+ }
1073
1073
  rawRequestDump = {
1074
1074
  provider: model.provider,
1075
1075
  api: output.api,
@@ -2388,7 +2388,12 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
2388
2388
  result.properties = normalizedProperties;
2389
2389
  }
2390
2390
  if (isRecord(result.additionalProperties)) {
2391
- result.additionalProperties = normalizeAnthropicToolSchema(result.additionalProperties);
2391
+ const normalized = normalizeAnthropicToolSchema(result.additionalProperties);
2392
+ if (isRecord(normalized) && Object.keys(normalized).length === 0) {
2393
+ result.additionalProperties = true;
2394
+ } else {
2395
+ result.additionalProperties = normalized;
2396
+ }
2392
2397
  }
2393
2398
  if (Array.isArray(result.items)) {
2394
2399
  result.items = result.items.map(item => normalizeAnthropicToolSchema(item));
@@ -116,6 +116,29 @@ function mapToolChoice(toolChoice: ToolChoice | undefined): "auto" | "none" | "r
116
116
  return undefined;
117
117
  }
118
118
 
119
+ function getNamedToolChoiceName(toolChoice: ToolChoice | undefined): string | undefined {
120
+ if (!toolChoice || typeof toolChoice === "string") {
121
+ return undefined;
122
+ }
123
+ if ("function" in toolChoice) {
124
+ return toolChoice.function.name;
125
+ }
126
+ return toolChoice.name;
127
+ }
128
+
129
+ function selectToolsForToolChoice(tools: Tool[] | undefined, toolChoice: ToolChoice | undefined): Tool[] | undefined {
130
+ const toolName = getNamedToolChoiceName(toolChoice);
131
+ if (!toolName || !tools) {
132
+ return tools;
133
+ }
134
+ for (const tool of tools) {
135
+ if (tool.name === toolName) {
136
+ return [tool];
137
+ }
138
+ }
139
+ return [];
140
+ }
141
+
119
142
  function toPlainContent(content: string | Array<{ type: "text" | "image"; text?: string; data?: string }>): {
120
143
  content: string;
121
144
  images?: string[];
@@ -231,10 +254,12 @@ function convertTools(tools: Tool[] | undefined): OllamaFunctionTool[] | undefin
231
254
  function createChatBody(model: Model<"ollama-chat">, context: Context, options: OllamaChatOptions | undefined) {
232
255
  const think = mapReasoning(options?.reasoning);
233
256
  const toolChoice = mapToolChoice(options?.toolChoice);
257
+ const selectedTools = selectToolsForToolChoice(context.tools, options?.toolChoice);
258
+ const tools = convertTools(selectedTools);
234
259
  return {
235
260
  model: model.id,
236
261
  messages: convertMessages(model, context),
237
- ...(convertTools(context.tools) ? { tools: convertTools(context.tools) } : {}),
262
+ ...(tools ? { tools } : {}),
238
263
  ...(think !== undefined ? { think } : {}),
239
264
  ...(toolChoice !== undefined ? { tool_choice: toolChoice } : {}),
240
265
  ...(options?.maxTokens !== undefined ? { options: { num_predict: options.maxTokens } } : {}),
@@ -11,7 +11,7 @@ import type {
11
11
  Context,
12
12
  ImageContent,
13
13
  Message,
14
- ServiceTier,
14
+ ResolvedServiceTier,
15
15
  StopReason,
16
16
  TextContent,
17
17
  Tool,
@@ -36,7 +36,7 @@ function isReasoningEffort(value: unknown): value is ReasoningEffort {
36
36
  return value === "minimal" || value === "low" || value === "medium" || value === "high" || value === "xhigh";
37
37
  }
38
38
 
39
- function isServiceTier(value: unknown): value is ServiceTier {
39
+ function isServiceTier(value: unknown): value is ResolvedServiceTier {
40
40
  return value === "auto" || value === "default" || value === "flex" || value === "scale" || value === "priority";
41
41
  }
42
42
 
@@ -52,7 +52,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
52
52
  const isCerebras = provider === "cerebras" || baseUrl.includes("cerebras.ai");
53
53
  const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
54
54
  const isKilo = provider === "kilo" || baseUrl.includes("api.kilo.ai");
55
- const isKimiModel = model.id.includes("moonshotai/kimi") || /^kimi[-.]/i.test(model.id);
55
+ const isKimiModel = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
56
56
  const isMoonshotKimi =
57
57
  isKimiModel &&
58
58
  (provider === "moonshot" ||
@@ -79,7 +79,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
79
79
  baseUrl.includes("deepseek.com") ||
80
80
  lowerId.includes("deepseek") ||
81
81
  lowerName.includes("deepseek");
82
-
82
+ const isDirectDeepseekApi = provider === "deepseek" || baseUrl.includes("api.deepseek.com");
83
+ const isDirectDeepseekReasoning = isDirectDeepseekApi && isDeepseekFamily && Boolean(model.reasoning);
83
84
  const isNonStandard =
84
85
  isCerebras ||
85
86
  provider === "xai" ||
@@ -102,7 +103,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
102
103
  provider === "mistral" ||
103
104
  baseUrl.includes("mistral.ai") ||
104
105
  baseUrl.includes("chutes.ai") ||
105
- baseUrl.includes("fireworks.ai");
106
+ baseUrl.includes("fireworks.ai") ||
107
+ isDirectDeepseekApi;
106
108
  const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
107
109
  const isMistral = provider === "mistral" || baseUrl.includes("mistral.ai");
108
110
 
@@ -162,7 +164,13 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
162
164
  xhigh: "default",
163
165
  } satisfies Partial<Record<OpenAIReasoningEffort, string>>)
164
166
  : isDeepseekFamily && model.reasoning
165
- ? { xhigh: "max" }
167
+ ? ({
168
+ minimal: "high",
169
+ low: "high",
170
+ medium: "high",
171
+ high: "high",
172
+ xhigh: "max",
173
+ } satisfies Partial<Record<OpenAIReasoningEffort, string>>)
166
174
  : {};
167
175
 
168
176
  return {
@@ -173,8 +181,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
173
181
  reasoningEffortMap,
174
182
  supportsUsageInStreaming: !isCerebras,
175
183
  disableReasoningOnForcedToolChoice: isKimiModel || isAnthropicModel,
176
- disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning),
177
- supportsToolChoice: true,
184
+ disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning) && !isOpenRouter,
185
+ supportsToolChoice: !isDirectDeepseekReasoning,
178
186
  maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
179
187
  requiresToolResultName: isMistral,
180
188
  requiresAssistantAfterToolResult: false,
@@ -204,11 +212,11 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
204
212
  // DeepSeek V4 rejects synthetic reasoning_content placeholders (".") on tool-call turns.
205
213
  // Kimi and OpenRouter accept them when actual reasoning is unavailable.
206
214
  allowsSyntheticReasoningContentForToolCalls: !isDeepseekFamily || !model.reasoning,
207
- requiresAssistantContentForToolCalls: isKimiModel,
215
+ requiresAssistantContentForToolCalls: isKimiModel || isDirectDeepseekReasoning,
208
216
  openRouterRouting: undefined,
209
217
  vercelGatewayRouting: undefined,
210
218
  supportsStrictMode: detectStrictModeSupport(provider, baseUrl),
211
- extraBody: undefined,
219
+ extraBody: isDirectDeepseekReasoning ? { thinking: { type: "enabled" } } : undefined,
212
220
  toolStrictMode: isCerebras ? "all_strict" : "mixed",
213
221
  };
214
222
  }
@@ -235,7 +243,7 @@ export function resolveOpenAICompat(
235
243
  supportsMultipleSystemMessages:
236
244
  model.compat.supportsMultipleSystemMessages ?? detected.supportsMultipleSystemMessages,
237
245
  supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
238
- reasoningEffortMap: model.compat.reasoningEffortMap ?? detected.reasoningEffortMap,
246
+ reasoningEffortMap: { ...detected.reasoningEffortMap, ...(model.compat.reasoningEffortMap ?? {}) },
239
247
  supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
240
248
  supportsToolChoice: model.compat.supportsToolChoice ?? detected.supportsToolChoice,
241
249
  maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
@@ -259,7 +267,7 @@ export function resolveOpenAICompat(
259
267
  openRouterRouting: model.compat.openRouterRouting ?? detected.openRouterRouting,
260
268
  vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
261
269
  supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
262
- extraBody: model.compat.extraBody,
270
+ extraBody: model.compat.extraBody ?? detected.extraBody,
263
271
  toolStrictMode: model.compat.toolStrictMode ?? detected.toolStrictMode,
264
272
  };
265
273
  }
@@ -27,6 +27,7 @@ import {
27
27
  type StopReason,
28
28
  type StreamFunction,
29
29
  type StreamOptions,
30
+ shouldSendServiceTier,
30
31
  type TextContent,
31
32
  type ThinkingContent,
32
33
  type Tool,
@@ -37,7 +38,7 @@ import {
37
38
  import { normalizeSystemPrompts } from "../utils";
38
39
  import { createAbortSourceTracker } from "../utils/abort";
39
40
  import { AssistantMessageEventStream } from "../utils/event-stream";
40
- import { toFireworksWireModelId } from "../utils/fireworks-model-id";
41
+ import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
41
42
  import {
42
43
  type CapturedHttpErrorResponse,
43
44
  finalizeErrorMessage,
@@ -486,7 +487,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
486
487
  }
487
488
  stream.push({ type: "start", partial: output });
488
489
 
489
- const parseMiniMaxThinkTags = model.provider === "minimax-code";
490
+ const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
490
491
  // Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
491
492
  // native API) leak chat-template tool-call markers in `delta.content` even
492
493
  // though tool calls are also surfaced structurally. Strip the leaked markers
@@ -1037,13 +1038,23 @@ function buildParams(
1037
1038
  maybeAddOpenRouterAnthropicCacheControl(model, messages);
1038
1039
  const supportsReasoningParams = model.provider !== "github-copilot";
1039
1040
 
1040
- // Kimi (including via OpenRouter) calculates TPM rate limits based on max_tokens, not actual output.
1041
- // Always send max_tokens to avoid their high default causing rate limit issues.
1041
+ // Kimi (including via OpenRouter and Fireworks router-form IDs such as
1042
+ // `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
1043
+ // max_tokens, not actual output. The official Kimi K2 model guidance
1044
+ // (https://docs.fireworks.ai/models/kimi-k2) also requires `max_tokens` for
1045
+ // every call since the family can otherwise emit very long reasoning traces
1046
+ // before the final answer. Always send max_tokens — match the same
1047
+ // Kimi-family regex used by the compat detector.
1042
1048
  // Note: Direct kimi-code provider is handled by the dedicated Kimi provider in kimi.ts.
1043
- const isKimi = model.id.includes("moonshotai/kimi");
1049
+ const isKimi = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
1044
1050
  const effectiveMaxTokens = options?.maxTokens ?? (isKimi ? model.maxTokens : undefined);
1045
1051
 
1046
- const requestModelId = model.provider === "fireworks" ? toFireworksWireModelId(model.id) : model.id;
1052
+ const requestModelId =
1053
+ model.provider === "fireworks"
1054
+ ? toFireworksWireModelId(model.id)
1055
+ : model.provider === "firepass"
1056
+ ? toFirepassWireModelId(model.id)
1057
+ : model.id;
1047
1058
  const params: OpenAICompletionsParams = {
1048
1059
  model: requestModelId,
1049
1060
  messages,
@@ -1092,17 +1103,25 @@ function buildParams(
1092
1103
  if (options?.frequencyPenalty !== undefined) {
1093
1104
  params.frequency_penalty = options.frequencyPenalty;
1094
1105
  }
1095
- const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
1096
- if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
1097
- params.service_tier = resolvedServiceTier;
1106
+ if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
1107
+ const resolved = resolveServiceTier(options?.serviceTier, model.provider);
1108
+ if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
1109
+ params.service_tier = resolved;
1110
+ }
1098
1111
  }
1099
1112
 
1100
- if (context.tools) {
1113
+ if (context.tools?.length) {
1101
1114
  const builtTools = convertTools(context.tools, compat, toolStrictModeOverride);
1102
1115
  params.tools = builtTools.tools;
1103
1116
  toolStrictMode = builtTools.toolStrictMode;
1104
- } else if (hasToolHistory(context.messages)) {
1105
- // Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results
1117
+ } else if (context.tools === undefined && hasToolHistory(context.messages)) {
1118
+ // Anthropic (via LiteLLM/proxy) requires the `tools` param when the conversation
1119
+ // contains tool_calls/tool_results, even when no tools are offered this turn.
1120
+ // Only inject the sentinel when the caller passed `context.tools = undefined`
1121
+ // (i.e. tools were not specified at all). An explicit `context.tools = []` means
1122
+ // the caller opted out of tools for this turn (as /btw and IRC background replies
1123
+ // do via AgentSession.runEphemeralTurn) — honour that intent and emit nothing,
1124
+ // so LiteLLM → Bedrock never sees an empty `toolConfig` block.
1106
1125
  params.tools = [];
1107
1126
  }
1108
1127
 
@@ -1110,6 +1129,18 @@ function buildParams(
1110
1129
  params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
1111
1130
  }
1112
1131
 
1132
+ if (params.tool_choice === "none" && (!Array.isArray(params.tools) || params.tools.length === 0)) {
1133
+ // `tool_choice: "none"` with no tools to gate is redundant and also
1134
+ // trips LiteLLM → Bedrock: the proxy serializes the directive into a
1135
+ // `toolConfig` block, and Bedrock requires `toolConfig.tools` to be
1136
+ // non-empty whenever the conversation already holds `toolUse`/`toolResult`
1137
+ // content. Drop it whenever the resolved tools list is missing or empty.
1138
+ // Side-channel turns hit this: `/btw` and IRC background replies route
1139
+ // through `AgentSession.runEphemeralTurn`, which sets `context.tools = []`
1140
+ // and `toolChoice: "none"` (see packages/coding-agent/src/session/agent-session.ts).
1141
+ delete params.tool_choice;
1142
+ }
1143
+
1113
1144
  if (supportsReasoningParams && compat.thinkingFormat === "zai" && model.reasoning) {
1114
1145
  // Z.ai uses binary thinking: { type: "enabled" | "disabled" }
1115
1146
  // Must explicitly disable since z.ai defaults to thinking enabled.
@@ -21,6 +21,7 @@ import {
21
21
  type ServiceTier,
22
22
  type StopReason,
23
23
  type StreamOptions,
24
+ shouldSendServiceTier,
24
25
  type TextContent,
25
26
  type TextSignatureV1,
26
27
  type ThinkingContent,
@@ -650,9 +651,11 @@ export function applyCommonResponsesSamplingParams<P extends CommonResponsesPara
650
651
  if (options?.minP !== undefined) params.min_p = options.minP;
651
652
  if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
652
653
  if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
653
- const resolvedServiceTier = resolveServiceTier(options?.serviceTier, provider);
654
- if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
655
- params.service_tier = resolvedServiceTier;
654
+ if (shouldSendServiceTier(options?.serviceTier, provider)) {
655
+ const resolved = resolveServiceTier(options?.serviceTier, provider);
656
+ if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
657
+ params.service_tier = resolved;
658
+ }
656
659
  }
657
660
  }
658
661
 
package/src/stream.ts CHANGED
@@ -83,6 +83,7 @@ const serviceProviderMap: Record<string, KeyResolver> = {
83
83
  cerebras: "CEREBRAS_API_KEY",
84
84
  xai: "XAI_API_KEY",
85
85
  fireworks: "FIREWORKS_API_KEY",
86
+ firepass: "FIREPASS_API_KEY",
86
87
  openrouter: "OPENROUTER_API_KEY",
87
88
  kilo: "KILO_API_KEY",
88
89
  "vercel-ai-gateway": "AI_GATEWAY_API_KEY",
package/src/types.ts CHANGED
@@ -110,6 +110,7 @@ export type KnownProvider =
110
110
  | "minimax-code-cn"
111
111
  | "github-copilot"
112
112
  | "fireworks"
113
+ | "firepass"
113
114
  | "gitlab-duo"
114
115
  | "cursor"
115
116
  | "deepseek"
@@ -1,4 +1,5 @@
1
1
  const FIREWORKS_WIRE_PREFIX = "accounts/fireworks/models/";
2
+ const FIREPASS_WIRE_PREFIX = "accounts/fireworks/routers/";
2
3
  const VERSION_SEPARATOR_PATTERN = /(?<=\d)p(?=\d)/g;
3
4
  const VERSION_DOT_PATTERN = /(?<=\d)\.(?=\d)/g;
4
5
 
@@ -11,3 +12,19 @@ export function toFireworksWireModelId(modelId: string): string {
11
12
  const stripped = modelId.startsWith(FIREWORKS_WIRE_PREFIX) ? modelId.slice(FIREWORKS_WIRE_PREFIX.length) : modelId;
12
13
  return `${FIREWORKS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
13
14
  }
15
+
16
+ /**
17
+ * Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
18
+ * endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
19
+ * We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
20
+ * to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time; this helper is the inverse mapping (wire id → public id).
21
+ */
22
+ export function toFirepassPublicModelId(modelId: string): string {
23
+ const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
24
+ return stripped.replace(VERSION_SEPARATOR_PATTERN, ".");
25
+ }
26
+
27
+ export function toFirepassWireModelId(modelId: string): string {
28
+ const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
29
+ return `${FIREPASS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
30
+ }
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Fire Pass login flow.
3
+ *
4
+ * Fire Pass is a Fireworks subscription product whose dedicated `fpk_…` API
5
+ * keys are scoped to the `accounts/fireworks/routers/kimi-k2p6-turbo` router
6
+ * (Kimi K2.6 Turbo). The key does NOT authorize `/v1/models`, so validation
7
+ * pings the chat completions endpoint with the router id directly.
8
+ * See https://docs.fireworks.ai/firepass.
9
+ */
10
+ import { createApiKeyLogin } from "./api-key-login";
11
+
12
+ export const loginFirepass = createApiKeyLogin({
13
+ providerLabel: "Fire Pass",
14
+ authUrl: "https://app.fireworks.ai/settings/users/api-keys",
15
+ instructions: "Create a dedicated Fire Pass API key in the Fireworks dashboard",
16
+ promptMessage: "Paste your Fire Pass API key",
17
+ placeholder: "fpk_...",
18
+ validation: {
19
+ kind: "chat-completions",
20
+ provider: "Fire Pass",
21
+ baseUrl: "https://api.fireworks.ai/inference/v1",
22
+ model: "accounts/fireworks/routers/kimi-k2p6-turbo",
23
+ },
24
+ });
@@ -55,6 +55,11 @@ const builtInOAuthProviders: OAuthProviderInfo[] = [
55
55
  name: "Fireworks",
56
56
  available: true,
57
57
  },
58
+ {
59
+ id: "firepass",
60
+ name: "Fire Pass (Fireworks Kimi K2.6 Turbo subscription)",
61
+ available: true,
62
+ },
58
63
  {
59
64
  id: "github-copilot",
60
65
  name: "GitHub Copilot",
@@ -301,6 +306,7 @@ export async function refreshOAuthToken(
301
306
  case "opencode-go":
302
307
  case "cerebras":
303
308
  case "fireworks":
309
+ case "firepass":
304
310
  case "nvidia":
305
311
  case "nanogpt":
306
312
  case "synthetic":
@@ -363,10 +369,14 @@ export async function getOAuthApiKey(
363
369
  }
364
370
 
365
371
  if (provider === "perplexity") {
372
+ // Perplexity JWTs usually omit `exp` (server-side sessions). Trust the JWT
373
+ // claim when present; otherwise treat the credential as non-expiring rather
374
+ // than honoring a stale stored `expires` (older logins wrote loginTime+1h).
375
+ const NEVER_EXPIRES = 8.64e15;
366
376
  const normalizedExpires =
367
377
  creds.expires > 0 && creds.expires < 10_000_000_000 ? creds.expires * 1000 : creds.expires;
368
378
  const jwtExpiry = getPerplexityJwtExpiryMs(creds.access);
369
- const expires = jwtExpiry && jwtExpiry > normalizedExpires ? jwtExpiry : normalizedExpires;
379
+ const expires = jwtExpiry ?? Math.max(normalizedExpires, NEVER_EXPIRES);
370
380
  if (expires !== creds.expires) {
371
381
  creds = { ...creds, expires };
372
382
  }
@@ -24,20 +24,26 @@ const APP_USER_AGENT = "Perplexity/641 CFNetwork/1568 Darwin/25.2.0";
24
24
  // JWT helpers
25
25
  // ---------------------------------------------------------------------------
26
26
 
27
- /** Extract expiry from a JWT. Falls back to 1 hour from now. Subtracts 5 min safety margin. */
27
+ /**
28
+ * Extract expiry from a JWT. Perplexity tokens generally lack an `exp` claim
29
+ * (their sessions are server-side and effectively non-expiring from the client's
30
+ * point of view), so we return a far-future sentinel when no numeric `exp` can be read (claim absent, or the token is malformed/undecodable).
31
+ * When `exp` IS present, subtract a 5-minute safety margin.
32
+ */
33
+ const NEVER_EXPIRES = 8.64e15; // largest time value (ms since epoch) a Date can represent
28
34
  function getJwtExpiry(token: string): number {
29
35
  try {
30
36
  const parts = token.split(".");
31
- if (parts.length !== 3) return Date.now() + 3600_000;
37
+ if (parts.length !== 3) return NEVER_EXPIRES;
32
38
  const payload = parts[1] ?? "";
33
39
  const decoded = JSON.parse(atob(payload.replace(/-/g, "+").replace(/_/g, "/")));
34
- if (decoded?.exp && typeof decoded.exp === "number") {
40
+ if (typeof decoded?.exp === "number" && Number.isFinite(decoded.exp)) {
35
41
  return decoded.exp * 1000 - 5 * 60_000;
36
42
  }
37
43
  } catch {
38
44
  // Ignore decode errors
39
45
  }
40
- return Date.now() + 3600_000;
46
+ return NEVER_EXPIRES;
41
47
  }
42
48
 
43
49
  /** Build OAuthCredentials from a Perplexity JWT string. */
@@ -15,6 +15,7 @@ export type OAuthProvider =
15
15
  | "cloudflare-ai-gateway"
16
16
  | "cursor"
17
17
  | "fireworks"
18
+ | "firepass"
18
19
  | "github-copilot"
19
20
  | "google-gemini-cli"
20
21
  | "google-antigravity"
@@ -243,8 +243,17 @@ function rewriteZodNode(node: JsonObject, seen: WeakSet<object>): unknown {
243
243
  case "pipe":
244
244
  case "transform": {
245
245
  const inner = walk(unwrapInnerSchema(def), seen);
246
- if (kind === "nullable" && isJsonObject(inner) && typeof inner.type === "string") {
247
- return { ...inner, type: [inner.type, "null"] };
246
+ if (kind === "nullable" && isJsonObject(inner)) {
247
+ if (typeof inner.type === "string") {
248
+ return { ...inner, type: [inner.type, "null"] };
249
+ }
250
+ if (Array.isArray(inner.type)) {
251
+ return (inner.type as string[]).includes("null")
252
+ ? inner
253
+ : { ...inner, type: [...(inner.type as string[]), "null"] };
254
+ }
255
+ // anyOf / allOf / $ref shapes — no string or array `type` to extend, so wrap in anyOf
256
+ return { anyOf: [inner, { type: "null" }] };
248
257
  }
249
258
  return inner;
250
259
  }