@oh-my-pi/pi-ai 15.1.7 → 15.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/dist/types/provider-models/openai-compat.d.ts +12 -0
- package/dist/types/types.d.ts +1 -1
- package/dist/types/utils/fireworks-model-id.d.ts +8 -0
- package/dist/types/utils/oauth/firepass.d.ts +1 -0
- package/dist/types/utils/oauth/types.d.ts +1 -1
- package/package.json +2 -2
- package/src/auth-storage.ts +6 -0
- package/src/model-cache.ts +14 -0
- package/src/models.json +27 -0
- package/src/provider-models/descriptors.ts +2 -0
- package/src/provider-models/openai-compat.ts +36 -4
- package/src/providers/anthropic.ts +10 -5
- package/src/providers/ollama.ts +26 -1
- package/src/providers/openai-chat-server.ts +2 -2
- package/src/providers/openai-completions-compat.ts +18 -10
- package/src/providers/openai-completions.ts +43 -12
- package/src/providers/openai-responses-shared.ts +6 -3
- package/src/stream.ts +1 -0
- package/src/types.ts +1 -0
- package/src/utils/fireworks-model-id.ts +17 -0
- package/src/utils/oauth/firepass.ts +24 -0
- package/src/utils/oauth/index.ts +11 -1
- package/src/utils/oauth/perplexity.ts +10 -4
- package/src/utils/oauth/types.ts +1 -0
- package/src/utils/schema/zod-decontaminate.ts +11 -2
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,24 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.1.9] - 2026-05-21
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- Fixed Ollama named tool forcing to send only the requested tool when the caller passes a named `toolChoice`, preserving `tool_choice: "required"` while preventing local models from selecting a different tool. ([#1236](https://github.com/can1357/oh-my-pi/issues/1236))
|
|
10
|
+
- Fixed `/btw` (and IRC background replies) returning a `BedrockException` 400 (`The toolConfig field must be defined when using toolUse and toolResult content blocks.`) on LiteLLM → Bedrock once the session has tool-call history. Two source fixes in `buildParams`: (1) `if (context.tools)` → `if (context.tools?.length)` so an explicit `context.tools = []` (the /btw opt-out) never routes through `convertTools` and never emits an empty `"tools"` array; (2) `else if (hasToolHistory(...))` → `else if (context.tools === undefined && hasToolHistory(...))` so the Anthropic-proxy sentinel that injects `tools: []` for tool-history turns is suppressed when the caller explicitly opted out, preventing it from re-introducing the empty array. As defence-in-depth, `tool_choice: "none"` is also dropped when the resolved tools list is missing or empty. ([#1227](https://github.com/can1357/oh-my-pi/issues/1227))
|
|
11
|
+
|
|
12
|
+
## [15.1.8] - 2026-05-20
|
|
13
|
+
### Added
|
|
14
|
+
|
|
15
|
+
- Added Fireworks Fire Pass as a separate `firepass` provider with API-key login flow, bundled `kimi-k2.6-turbo` model entry (Kimi K2.6 Turbo), and wire-id translation from the friendly catalog id to the `accounts/fireworks/routers/kimi-k2p6-turbo` router endpoint. Fire Pass keys (`fpk_…`) authorize only the dedicated router and reject `/v1/models`, so login validation pings chat completions against the router id directly. Extended the openai-completions Kimi-family safety net so the firepass entry inherits the per-Fireworks-docs "always send `max_tokens`" default ([Kimi K2 guide](https://docs.fireworks.ai/models/kimi-k2)); the router's accepted `reasoning_effort` set includes `xhigh`, so it is forwarded verbatim rather than remapped. See https://docs.fireworks.ai/firepass.
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
|
|
19
|
+
- Fixed DeepSeek V4 direct API requests with tools to keep documented thinking mode instead of dropping reasoning: lower OMP efforts now map to DeepSeek's supported `high`, `tool_choice` is omitted, `thinking: { type: "enabled" }` and `max_tokens` are sent, and partial user `reasoningEffortMap` overrides merge with DeepSeek defaults. ([#1207](https://github.com/can1357/oh-my-pi/issues/1207))
|
|
20
|
+
- Fixed model cache schema v2 databases so offline refreshes preserve cached provider discoveries after upgrading to schema v3 and subsequent online refreshes can overwrite the cache. ([#1219](https://github.com/can1357/oh-my-pi/issues/1219))
|
|
21
|
+
- Fixed Perplexity OAuth credentials being treated as expired one hour after login. `getJwtExpiry` was fabricating `expires = now + 1h` whenever the JWT had no `exp` claim (the common case — Perplexity sessions are server-side). Once the hour elapsed, `getOAuthApiKey` would mark the cred expired and the search provider's loader would silently skip it, surfacing as "logged out". Logins with no `exp` now persist a far-future sentinel; `getOAuthApiKey` also normalizes any stale `expires` written by older builds.
|
|
22
|
+
|
|
5
23
|
## [15.1.7] - 2026-05-19
|
|
6
24
|
### Added
|
|
7
25
|
|
|
@@ -11,6 +29,7 @@
|
|
|
11
29
|
### Fixed
|
|
12
30
|
|
|
13
31
|
- Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
|
|
32
|
+
- Fixed MiniMax Coding Plan CN streaming `<think>...</think>` reasoning as visible assistant text. The OpenAI-compatible stream parser now enables the existing MiniMax tag parser for both `minimax-code` and `minimax-code-cn`, so CN responses become structured `thinking` blocks instead of raw text. ([#1203](https://github.com/can1357/oh-my-pi/issues/1203))
|
|
14
33
|
|
|
15
34
|
## [15.1.6] - 2026-05-19
|
|
16
35
|
|
|
@@ -63,6 +63,18 @@ export interface FireworksModelManagerConfig {
|
|
|
63
63
|
baseUrl?: string;
|
|
64
64
|
}
|
|
65
65
|
export declare function fireworksModelManagerOptions(config?: FireworksModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
66
|
+
export interface FirepassModelManagerConfig {
|
|
67
|
+
apiKey?: string;
|
|
68
|
+
baseUrl?: string;
|
|
69
|
+
}
|
|
70
|
+
/**
|
|
71
|
+
* Fire Pass is a Fireworks subscription product that exposes a single router
|
|
72
|
+
* model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
|
|
73
|
+
* The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
|
|
74
|
+
* never performs dynamic discovery — the bundled catalog entry is canonical.
|
|
75
|
+
* See https://docs.fireworks.ai/firepass.
|
|
76
|
+
*/
|
|
77
|
+
export declare function firepassModelManagerOptions(_config?: FirepassModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
66
78
|
export interface MistralModelManagerConfig {
|
|
67
79
|
apiKey?: string;
|
|
68
80
|
baseUrl?: string;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -48,7 +48,7 @@ export interface ThinkingConfig {
|
|
|
48
48
|
/** Provider-specific transport used to encode the selected effort. */
|
|
49
49
|
mode: ThinkingControlMode;
|
|
50
50
|
}
|
|
51
|
-
export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
|
|
51
|
+
export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
|
|
52
52
|
export type Provider = KnownProvider | string;
|
|
53
53
|
import type { Effort } from "./model-thinking";
|
|
54
54
|
/** Token budgets for each thinking level (token-based providers only) */
|
|
@@ -1,2 +1,10 @@
|
|
|
1
1
|
export declare function toFireworksPublicModelId(modelId: string): string;
|
|
2
2
|
export declare function toFireworksWireModelId(modelId: string): string;
|
|
3
|
+
/**
|
|
4
|
+
* Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
|
|
5
|
+
* endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
|
|
6
|
+
* We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
|
|
7
|
+
* to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time.
|
|
8
|
+
*/
|
|
9
|
+
export declare function toFirepassPublicModelId(modelId: string): string;
|
|
10
|
+
export declare function toFirepassWireModelId(modelId: string): string;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const loginFirepass: (options: import("./types").OAuthController) => Promise<string>;
|
|
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
|
|
|
7
7
|
email?: string;
|
|
8
8
|
accountId?: string;
|
|
9
9
|
};
|
|
10
|
-
export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
|
|
10
|
+
export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
|
|
11
11
|
export type OAuthProviderId = OAuthProvider | (string & {});
|
|
12
12
|
export type OAuthPrompt = {
|
|
13
13
|
message: string;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.1.7",
|
|
4
|
+
"version": "15.1.9",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
"dependencies": {
|
|
44
44
|
"@anthropic-ai/sdk": "^0.94.0",
|
|
45
45
|
"@bufbuild/protobuf": "^2.12.0",
|
|
46
|
-
"@oh-my-pi/pi-utils": "15.1.7",
|
|
46
|
+
"@oh-my-pi/pi-utils": "15.1.9",
|
|
47
47
|
"openai": "^6.36.0",
|
|
48
48
|
"partial-json": "^0.1.7",
|
|
49
49
|
"zod": "4.4.3"
|
package/src/auth-storage.ts
CHANGED
|
@@ -1344,6 +1344,12 @@ export class AuthStorage {
|
|
|
1344
1344
|
await saveApiKeyCredential(apiKey);
|
|
1345
1345
|
return;
|
|
1346
1346
|
}
|
|
1347
|
+
case "firepass": {
|
|
1348
|
+
const { loginFirepass } = await import("./utils/oauth/firepass");
|
|
1349
|
+
const apiKey = await loginFirepass(ctrl);
|
|
1350
|
+
await saveApiKeyCredential(apiKey);
|
|
1351
|
+
return;
|
|
1352
|
+
}
|
|
1347
1353
|
case "zai": {
|
|
1348
1354
|
const { loginZai } = await import("./utils/oauth/zai");
|
|
1349
1355
|
const apiKey = await loginZai(ctrl);
|
package/src/model-cache.ts
CHANGED
|
@@ -17,6 +17,10 @@ interface CacheRow {
|
|
|
17
17
|
models: string;
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
+
interface TableInfoRow {
|
|
21
|
+
name: string;
|
|
22
|
+
}
|
|
23
|
+
|
|
20
24
|
interface CacheEntry<TApi extends Api = Api> {
|
|
21
25
|
models: Model<TApi>[];
|
|
22
26
|
fresh: boolean;
|
|
@@ -55,11 +59,21 @@ function getDb(dbPath?: string): Database {
|
|
|
55
59
|
models TEXT NOT NULL
|
|
56
60
|
)
|
|
57
61
|
`);
|
|
62
|
+
migrateCacheSchema(db);
|
|
63
|
+
|
|
58
64
|
sharedDb = db;
|
|
59
65
|
sharedDbPath = resolvedPath;
|
|
60
66
|
return db;
|
|
61
67
|
}
|
|
62
68
|
|
|
69
|
+
function migrateCacheSchema(db: Database): void {
|
|
70
|
+
const columns = db.prepare("PRAGMA table_info(model_cache)").all() as TableInfoRow[];
|
|
71
|
+
if (!columns.some(column => column.name === "static_fingerprint")) {
|
|
72
|
+
db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
|
|
73
|
+
}
|
|
74
|
+
db.run("UPDATE model_cache SET version = ? WHERE version = 2", [CACHE_SCHEMA_VERSION]);
|
|
75
|
+
}
|
|
76
|
+
|
|
63
77
|
export function readModelCache<TApi extends Api>(
|
|
64
78
|
providerId: string,
|
|
65
79
|
ttlMs: number,
|
package/src/models.json
CHANGED
|
@@ -5027,6 +5027,33 @@
|
|
|
5027
5027
|
}
|
|
5028
5028
|
}
|
|
5029
5029
|
},
|
|
5030
|
+
"firepass": {
|
|
5031
|
+
"kimi-k2.6-turbo": {
|
|
5032
|
+
"id": "kimi-k2.6-turbo",
|
|
5033
|
+
"name": "Kimi K2.6 Turbo (Fire Pass)",
|
|
5034
|
+
"api": "openai-completions",
|
|
5035
|
+
"provider": "firepass",
|
|
5036
|
+
"baseUrl": "https://api.fireworks.ai/inference/v1",
|
|
5037
|
+
"reasoning": true,
|
|
5038
|
+
"input": [
|
|
5039
|
+
"text",
|
|
5040
|
+
"image"
|
|
5041
|
+
],
|
|
5042
|
+
"cost": {
|
|
5043
|
+
"input": 0,
|
|
5044
|
+
"output": 0,
|
|
5045
|
+
"cacheRead": 0,
|
|
5046
|
+
"cacheWrite": 0
|
|
5047
|
+
},
|
|
5048
|
+
"contextWindow": 262144,
|
|
5049
|
+
"maxTokens": 65536,
|
|
5050
|
+
"thinking": {
|
|
5051
|
+
"mode": "effort",
|
|
5052
|
+
"minLevel": "minimal",
|
|
5053
|
+
"maxLevel": "xhigh"
|
|
5054
|
+
}
|
|
5055
|
+
}
|
|
5056
|
+
},
|
|
5030
5057
|
"fireworks": {
|
|
5031
5058
|
"deepseek-v4-pro": {
|
|
5032
5059
|
"id": "deepseek-v4-pro",
|
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
cerebrasModelManagerOptions,
|
|
15
15
|
cloudflareAiGatewayModelManagerOptions,
|
|
16
16
|
deepseekModelManagerOptions,
|
|
17
|
+
firepassModelManagerOptions,
|
|
17
18
|
fireworksModelManagerOptions,
|
|
18
19
|
githubCopilotModelManagerOptions,
|
|
19
20
|
groqModelManagerOptions,
|
|
@@ -152,6 +153,7 @@ export const PROVIDER_DESCRIPTORS: readonly ProviderDescriptor[] = [
|
|
|
152
153
|
config => fireworksModelManagerOptions(config),
|
|
153
154
|
catalog("Fireworks", ["FIREWORKS_API_KEY"]),
|
|
154
155
|
),
|
|
156
|
+
descriptor("firepass", "kimi-k2.6-turbo", config => firepassModelManagerOptions(config)),
|
|
155
157
|
descriptor("xai", "grok-4-fast-non-reasoning", config => xaiModelManagerOptions(config)),
|
|
156
158
|
catalogDescriptor(
|
|
157
159
|
"deepseek",
|
|
@@ -692,6 +692,30 @@ export function fireworksModelManagerOptions(
|
|
|
692
692
|
};
|
|
693
693
|
}
|
|
694
694
|
|
|
695
|
+
// ---------------------------------------------------------------------------
|
|
696
|
+
// 7.6 Fire Pass (Fireworks Kimi K2.6 Turbo subscription)
|
|
697
|
+
// ---------------------------------------------------------------------------
|
|
698
|
+
|
|
699
|
+
export interface FirepassModelManagerConfig {
|
|
700
|
+
apiKey?: string;
|
|
701
|
+
baseUrl?: string;
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
/**
|
|
705
|
+
* Fire Pass is a Fireworks subscription product that exposes a single router
|
|
706
|
+
* model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
|
|
707
|
+
* The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
|
|
708
|
+
* never performs dynamic discovery — the bundled catalog entry is canonical.
|
|
709
|
+
* See https://docs.fireworks.ai/firepass.
|
|
710
|
+
*/
|
|
711
|
+
export function firepassModelManagerOptions(
|
|
712
|
+
_config?: FirepassModelManagerConfig,
|
|
713
|
+
): ModelManagerOptions<"openai-completions"> {
|
|
714
|
+
return {
|
|
715
|
+
providerId: "firepass",
|
|
716
|
+
};
|
|
717
|
+
}
|
|
718
|
+
|
|
695
719
|
// ---------------------------------------------------------------------------
|
|
696
720
|
// 7. Mistral
|
|
697
721
|
// ---------------------------------------------------------------------------
|
|
@@ -2083,18 +2107,26 @@ const MODELS_DEV_PROVIDER_DESCRIPTORS_CORE: readonly ModelsDevProviderDescriptor
|
|
|
2083
2107
|
// ids are kept off the catalog until the issue thread asks for them.
|
|
2084
2108
|
filterModel: (id, m) => m.tool_call === true && id.startsWith("deepseek-v4"),
|
|
2085
2109
|
compat: {
|
|
2086
|
-
//
|
|
2110
|
+
// DeepSeek V4 only accepts `high`/`max`; map lower OMP levels upward so
|
|
2111
|
+
// subagent "minimal" turns stay in documented thinking mode instead of
|
|
2112
|
+
// sending unsupported effort strings.
|
|
2113
|
+
supportsDeveloperRole: false,
|
|
2087
2114
|
supportsReasoningEffort: true,
|
|
2088
|
-
reasoningEffortMap: { xhigh: "max" },
|
|
2089
|
-
|
|
2090
|
-
//
|
|
2115
|
+
reasoningEffortMap: { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" },
|
|
2116
|
+
maxTokensField: "max_tokens",
|
|
2117
|
+
// DeepSeek V4 thinking mode rejects the `tool_choice` control parameter.
|
|
2118
|
+
// Tool calls still work without it; the API defaults to auto when tools exist.
|
|
2091
2119
|
supportsToolChoice: false,
|
|
2120
|
+
// DeepSeek V4's OpenAI format docs enable thinking with both the toggle and
|
|
2121
|
+
// reasoning_effort. Keep the toggle explicit for built-in models.
|
|
2122
|
+
extraBody: { thinking: { type: "enabled" } },
|
|
2092
2123
|
// DeepSeek emits chain-of-thought via `reasoning_content` and requires it
|
|
2093
2124
|
// to round-trip on assistant tool-call messages so the model can resume
|
|
2094
2125
|
// from prior thinking (interleaved.field=reasoning_content on models.dev,
|
|
2095
2126
|
// matches the kimi/openrouter handling already in detectCompat).
|
|
2096
2127
|
reasoningContentField: "reasoning_content",
|
|
2097
2128
|
requiresReasoningContentForToolCalls: true,
|
|
2129
|
+
requiresAssistantContentForToolCalls: true,
|
|
2098
2130
|
},
|
|
2099
2131
|
}),
|
|
2100
2132
|
];
|
|
@@ -1060,16 +1060,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1060
1060
|
let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
|
|
1061
1061
|
const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
|
|
1062
1062
|
let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
|
|
1063
|
-
const replacementPayload = await options?.onPayload?.(nextParams, model);
|
|
1064
|
-
if (replacementPayload !== undefined) {
|
|
1065
|
-
nextParams = replacementPayload as typeof nextParams;
|
|
1066
|
-
}
|
|
1067
1063
|
if (disableStrictTools) {
|
|
1068
1064
|
dropAnthropicStrictTools(nextParams);
|
|
1069
1065
|
}
|
|
1070
1066
|
if (dropFastMode) {
|
|
1071
1067
|
dropAnthropicFastMode(nextParams);
|
|
1072
1068
|
}
|
|
1069
|
+
const replacementPayload = await options?.onPayload?.(nextParams, model);
|
|
1070
|
+
if (replacementPayload !== undefined) {
|
|
1071
|
+
nextParams = replacementPayload as typeof nextParams;
|
|
1072
|
+
}
|
|
1073
1073
|
rawRequestDump = {
|
|
1074
1074
|
provider: model.provider,
|
|
1075
1075
|
api: output.api,
|
|
@@ -2388,7 +2388,12 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
|
|
|
2388
2388
|
result.properties = normalizedProperties;
|
|
2389
2389
|
}
|
|
2390
2390
|
if (isRecord(result.additionalProperties)) {
|
|
2391
|
-
|
|
2391
|
+
const normalized = normalizeAnthropicToolSchema(result.additionalProperties);
|
|
2392
|
+
if (isRecord(normalized) && Object.keys(normalized).length === 0) {
|
|
2393
|
+
result.additionalProperties = true;
|
|
2394
|
+
} else {
|
|
2395
|
+
result.additionalProperties = normalized;
|
|
2396
|
+
}
|
|
2392
2397
|
}
|
|
2393
2398
|
if (Array.isArray(result.items)) {
|
|
2394
2399
|
result.items = result.items.map(item => normalizeAnthropicToolSchema(item));
|
package/src/providers/ollama.ts
CHANGED
|
@@ -116,6 +116,29 @@ function mapToolChoice(toolChoice: ToolChoice | undefined): "auto" | "none" | "r
|
|
|
116
116
|
return undefined;
|
|
117
117
|
}
|
|
118
118
|
|
|
119
|
+
function getNamedToolChoiceName(toolChoice: ToolChoice | undefined): string | undefined {
|
|
120
|
+
if (!toolChoice || typeof toolChoice === "string") {
|
|
121
|
+
return undefined;
|
|
122
|
+
}
|
|
123
|
+
if ("function" in toolChoice) {
|
|
124
|
+
return toolChoice.function.name;
|
|
125
|
+
}
|
|
126
|
+
return toolChoice.name;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
function selectToolsForToolChoice(tools: Tool[] | undefined, toolChoice: ToolChoice | undefined): Tool[] | undefined {
|
|
130
|
+
const toolName = getNamedToolChoiceName(toolChoice);
|
|
131
|
+
if (!toolName || !tools) {
|
|
132
|
+
return tools;
|
|
133
|
+
}
|
|
134
|
+
for (const tool of tools) {
|
|
135
|
+
if (tool.name === toolName) {
|
|
136
|
+
return [tool];
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
return [];
|
|
140
|
+
}
|
|
141
|
+
|
|
119
142
|
function toPlainContent(content: string | Array<{ type: "text" | "image"; text?: string; data?: string }>): {
|
|
120
143
|
content: string;
|
|
121
144
|
images?: string[];
|
|
@@ -231,10 +254,12 @@ function convertTools(tools: Tool[] | undefined): OllamaFunctionTool[] | undefin
|
|
|
231
254
|
function createChatBody(model: Model<"ollama-chat">, context: Context, options: OllamaChatOptions | undefined) {
|
|
232
255
|
const think = mapReasoning(options?.reasoning);
|
|
233
256
|
const toolChoice = mapToolChoice(options?.toolChoice);
|
|
257
|
+
const selectedTools = selectToolsForToolChoice(context.tools, options?.toolChoice);
|
|
258
|
+
const tools = convertTools(selectedTools);
|
|
234
259
|
return {
|
|
235
260
|
model: model.id,
|
|
236
261
|
messages: convertMessages(model, context),
|
|
237
|
-
...(
|
|
262
|
+
...(tools ? { tools } : {}),
|
|
238
263
|
...(think !== undefined ? { think } : {}),
|
|
239
264
|
...(toolChoice !== undefined ? { tool_choice: toolChoice } : {}),
|
|
240
265
|
...(options?.maxTokens !== undefined ? { options: { num_predict: options.maxTokens } } : {}),
|
|
@@ -11,7 +11,7 @@ import type {
|
|
|
11
11
|
Context,
|
|
12
12
|
ImageContent,
|
|
13
13
|
Message,
|
|
14
|
-
|
|
14
|
+
ResolvedServiceTier,
|
|
15
15
|
StopReason,
|
|
16
16
|
TextContent,
|
|
17
17
|
Tool,
|
|
@@ -36,7 +36,7 @@ function isReasoningEffort(value: unknown): value is ReasoningEffort {
|
|
|
36
36
|
return value === "minimal" || value === "low" || value === "medium" || value === "high" || value === "xhigh";
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
function isServiceTier(value: unknown): value is
|
|
39
|
+
function isServiceTier(value: unknown): value is ResolvedServiceTier {
|
|
40
40
|
return value === "auto" || value === "default" || value === "flex" || value === "scale" || value === "priority";
|
|
41
41
|
}
|
|
42
42
|
|
|
@@ -52,7 +52,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
52
52
|
const isCerebras = provider === "cerebras" || baseUrl.includes("cerebras.ai");
|
|
53
53
|
const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
|
|
54
54
|
const isKilo = provider === "kilo" || baseUrl.includes("api.kilo.ai");
|
|
55
|
-
const isKimiModel = model.id.includes("moonshotai/kimi") ||
|
|
55
|
+
const isKimiModel = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
|
|
56
56
|
const isMoonshotKimi =
|
|
57
57
|
isKimiModel &&
|
|
58
58
|
(provider === "moonshot" ||
|
|
@@ -79,7 +79,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
79
79
|
baseUrl.includes("deepseek.com") ||
|
|
80
80
|
lowerId.includes("deepseek") ||
|
|
81
81
|
lowerName.includes("deepseek");
|
|
82
|
-
|
|
82
|
+
const isDirectDeepseekApi = provider === "deepseek" || baseUrl.includes("api.deepseek.com");
|
|
83
|
+
const isDirectDeepseekReasoning = isDirectDeepseekApi && isDeepseekFamily && Boolean(model.reasoning);
|
|
83
84
|
const isNonStandard =
|
|
84
85
|
isCerebras ||
|
|
85
86
|
provider === "xai" ||
|
|
@@ -102,7 +103,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
102
103
|
provider === "mistral" ||
|
|
103
104
|
baseUrl.includes("mistral.ai") ||
|
|
104
105
|
baseUrl.includes("chutes.ai") ||
|
|
105
|
-
baseUrl.includes("fireworks.ai")
|
|
106
|
+
baseUrl.includes("fireworks.ai") ||
|
|
107
|
+
isDirectDeepseekApi;
|
|
106
108
|
const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
|
|
107
109
|
const isMistral = provider === "mistral" || baseUrl.includes("mistral.ai");
|
|
108
110
|
|
|
@@ -162,7 +164,13 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
162
164
|
xhigh: "default",
|
|
163
165
|
} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
|
|
164
166
|
: isDeepseekFamily && model.reasoning
|
|
165
|
-
? {
|
|
167
|
+
? ({
|
|
168
|
+
minimal: "high",
|
|
169
|
+
low: "high",
|
|
170
|
+
medium: "high",
|
|
171
|
+
high: "high",
|
|
172
|
+
xhigh: "max",
|
|
173
|
+
} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
|
|
166
174
|
: {};
|
|
167
175
|
|
|
168
176
|
return {
|
|
@@ -173,8 +181,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
173
181
|
reasoningEffortMap,
|
|
174
182
|
supportsUsageInStreaming: !isCerebras,
|
|
175
183
|
disableReasoningOnForcedToolChoice: isKimiModel || isAnthropicModel,
|
|
176
|
-
disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning),
|
|
177
|
-
supportsToolChoice:
|
|
184
|
+
disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning) && !isOpenRouter,
|
|
185
|
+
supportsToolChoice: !isDirectDeepseekReasoning,
|
|
178
186
|
maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
|
|
179
187
|
requiresToolResultName: isMistral,
|
|
180
188
|
requiresAssistantAfterToolResult: false,
|
|
@@ -204,11 +212,11 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
204
212
|
// DeepSeek V4 rejects synthetic reasoning_content placeholders (".") on tool-call turns.
|
|
205
213
|
// Kimi and OpenRouter accept them when actual reasoning is unavailable.
|
|
206
214
|
allowsSyntheticReasoningContentForToolCalls: !isDeepseekFamily || !model.reasoning,
|
|
207
|
-
requiresAssistantContentForToolCalls: isKimiModel,
|
|
215
|
+
requiresAssistantContentForToolCalls: isKimiModel || isDirectDeepseekReasoning,
|
|
208
216
|
openRouterRouting: undefined,
|
|
209
217
|
vercelGatewayRouting: undefined,
|
|
210
218
|
supportsStrictMode: detectStrictModeSupport(provider, baseUrl),
|
|
211
|
-
extraBody: undefined,
|
|
219
|
+
extraBody: isDirectDeepseekReasoning ? { thinking: { type: "enabled" } } : undefined,
|
|
212
220
|
toolStrictMode: isCerebras ? "all_strict" : "mixed",
|
|
213
221
|
};
|
|
214
222
|
}
|
|
@@ -235,7 +243,7 @@ export function resolveOpenAICompat(
|
|
|
235
243
|
supportsMultipleSystemMessages:
|
|
236
244
|
model.compat.supportsMultipleSystemMessages ?? detected.supportsMultipleSystemMessages,
|
|
237
245
|
supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
|
|
238
|
-
reasoningEffortMap: model.compat.reasoningEffortMap ??
|
|
246
|
+
reasoningEffortMap: { ...detected.reasoningEffortMap, ...(model.compat.reasoningEffortMap ?? {}) },
|
|
239
247
|
supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
|
|
240
248
|
supportsToolChoice: model.compat.supportsToolChoice ?? detected.supportsToolChoice,
|
|
241
249
|
maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
|
|
@@ -259,7 +267,7 @@ export function resolveOpenAICompat(
|
|
|
259
267
|
openRouterRouting: model.compat.openRouterRouting ?? detected.openRouterRouting,
|
|
260
268
|
vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
|
|
261
269
|
supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
|
|
262
|
-
extraBody: model.compat.extraBody,
|
|
270
|
+
extraBody: model.compat.extraBody ?? detected.extraBody,
|
|
263
271
|
toolStrictMode: model.compat.toolStrictMode ?? detected.toolStrictMode,
|
|
264
272
|
};
|
|
265
273
|
}
|
|
@@ -27,6 +27,7 @@ import {
|
|
|
27
27
|
type StopReason,
|
|
28
28
|
type StreamFunction,
|
|
29
29
|
type StreamOptions,
|
|
30
|
+
shouldSendServiceTier,
|
|
30
31
|
type TextContent,
|
|
31
32
|
type ThinkingContent,
|
|
32
33
|
type Tool,
|
|
@@ -37,7 +38,7 @@ import {
|
|
|
37
38
|
import { normalizeSystemPrompts } from "../utils";
|
|
38
39
|
import { createAbortSourceTracker } from "../utils/abort";
|
|
39
40
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
40
|
-
import { toFireworksWireModelId } from "../utils/fireworks-model-id";
|
|
41
|
+
import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
|
|
41
42
|
import {
|
|
42
43
|
type CapturedHttpErrorResponse,
|
|
43
44
|
finalizeErrorMessage,
|
|
@@ -486,7 +487,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
486
487
|
}
|
|
487
488
|
stream.push({ type: "start", partial: output });
|
|
488
489
|
|
|
489
|
-
const parseMiniMaxThinkTags = model.provider === "minimax-code";
|
|
490
|
+
const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
|
|
490
491
|
// Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
|
|
491
492
|
// native API) leak chat-template tool-call markers in `delta.content` even
|
|
492
493
|
// though tool calls are also surfaced structurally. Strip the leaked markers
|
|
@@ -1037,13 +1038,23 @@ function buildParams(
|
|
|
1037
1038
|
maybeAddOpenRouterAnthropicCacheControl(model, messages);
|
|
1038
1039
|
const supportsReasoningParams = model.provider !== "github-copilot";
|
|
1039
1040
|
|
|
1040
|
-
// Kimi (including via OpenRouter
|
|
1041
|
-
//
|
|
1041
|
+
// Kimi (including via OpenRouter and Fireworks router-form IDs such as
|
|
1042
|
+
// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
|
|
1043
|
+
// max_tokens, not actual output. The official Kimi K2 model guidance
|
|
1044
|
+
// (https://docs.fireworks.ai/models/kimi-k2) also requires `max_tokens` for
|
|
1045
|
+
// every call since the family can otherwise emit very long reasoning traces
|
|
1046
|
+
// before the final answer. Always send max_tokens — match the same
|
|
1047
|
+
// Kimi-family regex used by the compat detector.
|
|
1042
1048
|
// Note: Direct kimi-code provider is handled by the dedicated Kimi provider in kimi.ts.
|
|
1043
|
-
const isKimi = model.id.includes("moonshotai/kimi");
|
|
1049
|
+
const isKimi = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
|
|
1044
1050
|
const effectiveMaxTokens = options?.maxTokens ?? (isKimi ? model.maxTokens : undefined);
|
|
1045
1051
|
|
|
1046
|
-
const requestModelId =
|
|
1052
|
+
const requestModelId =
|
|
1053
|
+
model.provider === "fireworks"
|
|
1054
|
+
? toFireworksWireModelId(model.id)
|
|
1055
|
+
: model.provider === "firepass"
|
|
1056
|
+
? toFirepassWireModelId(model.id)
|
|
1057
|
+
: model.id;
|
|
1047
1058
|
const params: OpenAICompletionsParams = {
|
|
1048
1059
|
model: requestModelId,
|
|
1049
1060
|
messages,
|
|
@@ -1092,17 +1103,25 @@ function buildParams(
|
|
|
1092
1103
|
if (options?.frequencyPenalty !== undefined) {
|
|
1093
1104
|
params.frequency_penalty = options.frequencyPenalty;
|
|
1094
1105
|
}
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1106
|
+
if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
|
|
1107
|
+
const resolved = resolveServiceTier(options?.serviceTier, model.provider);
|
|
1108
|
+
if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
|
|
1109
|
+
params.service_tier = resolved;
|
|
1110
|
+
}
|
|
1098
1111
|
}
|
|
1099
1112
|
|
|
1100
|
-
if (context.tools) {
|
|
1113
|
+
if (context.tools?.length) {
|
|
1101
1114
|
const builtTools = convertTools(context.tools, compat, toolStrictModeOverride);
|
|
1102
1115
|
params.tools = builtTools.tools;
|
|
1103
1116
|
toolStrictMode = builtTools.toolStrictMode;
|
|
1104
|
-
} else if (hasToolHistory(context.messages)) {
|
|
1105
|
-
// Anthropic (via LiteLLM/proxy) requires tools param when conversation
|
|
1117
|
+
} else if (context.tools === undefined && hasToolHistory(context.messages)) {
|
|
1118
|
+
// Anthropic (via LiteLLM/proxy) requires the `tools` param when the conversation
|
|
1119
|
+
// contains tool_calls/tool_results, even when no tools are offered this turn.
|
|
1120
|
+
// Only inject the sentinel when the caller passed `context.tools = undefined`
|
|
1121
|
+
// (i.e. tools were not specified at all). An explicit `context.tools = []` means
|
|
1122
|
+
// the caller opted out of tools for this turn (as /btw and IRC background replies
|
|
1123
|
+
// do via AgentSession.runEphemeralTurn) — honour that intent and emit nothing,
|
|
1124
|
+
// so LiteLLM → Bedrock never sees an empty `toolConfig` block.
|
|
1106
1125
|
params.tools = [];
|
|
1107
1126
|
}
|
|
1108
1127
|
|
|
@@ -1110,6 +1129,18 @@ function buildParams(
|
|
|
1110
1129
|
params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
|
|
1111
1130
|
}
|
|
1112
1131
|
|
|
1132
|
+
if (params.tool_choice === "none" && (!Array.isArray(params.tools) || params.tools.length === 0)) {
|
|
1133
|
+
// `tool_choice: "none"` with no tools to gate is redundant and also
|
|
1134
|
+
// trips LiteLLM → Bedrock: the proxy serializes the directive into a
|
|
1135
|
+
// `toolConfig` block, and Bedrock requires `toolConfig.tools` to be
|
|
1136
|
+
// non-empty whenever the conversation already holds `toolUse`/`toolResult`
|
|
1137
|
+
// content. Drop it whenever the resolved tools list is missing or empty.
|
|
1138
|
+
// Side-channel turns hit this: `/btw` and IRC background replies route
|
|
1139
|
+
// through `AgentSession.runEphemeralTurn`, which sets `context.tools = []`
|
|
1140
|
+
// and `toolChoice: "none"` (see packages/coding-agent/src/session/agent-session.ts).
|
|
1141
|
+
delete params.tool_choice;
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1113
1144
|
if (supportsReasoningParams && compat.thinkingFormat === "zai" && model.reasoning) {
|
|
1114
1145
|
// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
|
|
1115
1146
|
// Must explicitly disable since z.ai defaults to thinking enabled.
|
|
package/src/providers/openai-responses-shared.ts
CHANGED
|
@@ -21,6 +21,7 @@ import {
|
|
|
21
21
|
type ServiceTier,
|
|
22
22
|
type StopReason,
|
|
23
23
|
type StreamOptions,
|
|
24
|
+
shouldSendServiceTier,
|
|
24
25
|
type TextContent,
|
|
25
26
|
type TextSignatureV1,
|
|
26
27
|
type ThinkingContent,
|
|
@@ -650,9 +651,11 @@ export function applyCommonResponsesSamplingParams<P extends CommonResponsesPara
|
|
|
650
651
|
if (options?.minP !== undefined) params.min_p = options.minP;
|
|
651
652
|
if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
|
|
652
653
|
if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
654
|
+
if (shouldSendServiceTier(options?.serviceTier, provider)) {
|
|
655
|
+
const resolved = resolveServiceTier(options?.serviceTier, provider);
|
|
656
|
+
if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
|
|
657
|
+
params.service_tier = resolved;
|
|
658
|
+
}
|
|
656
659
|
}
|
|
657
660
|
}
|
|
658
661
|
|
package/src/stream.ts
CHANGED
|
@@ -83,6 +83,7 @@ const serviceProviderMap: Record<string, KeyResolver> = {
|
|
|
83
83
|
cerebras: "CEREBRAS_API_KEY",
|
|
84
84
|
xai: "XAI_API_KEY",
|
|
85
85
|
fireworks: "FIREWORKS_API_KEY",
|
|
86
|
+
firepass: "FIREPASS_API_KEY",
|
|
86
87
|
openrouter: "OPENROUTER_API_KEY",
|
|
87
88
|
kilo: "KILO_API_KEY",
|
|
88
89
|
"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
|
package/src/types.ts
CHANGED
|
package/src/utils/fireworks-model-id.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
const FIREWORKS_WIRE_PREFIX = "accounts/fireworks/models/";
|
|
2
|
+
const FIREPASS_WIRE_PREFIX = "accounts/fireworks/routers/";
|
|
2
3
|
const VERSION_SEPARATOR_PATTERN = /(?<=\d)p(?=\d)/g;
|
|
3
4
|
const VERSION_DOT_PATTERN = /(?<=\d)\.(?=\d)/g;
|
|
4
5
|
|
|
@@ -11,3 +12,19 @@ export function toFireworksWireModelId(modelId: string): string {
|
|
|
11
12
|
const stripped = modelId.startsWith(FIREWORKS_WIRE_PREFIX) ? modelId.slice(FIREWORKS_WIRE_PREFIX.length) : modelId;
|
|
12
13
|
return `${FIREWORKS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
|
|
13
14
|
}
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
|
|
18
|
+
* endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
|
|
19
|
+
* We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
|
|
20
|
+
* to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time.
|
|
21
|
+
*/
|
|
22
|
+
export function toFirepassPublicModelId(modelId: string): string {
|
|
23
|
+
const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
|
|
24
|
+
return stripped.replace(VERSION_SEPARATOR_PATTERN, ".");
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export function toFirepassWireModelId(modelId: string): string {
|
|
28
|
+
const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
|
|
29
|
+
return `${FIREPASS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
|
|
30
|
+
}
|
|
package/src/utils/oauth/firepass.ts
CHANGED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fire Pass login flow.
|
|
3
|
+
*
|
|
4
|
+
* Fire Pass is a Fireworks subscription product whose dedicated `fpk_…` API
|
|
5
|
+
* keys are scoped to the `accounts/fireworks/routers/kimi-k2p6-turbo` router
|
|
6
|
+
* (Kimi K2.6 Turbo). The key does NOT authorize `/v1/models`, so validation
|
|
7
|
+
* pings the chat completions endpoint with the router id directly.
|
|
8
|
+
* See https://docs.fireworks.ai/firepass.
|
|
9
|
+
*/
|
|
10
|
+
import { createApiKeyLogin } from "./api-key-login";
|
|
11
|
+
|
|
12
|
+
export const loginFirepass = createApiKeyLogin({
|
|
13
|
+
providerLabel: "Fire Pass",
|
|
14
|
+
authUrl: "https://app.fireworks.ai/settings/users/api-keys",
|
|
15
|
+
instructions: "Create a dedicated Fire Pass API key in the Fireworks dashboard",
|
|
16
|
+
promptMessage: "Paste your Fire Pass API key",
|
|
17
|
+
placeholder: "fpk_...",
|
|
18
|
+
validation: {
|
|
19
|
+
kind: "chat-completions",
|
|
20
|
+
provider: "Fire Pass",
|
|
21
|
+
baseUrl: "https://api.fireworks.ai/inference/v1",
|
|
22
|
+
model: "accounts/fireworks/routers/kimi-k2p6-turbo",
|
|
23
|
+
},
|
|
24
|
+
});
|
package/src/utils/oauth/index.ts
CHANGED
|
@@ -55,6 +55,11 @@ const builtInOAuthProviders: OAuthProviderInfo[] = [
|
|
|
55
55
|
name: "Fireworks",
|
|
56
56
|
available: true,
|
|
57
57
|
},
|
|
58
|
+
{
|
|
59
|
+
id: "firepass",
|
|
60
|
+
name: "Fire Pass (Fireworks Kimi K2.6 Turbo subscription)",
|
|
61
|
+
available: true,
|
|
62
|
+
},
|
|
58
63
|
{
|
|
59
64
|
id: "github-copilot",
|
|
60
65
|
name: "GitHub Copilot",
|
|
@@ -301,6 +306,7 @@ export async function refreshOAuthToken(
|
|
|
301
306
|
case "opencode-go":
|
|
302
307
|
case "cerebras":
|
|
303
308
|
case "fireworks":
|
|
309
|
+
case "firepass":
|
|
304
310
|
case "nvidia":
|
|
305
311
|
case "nanogpt":
|
|
306
312
|
case "synthetic":
|
|
@@ -363,10 +369,14 @@ export async function getOAuthApiKey(
|
|
|
363
369
|
}
|
|
364
370
|
|
|
365
371
|
if (provider === "perplexity") {
|
|
372
|
+
// Perplexity JWTs usually omit `exp` (server-side sessions). Trust the JWT
|
|
373
|
+
// claim when present; otherwise treat the credential as non-expiring rather
|
|
374
|
+
// than honoring a stale stored `expires` (older logins wrote loginTime+1h).
|
|
375
|
+
const NEVER_EXPIRES = 8.64e15;
|
|
366
376
|
const normalizedExpires =
|
|
367
377
|
creds.expires > 0 && creds.expires < 10_000_000_000 ? creds.expires * 1000 : creds.expires;
|
|
368
378
|
const jwtExpiry = getPerplexityJwtExpiryMs(creds.access);
|
|
369
|
-
const expires = jwtExpiry
|
|
379
|
+
const expires = jwtExpiry ?? Math.max(normalizedExpires, NEVER_EXPIRES);
|
|
370
380
|
if (expires !== creds.expires) {
|
|
371
381
|
creds = { ...creds, expires };
|
|
372
382
|
}
|
|
package/src/utils/oauth/perplexity.ts
CHANGED
|
@@ -24,20 +24,26 @@ const APP_USER_AGENT = "Perplexity/641 CFNetwork/1568 Darwin/25.2.0";
|
|
|
24
24
|
// JWT helpers
|
|
25
25
|
// ---------------------------------------------------------------------------
|
|
26
26
|
|
|
27
|
-
/**
|
|
27
|
+
/**
|
|
28
|
+
* Extract expiry from a JWT. Perplexity tokens generally lack an `exp` claim
|
|
29
|
+
* (their sessions are server-side and effectively non-expiring from the client's
|
|
30
|
+
* point of view), so we return a far-future sentinel when no `exp` is present.
|
|
31
|
+
* When `exp` IS present, subtract a 5-minute safety margin.
|
|
32
|
+
*/
|
|
33
|
+
const NEVER_EXPIRES = 8.64e15; // max safe Date value
|
|
28
34
|
function getJwtExpiry(token: string): number {
|
|
29
35
|
try {
|
|
30
36
|
const parts = token.split(".");
|
|
31
|
-
if (parts.length !== 3) return
|
|
37
|
+
if (parts.length !== 3) return NEVER_EXPIRES;
|
|
32
38
|
const payload = parts[1] ?? "";
|
|
33
39
|
const decoded = JSON.parse(atob(payload.replace(/-/g, "+").replace(/_/g, "/")));
|
|
34
|
-
if (
|
|
40
|
+
if (typeof decoded?.exp === "number" && Number.isFinite(decoded.exp)) {
|
|
35
41
|
return decoded.exp * 1000 - 5 * 60_000;
|
|
36
42
|
}
|
|
37
43
|
} catch {
|
|
38
44
|
// Ignore decode errors
|
|
39
45
|
}
|
|
40
|
-
return
|
|
46
|
+
return NEVER_EXPIRES;
|
|
41
47
|
}
|
|
42
48
|
|
|
43
49
|
/** Build OAuthCredentials from a Perplexity JWT string. */
|
package/src/utils/oauth/types.ts
CHANGED
|
package/src/utils/schema/zod-decontaminate.ts
CHANGED
|
@@ -243,8 +243,17 @@ function rewriteZodNode(node: JsonObject, seen: WeakSet<object>): unknown {
|
|
|
243
243
|
case "pipe":
|
|
244
244
|
case "transform": {
|
|
245
245
|
const inner = walk(unwrapInnerSchema(def), seen);
|
|
246
|
-
if (kind === "nullable" && isJsonObject(inner)
|
|
247
|
-
|
|
246
|
+
if (kind === "nullable" && isJsonObject(inner)) {
|
|
247
|
+
if (typeof inner.type === "string") {
|
|
248
|
+
return { ...inner, type: [inner.type, "null"] };
|
|
249
|
+
}
|
|
250
|
+
if (Array.isArray(inner.type)) {
|
|
251
|
+
return (inner.type as string[]).includes("null")
|
|
252
|
+
? inner
|
|
253
|
+
: { ...inner, type: [...(inner.type as string[]), "null"] };
|
|
254
|
+
}
|
|
255
|
+
// anyOf / allOf / $ref shapes — no scalar `type` field
|
|
256
|
+
return { anyOf: [inner, { type: "null" }] };
|
|
248
257
|
}
|
|
249
258
|
return inner;
|
|
250
259
|
}
|