@oh-my-pi/pi-ai 15.1.6 → 15.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/dist/types/provider-models/openai-compat.d.ts +12 -0
- package/dist/types/providers/anthropic.d.ts +20 -1
- package/dist/types/types.d.ts +47 -10
- package/dist/types/utils/fireworks-model-id.d.ts +8 -0
- package/dist/types/utils/oauth/firepass.d.ts +1 -0
- package/dist/types/utils/oauth/types.d.ts +1 -1
- package/package.json +2 -2
- package/src/auth-storage.ts +6 -0
- package/src/model-cache.ts +14 -0
- package/src/models.json +27 -0
- package/src/provider-models/descriptors.ts +2 -0
- package/src/provider-models/openai-compat.ts +36 -4
- package/src/providers/anthropic.ts +118 -5
- package/src/providers/openai-chat-server.ts +2 -2
- package/src/providers/openai-codex-responses.ts +4 -3
- package/src/providers/openai-completions-compat.ts +18 -10
- package/src/providers/openai-completions.ts +21 -7
- package/src/providers/openai-responses-shared.ts +5 -1
- package/src/stream.ts +7 -0
- package/src/types.ts +74 -16
- package/src/utils/fireworks-model-id.ts +17 -0
- package/src/utils/oauth/firepass.ts +24 -0
- package/src/utils/oauth/index.ts +11 -1
- package/src/utils/oauth/perplexity.ts +10 -4
- package/src/utils/oauth/types.ts +1 -0
- package/src/utils/schema/zod-decontaminate.ts +11 -2
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,28 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.1.8] - 2026-05-20
|
|
6
|
+
### Added
|
|
7
|
+
|
|
8
|
+
- Added Fireworks Fire Pass as a separate `firepass` provider with API-key login flow, bundled `kimi-k2.6-turbo` model entry (Kimi K2.6 Turbo), and wire-id translation from the friendly catalog id to the `accounts/fireworks/routers/kimi-k2p6-turbo` router endpoint. Fire Pass keys (`fpk_…`) authorize only the dedicated router and reject `/v1/models`, so login validation pings chat completions against the router id directly. Extended the openai-completions Kimi-family safety net so the firepass entry inherits the per-Fireworks-docs "always send `max_tokens`" default ([Kimi K2 guide](https://docs.fireworks.ai/models/kimi-k2)); the router's accepted `reasoning_effort` set includes `xhigh`, so it is forwarded verbatim rather than remapped. See https://docs.fireworks.ai/firepass.
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
|
|
12
|
+
- Fixed DeepSeek V4 direct API requests with tools to keep documented thinking mode instead of dropping reasoning: lower OMP efforts now map to DeepSeek's supported `high`, `tool_choice` is omitted, `thinking: { type: "enabled" }` and `max_tokens` are sent, and partial user `reasoningEffortMap` overrides merge with DeepSeek defaults. ([#1207](https://github.com/can1357/oh-my-pi/issues/1207))
|
|
13
|
+
- Fixed model cache schema v2 databases so offline refreshes preserve cached provider discoveries after upgrading to schema v3 and subsequent online refreshes can overwrite the cache. ([#1219](https://github.com/can1357/oh-my-pi/issues/1219))
|
|
14
|
+
- Fixed Perplexity OAuth credentials being treated as expired one hour after login. `getJwtExpiry` was fabricating `expires = now + 1h` whenever the JWT had no `exp` claim (the common case — Perplexity sessions are server-side). Once the hour elapsed, `getOAuthApiKey` would mark the cred expired and the search provider's loader would silently skip it, surfacing as "logged out". Logins with no `exp` now persist a far-future sentinel; `getOAuthApiKey` also normalizes any stale `expires` written by older builds.
|
|
15
|
+
|
|
16
|
+
## [15.1.7] - 2026-05-19
|
|
17
|
+
### Added
|
|
18
|
+
|
|
19
|
+
- Added Anthropic realization of `serviceTier: "priority"`. The anthropic-messages provider now sets `speed: "fast"` on the request and appends the `fast-mode-2026-02-01` beta to `Anthropic-Beta` whenever the caller passes `serviceTier: "priority"`. When the server rejects an unsupported model with `invalid_request_error`, the provider transparently retries the same turn without the fast-mode signal (mirroring the strict-tools fallback pattern), persists the disable via a new `providerSessionState.fastModeDisabled` flag so subsequent requests in the session skip the field, and surfaces the action via the new `AssistantMessage.disabledFeatures` array (id `"priority"`) so callers can sync user-facing toggles. A new `clearAnthropicFastModeFallback(providerSessionState)` helper lets callers re-arm priority after the auto-fallback fired.
|
|
20
|
+
- Added scoped `ServiceTier` values: `"openai-only"` (priority on `openai`/`openai-codex`, ignored elsewhere) and `"claude-only"` (priority on direct `anthropic`, ignored on Bedrock/Vertex Claude and elsewhere). A new `resolveServiceTier(serviceTier, provider)` helper computes the effective tier for the provider; existing OpenAI/Anthropic provider code routes through it, so `service_tier` and Anthropic fast-mode emission both respect scope. `getPriorityPremiumRequests` now counts Anthropic+priority as one premium request (previously zero) and continues to ignore providers that drop the field on the wire.
|
|
21
|
+
|
|
22
|
+
### Fixed
|
|
23
|
+
|
|
24
|
+
- Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
|
|
25
|
+
- Fixed MiniMax Coding Plan CN streaming `<think>...</think>` reasoning as visible assistant text. The OpenAI-compatible stream parser now enables the existing MiniMax tag parser for both `minimax-code` and `minimax-code-cn`, so CN responses become structured `thinking` blocks instead of raw text. ([#1203](https://github.com/can1357/oh-my-pi/issues/1203))
|
|
26
|
+
|
|
5
27
|
## [15.1.6] - 2026-05-19
|
|
6
28
|
|
|
7
29
|
### Fixed
|
|
@@ -63,6 +63,18 @@ export interface FireworksModelManagerConfig {
|
|
|
63
63
|
baseUrl?: string;
|
|
64
64
|
}
|
|
65
65
|
export declare function fireworksModelManagerOptions(config?: FireworksModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
66
|
+
export interface FirepassModelManagerConfig {
|
|
67
|
+
apiKey?: string;
|
|
68
|
+
baseUrl?: string;
|
|
69
|
+
}
|
|
70
|
+
/**
|
|
71
|
+
* Fire Pass is a Fireworks subscription product that exposes a single router
|
|
72
|
+
* model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
|
|
73
|
+
* The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
|
|
74
|
+
* never performs dynamic discovery — the bundled catalog entry is canonical.
|
|
75
|
+
* See https://docs.fireworks.ai/firepass.
|
|
76
|
+
*/
|
|
77
|
+
export declare function firepassModelManagerOptions(_config?: FirepassModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
66
78
|
export interface MistralModelManagerConfig {
|
|
67
79
|
apiKey?: string;
|
|
68
80
|
baseUrl?: string;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
|
|
2
2
|
import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
|
|
3
|
-
import type { FetchImpl, Message, Model, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
|
|
3
|
+
import type { FetchImpl, Message, Model, ProviderSessionState, ServiceTier, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
|
|
4
4
|
export type AnthropicHeaderOptions = {
|
|
5
5
|
apiKey: string;
|
|
6
6
|
baseUrl?: string;
|
|
@@ -17,6 +17,15 @@ type AnthropicCacheControl = {
|
|
|
17
17
|
type: "ephemeral";
|
|
18
18
|
ttl?: "1h" | "5m";
|
|
19
19
|
};
|
|
20
|
+
/**
|
|
21
|
+
* Clears the in-session "server rejected fast mode" sticky flag. Call when the
|
|
22
|
+
* caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
|
|
23
|
+
* `/fast on` after a previous turn auto-disabled it) so the next request
|
|
24
|
+
* actually carries `speed: "fast"` again. No-op when the map or state entry
|
|
25
|
+
* hasn't been materialized yet.
|
|
26
|
+
*/
|
|
27
|
+
export declare function clearAnthropicFastModeFallback(providerSessionState: Map<string, ProviderSessionState> | undefined): void;
|
|
28
|
+
export declare function isAnthropicFastModeUnsupportedError(error: unknown): boolean;
|
|
20
29
|
export declare const claudeCodeVersion = "2.1.63";
|
|
21
30
|
export declare const claudeToolPrefix: string;
|
|
22
31
|
export declare const claudeCodeSystemInstruction = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
|
|
@@ -77,6 +86,16 @@ export interface AnthropicOptions extends StreamOptions {
|
|
|
77
86
|
name: string;
|
|
78
87
|
};
|
|
79
88
|
betas?: string[] | string;
|
|
89
|
+
/**
|
|
90
|
+
* Realization of `serviceTier: "priority"` on Anthropic models. When
|
|
91
|
+
* `"priority"`, sets `speed: "fast"` on the request and appends the
|
|
92
|
+
* `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
|
|
93
|
+
* with `invalid_request_error`, which triggers an in-provider one-shot
|
|
94
|
+
* fallback (see `fastModeDisabled` provider state).
|
|
95
|
+
*
|
|
96
|
+
* The scoped `"claude-only"` value resolves to `"priority"` on direct `anthropic`; every other `ServiceTier` value is currently ignored on this provider.
|
|
97
|
+
*/
|
|
98
|
+
serviceTier?: ServiceTier;
|
|
80
99
|
/** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
|
|
81
100
|
isOAuth?: boolean;
|
|
82
101
|
/**
|
package/dist/types/types.d.ts
CHANGED
|
@@ -48,7 +48,7 @@ export interface ThinkingConfig {
|
|
|
48
48
|
/** Provider-specific transport used to encode the selected effort. */
|
|
49
49
|
mode: ThinkingControlMode;
|
|
50
50
|
}
|
|
51
|
-
export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
|
|
51
|
+
export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
|
|
52
52
|
export type Provider = KnownProvider | string;
|
|
53
53
|
import type { Effort } from "./model-thinking";
|
|
54
54
|
/** Token budgets for each thinking level (token-based providers only) */
|
|
@@ -69,18 +69,47 @@ export type ToolChoice = "auto" | "none" | "any" | "required" | {
|
|
|
69
69
|
name: string;
|
|
70
70
|
};
|
|
71
71
|
export type CacheRetention = "none" | "short" | "long";
|
|
72
|
-
/** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
|
|
73
|
-
export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
|
|
74
|
-
export declare function shouldSendServiceTier(serviceTier?: ServiceTier | null, provider?: Provider): serviceTier is "flex" | "scale" | "priority";
|
|
75
72
|
/**
|
|
76
|
-
*
|
|
77
|
-
* a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
|
|
78
|
-
* accounting so the "premium requests" stat aggregates priority traffic too.
|
|
73
|
+
* Service tier hint for processing priority / cost control.
|
|
79
74
|
*
|
|
80
|
-
*
|
|
81
|
-
* `
|
|
75
|
+
* The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
|
|
76
|
+
* `"priority"`) are passed through to providers that understand them
|
|
77
|
+
* (OpenAI's `service_tier` field directly; Anthropic translates
|
|
78
|
+
* `"priority"` into `speed: "fast"` on supported Opus models).
|
|
79
|
+
*
|
|
80
|
+
* The scoped values target a specific provider family and behave as the
|
|
81
|
+
* unscoped value on the matching provider, or `undefined` everywhere else.
|
|
82
|
+
* They let users opt into priority on one family without paying premium
|
|
83
|
+
* costs on the other when switching models mid-session.
|
|
84
|
+
*
|
|
85
|
+
* - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
|
|
86
|
+
* - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
|
|
87
|
+
*/
|
|
88
|
+
export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
|
|
89
|
+
/** Resolved tier — one of the values that providers actually consume on the wire. */
|
|
90
|
+
export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
|
|
91
|
+
/**
|
|
92
|
+
* Resolves a possibly scoped `ServiceTier` to the effective tier for the
|
|
93
|
+
* given provider. Scoped values match their target family and otherwise
|
|
94
|
+
* collapse to `undefined`; unscoped values pass through unchanged.
|
|
95
|
+
*/
|
|
96
|
+
export declare function resolveServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): ResolvedServiceTier | undefined;
|
|
97
|
+
/**
|
|
98
|
+
* True when the (possibly scoped) tier should be sent as OpenAI's
|
|
99
|
+
* `service_tier` request field for the given provider. Non-OpenAI
|
|
100
|
+
* providers, unsupported tiers (`"auto"`, `"default"`), and scope
|
|
101
|
+
* mismatches all return false.
|
|
82
102
|
*/
|
|
83
|
-
export declare function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number;
|
|
103
|
+
export declare function shouldSendServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): boolean;
|
|
104
|
+
/**
|
|
105
|
+
* Premium-request weight contributed by sending priority to a provider
|
|
106
|
+
* that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
|
|
107
|
+
* so the "premium requests" stat aggregates priority traffic across the
|
|
108
|
+
* OpenAI family and Anthropic fast-mode realizations.
|
|
109
|
+
*
|
|
110
|
+
* Returns 1 per resolved priority request, 0 otherwise.
|
|
111
|
+
*/
|
|
112
|
+
export declare function getPriorityPremiumRequests(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): number;
|
|
84
113
|
export interface ProviderSessionState {
|
|
85
114
|
close(): void;
|
|
86
115
|
}
|
|
@@ -371,6 +400,14 @@ export interface AssistantMessage {
|
|
|
371
400
|
errorMessage?: string;
|
|
372
401
|
/** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
|
|
373
402
|
errorStatus?: number;
|
|
403
|
+
/**
|
|
404
|
+
* Stable identifiers for request features the provider silently dropped
|
|
405
|
+
* during this turn (e.g. `"priority"`). Set when a server-side rejection
|
|
406
|
+
* triggered an in-provider fallback retry that succeeded without the
|
|
407
|
+
* feature. Callers can use this to sync user-facing toggles back to the
|
|
408
|
+
* server's actual state.
|
|
409
|
+
*/
|
|
410
|
+
disabledFeatures?: string[];
|
|
374
411
|
/** Provider-specific opaque payload used to reconstruct transport-native history. */
|
|
375
412
|
providerPayload?: ProviderPayload;
|
|
376
413
|
timestamp: number;
|
|
@@ -1,2 +1,10 @@
|
|
|
1
1
|
export declare function toFireworksPublicModelId(modelId: string): string;
|
|
2
2
|
export declare function toFireworksWireModelId(modelId: string): string;
|
|
3
|
+
/**
|
|
4
|
+
* Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
|
|
5
|
+
* endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
|
|
6
|
+
* We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
|
|
7
|
+
* to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time.
|
|
8
|
+
*/
|
|
9
|
+
export declare function toFirepassPublicModelId(modelId: string): string;
|
|
10
|
+
export declare function toFirepassWireModelId(modelId: string): string;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const loginFirepass: (options: import("./types").OAuthController) => Promise<string>;
|
|
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
|
|
|
7
7
|
email?: string;
|
|
8
8
|
accountId?: string;
|
|
9
9
|
};
|
|
10
|
-
export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
|
|
10
|
+
export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai";
|
|
11
11
|
export type OAuthProviderId = OAuthProvider | (string & {});
|
|
12
12
|
export type OAuthPrompt = {
|
|
13
13
|
message: string;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.1.6",
|
|
4
|
+
"version": "15.1.8",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -43,7 +43,7 @@
|
|
|
43
43
|
"dependencies": {
|
|
44
44
|
"@anthropic-ai/sdk": "^0.94.0",
|
|
45
45
|
"@bufbuild/protobuf": "^2.12.0",
|
|
46
|
-
"@oh-my-pi/pi-utils": "15.1.6",
|
|
46
|
+
"@oh-my-pi/pi-utils": "15.1.8",
|
|
47
47
|
"openai": "^6.36.0",
|
|
48
48
|
"partial-json": "^0.1.7",
|
|
49
49
|
"zod": "4.4.3"
|
package/src/auth-storage.ts
CHANGED
|
@@ -1344,6 +1344,12 @@ export class AuthStorage {
|
|
|
1344
1344
|
await saveApiKeyCredential(apiKey);
|
|
1345
1345
|
return;
|
|
1346
1346
|
}
|
|
1347
|
+
case "firepass": {
|
|
1348
|
+
const { loginFirepass } = await import("./utils/oauth/firepass");
|
|
1349
|
+
const apiKey = await loginFirepass(ctrl);
|
|
1350
|
+
await saveApiKeyCredential(apiKey);
|
|
1351
|
+
return;
|
|
1352
|
+
}
|
|
1347
1353
|
case "zai": {
|
|
1348
1354
|
const { loginZai } = await import("./utils/oauth/zai");
|
|
1349
1355
|
const apiKey = await loginZai(ctrl);
|
package/src/model-cache.ts
CHANGED
|
@@ -17,6 +17,10 @@ interface CacheRow {
|
|
|
17
17
|
models: string;
|
|
18
18
|
}
|
|
19
19
|
|
|
20
|
+
interface TableInfoRow {
|
|
21
|
+
name: string;
|
|
22
|
+
}
|
|
23
|
+
|
|
20
24
|
interface CacheEntry<TApi extends Api = Api> {
|
|
21
25
|
models: Model<TApi>[];
|
|
22
26
|
fresh: boolean;
|
|
@@ -55,11 +59,21 @@ function getDb(dbPath?: string): Database {
|
|
|
55
59
|
models TEXT NOT NULL
|
|
56
60
|
)
|
|
57
61
|
`);
|
|
62
|
+
migrateCacheSchema(db);
|
|
63
|
+
|
|
58
64
|
sharedDb = db;
|
|
59
65
|
sharedDbPath = resolvedPath;
|
|
60
66
|
return db;
|
|
61
67
|
}
|
|
62
68
|
|
|
69
|
+
function migrateCacheSchema(db: Database): void {
|
|
70
|
+
const columns = db.prepare("PRAGMA table_info(model_cache)").all() as TableInfoRow[];
|
|
71
|
+
if (!columns.some(column => column.name === "static_fingerprint")) {
|
|
72
|
+
db.run("ALTER TABLE model_cache ADD COLUMN static_fingerprint TEXT NOT NULL DEFAULT ''");
|
|
73
|
+
}
|
|
74
|
+
db.run("UPDATE model_cache SET version = ? WHERE version = 2", [CACHE_SCHEMA_VERSION]);
|
|
75
|
+
}
|
|
76
|
+
|
|
63
77
|
export function readModelCache<TApi extends Api>(
|
|
64
78
|
providerId: string,
|
|
65
79
|
ttlMs: number,
|
package/src/models.json
CHANGED
|
@@ -5027,6 +5027,33 @@
|
|
|
5027
5027
|
}
|
|
5028
5028
|
}
|
|
5029
5029
|
},
|
|
5030
|
+
"firepass": {
|
|
5031
|
+
"kimi-k2.6-turbo": {
|
|
5032
|
+
"id": "kimi-k2.6-turbo",
|
|
5033
|
+
"name": "Kimi K2.6 Turbo (Fire Pass)",
|
|
5034
|
+
"api": "openai-completions",
|
|
5035
|
+
"provider": "firepass",
|
|
5036
|
+
"baseUrl": "https://api.fireworks.ai/inference/v1",
|
|
5037
|
+
"reasoning": true,
|
|
5038
|
+
"input": [
|
|
5039
|
+
"text",
|
|
5040
|
+
"image"
|
|
5041
|
+
],
|
|
5042
|
+
"cost": {
|
|
5043
|
+
"input": 0,
|
|
5044
|
+
"output": 0,
|
|
5045
|
+
"cacheRead": 0,
|
|
5046
|
+
"cacheWrite": 0
|
|
5047
|
+
},
|
|
5048
|
+
"contextWindow": 262144,
|
|
5049
|
+
"maxTokens": 65536,
|
|
5050
|
+
"thinking": {
|
|
5051
|
+
"mode": "effort",
|
|
5052
|
+
"minLevel": "minimal",
|
|
5053
|
+
"maxLevel": "xhigh"
|
|
5054
|
+
}
|
|
5055
|
+
}
|
|
5056
|
+
},
|
|
5030
5057
|
"fireworks": {
|
|
5031
5058
|
"deepseek-v4-pro": {
|
|
5032
5059
|
"id": "deepseek-v4-pro",
|
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
cerebrasModelManagerOptions,
|
|
15
15
|
cloudflareAiGatewayModelManagerOptions,
|
|
16
16
|
deepseekModelManagerOptions,
|
|
17
|
+
firepassModelManagerOptions,
|
|
17
18
|
fireworksModelManagerOptions,
|
|
18
19
|
githubCopilotModelManagerOptions,
|
|
19
20
|
groqModelManagerOptions,
|
|
@@ -152,6 +153,7 @@ export const PROVIDER_DESCRIPTORS: readonly ProviderDescriptor[] = [
|
|
|
152
153
|
config => fireworksModelManagerOptions(config),
|
|
153
154
|
catalog("Fireworks", ["FIREWORKS_API_KEY"]),
|
|
154
155
|
),
|
|
156
|
+
descriptor("firepass", "kimi-k2.6-turbo", config => firepassModelManagerOptions(config)),
|
|
155
157
|
descriptor("xai", "grok-4-fast-non-reasoning", config => xaiModelManagerOptions(config)),
|
|
156
158
|
catalogDescriptor(
|
|
157
159
|
"deepseek",
|
|
@@ -692,6 +692,30 @@ export function fireworksModelManagerOptions(
|
|
|
692
692
|
};
|
|
693
693
|
}
|
|
694
694
|
|
|
695
|
+
// ---------------------------------------------------------------------------
|
|
696
|
+
// 7.6 Fire Pass (Fireworks Kimi K2.6 Turbo subscription)
|
|
697
|
+
// ---------------------------------------------------------------------------
|
|
698
|
+
|
|
699
|
+
export interface FirepassModelManagerConfig {
|
|
700
|
+
apiKey?: string;
|
|
701
|
+
baseUrl?: string;
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
/**
|
|
705
|
+
* Fire Pass is a Fireworks subscription product that exposes a single router
|
|
706
|
+
* model (Kimi K2.6 Turbo) under `accounts/fireworks/routers/kimi-k2p6-turbo`.
|
|
707
|
+
* The dedicated `fpk_…` keys do not authorize `/v1/models`, so this manager
|
|
708
|
+
* never performs dynamic discovery — the bundled catalog entry is canonical.
|
|
709
|
+
* See https://docs.fireworks.ai/firepass.
|
|
710
|
+
*/
|
|
711
|
+
export function firepassModelManagerOptions(
|
|
712
|
+
_config?: FirepassModelManagerConfig,
|
|
713
|
+
): ModelManagerOptions<"openai-completions"> {
|
|
714
|
+
return {
|
|
715
|
+
providerId: "firepass",
|
|
716
|
+
};
|
|
717
|
+
}
|
|
718
|
+
|
|
695
719
|
// ---------------------------------------------------------------------------
|
|
696
720
|
// 7. Mistral
|
|
697
721
|
// ---------------------------------------------------------------------------
|
|
@@ -2083,18 +2107,26 @@ const MODELS_DEV_PROVIDER_DESCRIPTORS_CORE: readonly ModelsDevProviderDescriptor
|
|
|
2083
2107
|
// ids are kept off the catalog until the issue thread asks for them.
|
|
2084
2108
|
filterModel: (id, m) => m.tool_call === true && id.startsWith("deepseek-v4"),
|
|
2085
2109
|
compat: {
|
|
2086
|
-
//
|
|
2110
|
+
// DeepSeek V4 only accepts `high`/`max`; map lower OMP levels upward so
|
|
2111
|
+
// subagent "minimal" turns stay in documented thinking mode instead of
|
|
2112
|
+
// sending unsupported effort strings.
|
|
2113
|
+
supportsDeveloperRole: false,
|
|
2087
2114
|
supportsReasoningEffort: true,
|
|
2088
|
-
reasoningEffortMap: { xhigh: "max" },
|
|
2089
|
-
|
|
2090
|
-
//
|
|
2115
|
+
reasoningEffortMap: { minimal: "high", low: "high", medium: "high", high: "high", xhigh: "max" },
|
|
2116
|
+
maxTokensField: "max_tokens",
|
|
2117
|
+
// DeepSeek V4 thinking mode rejects the `tool_choice` control parameter.
|
|
2118
|
+
// Tool calls still work without it; the API defaults to auto when tools exist.
|
|
2091
2119
|
supportsToolChoice: false,
|
|
2120
|
+
// DeepSeek V4's OpenAI format docs enable thinking with both the toggle and
|
|
2121
|
+
// reasoning_effort. Keep the toggle explicit for built-in models.
|
|
2122
|
+
extraBody: { thinking: { type: "enabled" } },
|
|
2092
2123
|
// DeepSeek emits chain-of-thought via `reasoning_content` and requires it
|
|
2093
2124
|
// to round-trip on assistant tool-call messages so the model can resume
|
|
2094
2125
|
// from prior thinking (interleaved.field=reasoning_content on models.dev,
|
|
2095
2126
|
// matches the kimi/openrouter handling already in detectCompat).
|
|
2096
2127
|
reasoningContentField: "reasoning_content",
|
|
2097
2128
|
requiresReasoningContentForToolCalls: true,
|
|
2129
|
+
requiresAssistantContentForToolCalls: true,
|
|
2098
2130
|
},
|
|
2099
2131
|
}),
|
|
2100
2132
|
];
|
|
@@ -32,6 +32,7 @@ import type {
|
|
|
32
32
|
Model,
|
|
33
33
|
ProviderSessionState,
|
|
34
34
|
RedactedThinkingContent,
|
|
35
|
+
ServiceTier,
|
|
35
36
|
SimpleStreamOptions,
|
|
36
37
|
StopReason,
|
|
37
38
|
StreamFunction,
|
|
@@ -43,6 +44,7 @@ import type {
|
|
|
43
44
|
ToolResultMessage,
|
|
44
45
|
Usage,
|
|
45
46
|
} from "../types";
|
|
47
|
+
import { resolveServiceTier } from "../types";
|
|
46
48
|
import {
|
|
47
49
|
isAnthropicOAuthToken,
|
|
48
50
|
isRecord,
|
|
@@ -111,6 +113,7 @@ const claudeCodeBetaDefaults = [
|
|
|
111
113
|
];
|
|
112
114
|
const fineGrainedToolStreamingBeta = "fine-grained-tool-streaming-2025-05-14";
|
|
113
115
|
const interleavedThinkingBeta = "interleaved-thinking-2025-05-14";
|
|
116
|
+
const fastModeBeta = "fast-mode-2026-02-01";
|
|
114
117
|
|
|
115
118
|
function getHeaderCaseInsensitive(headers: Record<string, string> | undefined, headerName: string): string | undefined {
|
|
116
119
|
if (!headers) return undefined;
|
|
@@ -224,13 +227,16 @@ const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";
|
|
|
224
227
|
|
|
225
228
|
type AnthropicProviderSessionState = ProviderSessionState & {
|
|
226
229
|
strictToolsDisabled: boolean;
|
|
230
|
+
fastModeDisabled: boolean;
|
|
227
231
|
};
|
|
228
232
|
|
|
229
233
|
function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
|
|
230
234
|
const state: AnthropicProviderSessionState = {
|
|
231
235
|
strictToolsDisabled: false,
|
|
236
|
+
fastModeDisabled: false,
|
|
232
237
|
close: () => {
|
|
233
238
|
state.strictToolsDisabled = false;
|
|
239
|
+
state.fastModeDisabled = false;
|
|
234
240
|
},
|
|
235
241
|
};
|
|
236
242
|
return state;
|
|
@@ -249,6 +255,23 @@ function getAnthropicProviderSessionState(
|
|
|
249
255
|
return created;
|
|
250
256
|
}
|
|
251
257
|
|
|
258
|
+
/**
|
|
259
|
+
* Clears the in-session "server rejected fast mode" sticky flag. Call when the
|
|
260
|
+
* caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
|
|
261
|
+
* `/fast on` after a previous turn auto-disabled it) so the next request
|
|
262
|
+
* actually carries `speed: "fast"` again. No-op when the map or state entry
|
|
263
|
+
* hasn't been materialized yet.
|
|
264
|
+
*/
|
|
265
|
+
export function clearAnthropicFastModeFallback(
|
|
266
|
+
providerSessionState: Map<string, ProviderSessionState> | undefined,
|
|
267
|
+
): void {
|
|
268
|
+
if (!providerSessionState) return;
|
|
269
|
+
const state = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
|
|
270
|
+
| AnthropicProviderSessionState
|
|
271
|
+
| undefined;
|
|
272
|
+
if (state) state.fastModeDisabled = false;
|
|
273
|
+
}
|
|
274
|
+
|
|
252
275
|
function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
|
|
253
276
|
if (extractHttpStatusFromError(error) !== 400) return false;
|
|
254
277
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -258,11 +281,45 @@ function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
|
|
|
258
281
|
return /invalid_request_error/i.test(message) && (isStrictGrammarTooLarge || isSchemaCompilationTooComplex);
|
|
259
282
|
}
|
|
260
283
|
|
|
284
|
+
export function isAnthropicFastModeUnsupportedError(error: unknown): boolean {
|
|
285
|
+
const status = extractHttpStatusFromError(error);
|
|
286
|
+
if (status !== 400 && status !== 429) return false;
|
|
287
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
288
|
+
// 400 invalid_request_error — model doesn't accept `speed` at all.
|
|
289
|
+
// Observed: "'claude-opus-4-5-20251101' does not support the `speed` parameter."
|
|
290
|
+
// Stay tolerant of phrasing drift ("is not supported", quoted vs backticked field).
|
|
291
|
+
if (
|
|
292
|
+
status === 400 &&
|
|
293
|
+
/invalid_request_error/i.test(message) &&
|
|
294
|
+
/\bspeed\b/i.test(message) &&
|
|
295
|
+
/not support/i.test(message)
|
|
296
|
+
) {
|
|
297
|
+
return true;
|
|
298
|
+
}
|
|
299
|
+
// 429 rate_limit_error — account lacks the extra-usage entitlement fast mode requires.
|
|
300
|
+
// Observed: "Extra usage is required for fast mode."
|
|
301
|
+
if (status === 429 && /rate_limit_error/i.test(message) && /fast mode/i.test(message)) {
|
|
302
|
+
return true;
|
|
303
|
+
}
|
|
304
|
+
return false;
|
|
305
|
+
}
|
|
306
|
+
|
|
261
307
|
function hasStrictAnthropicTools(params: MessageCreateParamsStreaming): boolean {
|
|
262
308
|
const tools = params.tools as Array<{ strict?: unknown }> | undefined;
|
|
263
309
|
return tools?.some(tool => tool.strict === true) ?? false;
|
|
264
310
|
}
|
|
265
311
|
|
|
312
|
+
/**
|
|
313
|
+
* `speed` lives on `BetaMessageCreateParams` (client.beta.messages) but this
|
|
314
|
+
* provider posts via `client.messages.create`, whose param type doesn't
|
|
315
|
+
* include it. This alias narrows the cast to one place.
|
|
316
|
+
*/
|
|
317
|
+
type ParamsWithSpeed = MessageCreateParamsStreaming & { speed?: "fast" };
|
|
318
|
+
|
|
319
|
+
function dropAnthropicFastMode(params: MessageCreateParamsStreaming): void {
|
|
320
|
+
delete (params as ParamsWithSpeed).speed;
|
|
321
|
+
}
|
|
322
|
+
|
|
266
323
|
function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
|
|
267
324
|
const tools = params.tools as Array<{ strict?: unknown }> | undefined;
|
|
268
325
|
if (!tools) return;
|
|
@@ -526,6 +583,16 @@ export interface AnthropicOptions extends StreamOptions {
|
|
|
526
583
|
interleavedThinking?: boolean;
|
|
527
584
|
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
|
|
528
585
|
betas?: string[] | string;
|
|
586
|
+
/**
|
|
587
|
+
* Realization of `serviceTier: "priority"` on Anthropic models. When
|
|
588
|
+
* `"priority"`, sets `speed: "fast"` on the request and appends the
|
|
589
|
+
* `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
|
|
590
|
+
* with `invalid_request_error`, which triggers an in-provider one-shot
|
|
591
|
+
* fallback (see `fastModeDisabled` provider state).
|
|
592
|
+
*
|
|
593
|
+
* Other `ServiceTier` values are currently ignored on this provider.
|
|
594
|
+
*/
|
|
595
|
+
serviceTier?: ServiceTier;
|
|
529
596
|
/** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
|
|
530
597
|
isOAuth?: boolean;
|
|
531
598
|
/**
|
|
@@ -961,10 +1028,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
961
1028
|
} else {
|
|
962
1029
|
const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
|
|
963
1030
|
|
|
1031
|
+
const extraBetas = normalizeExtraBetas(options?.betas);
|
|
1032
|
+
const wantsAnthropicPriority = resolveServiceTier(options?.serviceTier, model.provider) === "priority";
|
|
1033
|
+
if (wantsAnthropicPriority && !extraBetas.includes(fastModeBeta)) {
|
|
1034
|
+
extraBetas.push(fastModeBeta);
|
|
1035
|
+
}
|
|
1036
|
+
|
|
964
1037
|
const created = createClient(model, {
|
|
965
1038
|
model,
|
|
966
1039
|
apiKey,
|
|
967
|
-
extraBetas
|
|
1040
|
+
extraBetas,
|
|
968
1041
|
stream: true,
|
|
969
1042
|
interleavedThinking: options?.interleavedThinking ?? true,
|
|
970
1043
|
headers: options?.headers,
|
|
@@ -984,15 +1057,19 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
984
1057
|
let disableStrictTools =
|
|
985
1058
|
(providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
|
|
986
1059
|
let strictFallbackErrorMessage: string | undefined;
|
|
1060
|
+
let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
|
|
987
1061
|
const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
|
|
988
1062
|
let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
|
|
1063
|
+
if (disableStrictTools) {
|
|
1064
|
+
dropAnthropicStrictTools(nextParams);
|
|
1065
|
+
}
|
|
1066
|
+
if (dropFastMode) {
|
|
1067
|
+
dropAnthropicFastMode(nextParams);
|
|
1068
|
+
}
|
|
989
1069
|
const replacementPayload = await options?.onPayload?.(nextParams, model);
|
|
990
1070
|
if (replacementPayload !== undefined) {
|
|
991
1071
|
nextParams = replacementPayload as typeof nextParams;
|
|
992
1072
|
}
|
|
993
|
-
if (disableStrictTools) {
|
|
994
|
-
dropAnthropicStrictTools(nextParams);
|
|
995
|
-
}
|
|
996
1073
|
rawRequestDump = {
|
|
997
1074
|
provider: model.provider,
|
|
998
1075
|
api: output.api,
|
|
@@ -1284,6 +1361,30 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1284
1361
|
firstTokenTime = undefined;
|
|
1285
1362
|
continue;
|
|
1286
1363
|
}
|
|
1364
|
+
if (
|
|
1365
|
+
!dropFastMode &&
|
|
1366
|
+
resolveServiceTier(options?.serviceTier, model.provider) === "priority" &&
|
|
1367
|
+
firstTokenTime === undefined &&
|
|
1368
|
+
isAnthropicFastModeUnsupportedError(streamFailure)
|
|
1369
|
+
) {
|
|
1370
|
+
logger.debug("anthropic: fast mode unsupported, retrying without speed", {
|
|
1371
|
+
model: model.id,
|
|
1372
|
+
error: streamFailure instanceof Error ? streamFailure.message : String(streamFailure),
|
|
1373
|
+
});
|
|
1374
|
+
if (providerSessionState) {
|
|
1375
|
+
providerSessionState.fastModeDisabled = true;
|
|
1376
|
+
}
|
|
1377
|
+
dropFastMode = true;
|
|
1378
|
+
params = await prepareParams();
|
|
1379
|
+
providerRetryAttempt = 0;
|
|
1380
|
+
output.content.length = 0;
|
|
1381
|
+
output.responseId = undefined;
|
|
1382
|
+
output.providerPayload = undefined;
|
|
1383
|
+
output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
|
|
1384
|
+
output.stopReason = "stop";
|
|
1385
|
+
firstTokenTime = undefined;
|
|
1386
|
+
continue;
|
|
1387
|
+
}
|
|
1287
1388
|
const isTransientEnvelopeFailure =
|
|
1288
1389
|
isTransientStreamParseError(streamFailure) || isTransientStreamEnvelopeError(streamFailure);
|
|
1289
1390
|
const canRetryTransientEnvelopeFailure = isTransientEnvelopeFailure && !streamedReplayUnsafeContent;
|
|
@@ -1315,6 +1416,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1315
1416
|
|
|
1316
1417
|
output.duration = Date.now() - startTime;
|
|
1317
1418
|
if (firstTokenTime) output.ttft = firstTokenTime - startTime;
|
|
1419
|
+
if (dropFastMode && resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
|
|
1420
|
+
output.disabledFeatures = [...(output.disabledFeatures ?? []), "priority"];
|
|
1421
|
+
}
|
|
1318
1422
|
stream.push({ type: "done", reason: output.stopReason, message: output });
|
|
1319
1423
|
stream.end();
|
|
1320
1424
|
} catch (error) {
|
|
@@ -1862,6 +1966,10 @@ function buildParams(
|
|
|
1862
1966
|
params.metadata = { user_id: metadataUserId };
|
|
1863
1967
|
}
|
|
1864
1968
|
|
|
1969
|
+
if (resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
|
|
1970
|
+
(params as ParamsWithSpeed).speed = "fast";
|
|
1971
|
+
}
|
|
1972
|
+
|
|
1865
1973
|
if (options?.toolChoice) {
|
|
1866
1974
|
if (typeof options.toolChoice === "string") {
|
|
1867
1975
|
params.tool_choice = { type: options.toolChoice };
|
|
@@ -2280,7 +2388,12 @@ export function normalizeAnthropicToolSchema(schema: unknown): unknown {
|
|
|
2280
2388
|
result.properties = normalizedProperties;
|
|
2281
2389
|
}
|
|
2282
2390
|
if (isRecord(result.additionalProperties)) {
|
|
2283
|
-
|
|
2391
|
+
const normalized = normalizeAnthropicToolSchema(result.additionalProperties);
|
|
2392
|
+
if (isRecord(normalized) && Object.keys(normalized).length === 0) {
|
|
2393
|
+
result.additionalProperties = true;
|
|
2394
|
+
} else {
|
|
2395
|
+
result.additionalProperties = normalized;
|
|
2396
|
+
}
|
|
2284
2397
|
}
|
|
2285
2398
|
if (Array.isArray(result.items)) {
|
|
2286
2399
|
result.items = result.items.map(item => normalizeAnthropicToolSchema(item));
|
|
@@ -11,7 +11,7 @@ import type {
|
|
|
11
11
|
Context,
|
|
12
12
|
ImageContent,
|
|
13
13
|
Message,
|
|
14
|
-
|
|
14
|
+
ResolvedServiceTier,
|
|
15
15
|
StopReason,
|
|
16
16
|
TextContent,
|
|
17
17
|
Tool,
|
|
@@ -36,7 +36,7 @@ function isReasoningEffort(value: unknown): value is ReasoningEffort {
|
|
|
36
36
|
return value === "minimal" || value === "low" || value === "medium" || value === "high" || value === "xhigh";
|
|
37
37
|
}
|
|
38
38
|
|
|
39
|
-
function isServiceTier(value: unknown): value is
|
|
39
|
+
function isServiceTier(value: unknown): value is ResolvedServiceTier {
|
|
40
40
|
return value === "auto" || value === "default" || value === "flex" || value === "scale" || value === "priority";
|
|
41
41
|
}
|
|
42
42
|
|
|
@@ -29,10 +29,10 @@ import {
|
|
|
29
29
|
type FetchImpl,
|
|
30
30
|
type Model,
|
|
31
31
|
type ProviderSessionState,
|
|
32
|
+
resolveServiceTier,
|
|
32
33
|
type ServiceTier,
|
|
33
34
|
type StreamFunction,
|
|
34
35
|
type StreamOptions,
|
|
35
|
-
shouldSendServiceTier,
|
|
36
36
|
type TextContent,
|
|
37
37
|
type ThinkingContent,
|
|
38
38
|
type Tool,
|
|
@@ -590,8 +590,9 @@ async function buildTransformedCodexRequestBody(
|
|
|
590
590
|
if (options?.repetitionPenalty !== undefined) {
|
|
591
591
|
params.repetition_penalty = options.repetitionPenalty;
|
|
592
592
|
}
|
|
593
|
-
|
|
594
|
-
|
|
593
|
+
const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
|
|
594
|
+
if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
|
|
595
|
+
params.service_tier = resolvedServiceTier;
|
|
595
596
|
}
|
|
596
597
|
if (context.tools && context.tools.length > 0) {
|
|
597
598
|
params.tools = convertOpenAICodexResponsesTools(context.tools, model);
|
|
@@ -52,7 +52,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
52
52
|
const isCerebras = provider === "cerebras" || baseUrl.includes("cerebras.ai");
|
|
53
53
|
const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
|
|
54
54
|
const isKilo = provider === "kilo" || baseUrl.includes("api.kilo.ai");
|
|
55
|
-
const isKimiModel = model.id.includes("moonshotai/kimi") ||
|
|
55
|
+
const isKimiModel = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
|
|
56
56
|
const isMoonshotKimi =
|
|
57
57
|
isKimiModel &&
|
|
58
58
|
(provider === "moonshot" ||
|
|
@@ -79,7 +79,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
79
79
|
baseUrl.includes("deepseek.com") ||
|
|
80
80
|
lowerId.includes("deepseek") ||
|
|
81
81
|
lowerName.includes("deepseek");
|
|
82
|
-
|
|
82
|
+
const isDirectDeepseekApi = provider === "deepseek" || baseUrl.includes("api.deepseek.com");
|
|
83
|
+
const isDirectDeepseekReasoning = isDirectDeepseekApi && isDeepseekFamily && Boolean(model.reasoning);
|
|
83
84
|
const isNonStandard =
|
|
84
85
|
isCerebras ||
|
|
85
86
|
provider === "xai" ||
|
|
@@ -102,7 +103,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
102
103
|
provider === "mistral" ||
|
|
103
104
|
baseUrl.includes("mistral.ai") ||
|
|
104
105
|
baseUrl.includes("chutes.ai") ||
|
|
105
|
-
baseUrl.includes("fireworks.ai")
|
|
106
|
+
baseUrl.includes("fireworks.ai") ||
|
|
107
|
+
isDirectDeepseekApi;
|
|
106
108
|
const isGrok = provider === "xai" || baseUrl.includes("api.x.ai");
|
|
107
109
|
const isMistral = provider === "mistral" || baseUrl.includes("mistral.ai");
|
|
108
110
|
|
|
@@ -162,7 +164,13 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
162
164
|
xhigh: "default",
|
|
163
165
|
} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
|
|
164
166
|
: isDeepseekFamily && model.reasoning
|
|
165
|
-
? {
|
|
167
|
+
? ({
|
|
168
|
+
minimal: "high",
|
|
169
|
+
low: "high",
|
|
170
|
+
medium: "high",
|
|
171
|
+
high: "high",
|
|
172
|
+
xhigh: "max",
|
|
173
|
+
} satisfies Partial<Record<OpenAIReasoningEffort, string>>)
|
|
166
174
|
: {};
|
|
167
175
|
|
|
168
176
|
return {
|
|
@@ -173,8 +181,8 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
173
181
|
reasoningEffortMap,
|
|
174
182
|
supportsUsageInStreaming: !isCerebras,
|
|
175
183
|
disableReasoningOnForcedToolChoice: isKimiModel || isAnthropicModel,
|
|
176
|
-
disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning),
|
|
177
|
-
supportsToolChoice:
|
|
184
|
+
disableReasoningOnToolChoice: isDeepseekFamily && Boolean(model.reasoning) && !isOpenRouter,
|
|
185
|
+
supportsToolChoice: !isDirectDeepseekReasoning,
|
|
178
186
|
maxTokensField: useMaxTokens ? "max_tokens" : "max_completion_tokens",
|
|
179
187
|
requiresToolResultName: isMistral,
|
|
180
188
|
requiresAssistantAfterToolResult: false,
|
|
@@ -204,11 +212,11 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
204
212
|
// DeepSeek V4 rejects synthetic reasoning_content placeholders (".") on tool-call turns.
|
|
205
213
|
// Kimi and OpenRouter accept them when actual reasoning is unavailable.
|
|
206
214
|
allowsSyntheticReasoningContentForToolCalls: !isDeepseekFamily || !model.reasoning,
|
|
207
|
-
requiresAssistantContentForToolCalls: isKimiModel,
|
|
215
|
+
requiresAssistantContentForToolCalls: isKimiModel || isDirectDeepseekReasoning,
|
|
208
216
|
openRouterRouting: undefined,
|
|
209
217
|
vercelGatewayRouting: undefined,
|
|
210
218
|
supportsStrictMode: detectStrictModeSupport(provider, baseUrl),
|
|
211
|
-
extraBody: undefined,
|
|
219
|
+
extraBody: isDirectDeepseekReasoning ? { thinking: { type: "enabled" } } : undefined,
|
|
212
220
|
toolStrictMode: isCerebras ? "all_strict" : "mixed",
|
|
213
221
|
};
|
|
214
222
|
}
|
|
@@ -235,7 +243,7 @@ export function resolveOpenAICompat(
|
|
|
235
243
|
supportsMultipleSystemMessages:
|
|
236
244
|
model.compat.supportsMultipleSystemMessages ?? detected.supportsMultipleSystemMessages,
|
|
237
245
|
supportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,
|
|
238
|
-
reasoningEffortMap: model.compat.reasoningEffortMap ??
|
|
246
|
+
reasoningEffortMap: { ...detected.reasoningEffortMap, ...(model.compat.reasoningEffortMap ?? {}) },
|
|
239
247
|
supportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,
|
|
240
248
|
supportsToolChoice: model.compat.supportsToolChoice ?? detected.supportsToolChoice,
|
|
241
249
|
maxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,
|
|
@@ -259,7 +267,7 @@ export function resolveOpenAICompat(
|
|
|
259
267
|
openRouterRouting: model.compat.openRouterRouting ?? detected.openRouterRouting,
|
|
260
268
|
vercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,
|
|
261
269
|
supportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,
|
|
262
|
-
extraBody: model.compat.extraBody,
|
|
270
|
+
extraBody: model.compat.extraBody ?? detected.extraBody,
|
|
263
271
|
toolStrictMode: model.compat.toolStrictMode ?? detected.toolStrictMode,
|
|
264
272
|
};
|
|
265
273
|
}
|
|
@@ -22,6 +22,7 @@ import {
|
|
|
22
22
|
type Model,
|
|
23
23
|
type OpenAICompat,
|
|
24
24
|
type ProviderSessionState,
|
|
25
|
+
resolveServiceTier,
|
|
25
26
|
type ServiceTier,
|
|
26
27
|
type StopReason,
|
|
27
28
|
type StreamFunction,
|
|
@@ -37,7 +38,7 @@ import {
|
|
|
37
38
|
import { normalizeSystemPrompts } from "../utils";
|
|
38
39
|
import { createAbortSourceTracker } from "../utils/abort";
|
|
39
40
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
40
|
-
import { toFireworksWireModelId } from "../utils/fireworks-model-id";
|
|
41
|
+
import { toFirepassWireModelId, toFireworksWireModelId } from "../utils/fireworks-model-id";
|
|
41
42
|
import {
|
|
42
43
|
type CapturedHttpErrorResponse,
|
|
43
44
|
finalizeErrorMessage,
|
|
@@ -486,7 +487,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
486
487
|
}
|
|
487
488
|
stream.push({ type: "start", partial: output });
|
|
488
489
|
|
|
489
|
-
const parseMiniMaxThinkTags = model.provider === "minimax-code";
|
|
490
|
+
const parseMiniMaxThinkTags = model.provider === "minimax-code" || model.provider === "minimax-code-cn";
|
|
490
491
|
// Some OpenAI-compatible DeepSeek hosts (including NVIDIA NIM and DeepSeek's
|
|
491
492
|
// native API) leak chat-template tool-call markers in `delta.content` even
|
|
492
493
|
// though tool calls are also surfaced structurally. Strip the leaked markers
|
|
@@ -1037,13 +1038,23 @@ function buildParams(
|
|
|
1037
1038
|
maybeAddOpenRouterAnthropicCacheControl(model, messages);
|
|
1038
1039
|
const supportsReasoningParams = model.provider !== "github-copilot";
|
|
1039
1040
|
|
|
1040
|
-
// Kimi (including via OpenRouter
|
|
1041
|
-
//
|
|
1041
|
+
// Kimi (including via OpenRouter and Fireworks router-form IDs such as
|
|
1042
|
+
// `accounts/fireworks/routers/kimi-*`) calculates TPM rate limits based on
|
|
1043
|
+
// max_tokens, not actual output. The official Kimi K2 model guidance
|
|
1044
|
+
// (https://docs.fireworks.ai/models/kimi-k2) also requires `max_tokens` for
|
|
1045
|
+
// every call since the family can otherwise emit very long reasoning traces
|
|
1046
|
+
// before the final answer. Always send max_tokens — match the same
|
|
1047
|
+
// Kimi-family regex used by the compat detector.
|
|
1042
1048
|
// Note: Direct kimi-code provider is handled by the dedicated Kimi provider in kimi.ts.
|
|
1043
|
-
const isKimi = model.id.includes("moonshotai/kimi");
|
|
1049
|
+
const isKimi = model.id.includes("moonshotai/kimi") || /(^|\/)kimi[-.]/i.test(model.id);
|
|
1044
1050
|
const effectiveMaxTokens = options?.maxTokens ?? (isKimi ? model.maxTokens : undefined);
|
|
1045
1051
|
|
|
1046
|
-
const requestModelId =
|
|
1052
|
+
const requestModelId =
|
|
1053
|
+
model.provider === "fireworks"
|
|
1054
|
+
? toFireworksWireModelId(model.id)
|
|
1055
|
+
: model.provider === "firepass"
|
|
1056
|
+
? toFirepassWireModelId(model.id)
|
|
1057
|
+
: model.id;
|
|
1047
1058
|
const params: OpenAICompletionsParams = {
|
|
1048
1059
|
model: requestModelId,
|
|
1049
1060
|
messages,
|
|
@@ -1093,7 +1104,10 @@ function buildParams(
|
|
|
1093
1104
|
params.frequency_penalty = options.frequencyPenalty;
|
|
1094
1105
|
}
|
|
1095
1106
|
if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
|
|
1096
|
-
|
|
1107
|
+
const resolved = resolveServiceTier(options?.serviceTier, model.provider);
|
|
1108
|
+
if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
|
|
1109
|
+
params.service_tier = resolved;
|
|
1110
|
+
}
|
|
1097
1111
|
}
|
|
1098
1112
|
|
|
1099
1113
|
if (context.tools) {
|
|
@@ -17,6 +17,7 @@ import {
|
|
|
17
17
|
type AssistantMessage,
|
|
18
18
|
type ImageContent,
|
|
19
19
|
type Model,
|
|
20
|
+
resolveServiceTier,
|
|
20
21
|
type ServiceTier,
|
|
21
22
|
type StopReason,
|
|
22
23
|
type StreamOptions,
|
|
@@ -651,7 +652,10 @@ export function applyCommonResponsesSamplingParams<P extends CommonResponsesPara
|
|
|
651
652
|
if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
|
|
652
653
|
if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
|
|
653
654
|
if (shouldSendServiceTier(options?.serviceTier, provider)) {
|
|
654
|
-
|
|
655
|
+
const resolved = resolveServiceTier(options?.serviceTier, provider);
|
|
656
|
+
if (resolved === "flex" || resolved === "scale" || resolved === "priority") {
|
|
657
|
+
params.service_tier = resolved;
|
|
658
|
+
}
|
|
655
659
|
}
|
|
656
660
|
}
|
|
657
661
|
|
package/src/stream.ts
CHANGED
|
@@ -83,6 +83,7 @@ const serviceProviderMap: Record<string, KeyResolver> = {
|
|
|
83
83
|
cerebras: "CEREBRAS_API_KEY",
|
|
84
84
|
xai: "XAI_API_KEY",
|
|
85
85
|
fireworks: "FIREWORKS_API_KEY",
|
|
86
|
+
firepass: "FIREPASS_API_KEY",
|
|
86
87
|
openrouter: "OPENROUTER_API_KEY",
|
|
87
88
|
kilo: "KILO_API_KEY",
|
|
88
89
|
"vercel-ai-gateway": "AI_GATEWAY_API_KEY",
|
|
@@ -580,6 +581,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
580
581
|
thinkingEnabled: false,
|
|
581
582
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
582
583
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
584
|
+
serviceTier: options?.serviceTier,
|
|
583
585
|
});
|
|
584
586
|
}
|
|
585
587
|
|
|
@@ -590,6 +592,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
590
592
|
thinkingEnabled: false,
|
|
591
593
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
592
594
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
595
|
+
serviceTier: options?.serviceTier,
|
|
593
596
|
});
|
|
594
597
|
}
|
|
595
598
|
|
|
@@ -603,6 +606,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
603
606
|
effort,
|
|
604
607
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
605
608
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
609
|
+
serviceTier: options?.serviceTier,
|
|
606
610
|
});
|
|
607
611
|
}
|
|
608
612
|
|
|
@@ -613,6 +617,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
613
617
|
thinkingBudgetTokens: thinkingBudget,
|
|
614
618
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
615
619
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
620
|
+
serviceTier: options?.serviceTier,
|
|
616
621
|
});
|
|
617
622
|
}
|
|
618
623
|
|
|
@@ -631,6 +636,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
631
636
|
thinkingEnabled: false,
|
|
632
637
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
633
638
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
639
|
+
serviceTier: options?.serviceTier,
|
|
634
640
|
});
|
|
635
641
|
} else {
|
|
636
642
|
return castApi<"anthropic-messages">({
|
|
@@ -640,6 +646,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
640
646
|
thinkingBudgetTokens: thinkingBudget,
|
|
641
647
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
642
648
|
thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
|
|
649
|
+
serviceTier: options?.serviceTier,
|
|
643
650
|
});
|
|
644
651
|
}
|
|
645
652
|
}
|
package/src/types.ts
CHANGED
|
@@ -110,6 +110,7 @@ export type KnownProvider =
|
|
|
110
110
|
| "minimax-code-cn"
|
|
111
111
|
| "github-copilot"
|
|
112
112
|
| "fireworks"
|
|
113
|
+
| "firepass"
|
|
113
114
|
| "gitlab-duo"
|
|
114
115
|
| "cursor"
|
|
115
116
|
| "deepseek"
|
|
@@ -162,29 +163,78 @@ export type ToolChoice =
|
|
|
162
163
|
// Base options all providers share
|
|
163
164
|
export type CacheRetention = "none" | "short" | "long";
|
|
164
165
|
|
|
165
|
-
/**
|
|
166
|
-
|
|
166
|
+
/**
|
|
167
|
+
* Service tier hint for processing priority / cost control.
|
|
168
|
+
*
|
|
169
|
+
* The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
|
|
170
|
+
* `"priority"`) are passed through to providers that understand them
|
|
171
|
+
* (OpenAI's `service_tier` field directly; Anthropic translates
|
|
172
|
+
* `"priority"` into `speed: "fast"` on supported Opus models).
|
|
173
|
+
*
|
|
174
|
+
* The scoped values target a specific provider family and behave as the
|
|
175
|
+
* unscoped value on the matching provider, or `undefined` everywhere else.
|
|
176
|
+
* They let users opt into priority on one family without paying premium
|
|
177
|
+
* costs on the other when switching models mid-session.
|
|
178
|
+
*
|
|
179
|
+
* - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
|
|
180
|
+
* - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
|
|
181
|
+
*/
|
|
182
|
+
export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
|
|
167
183
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
184
|
+
/** Resolved tier — one of the values that providers actually consume on the wire. */
|
|
185
|
+
export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
|
|
186
|
+
|
|
187
|
+
/**
|
|
188
|
+
* Resolves a possibly scoped `ServiceTier` to the effective tier for the
|
|
189
|
+
* given provider. Scoped values match their target family and otherwise
|
|
190
|
+
* collapse to `undefined`; unscoped values pass through unchanged.
|
|
191
|
+
*/
|
|
192
|
+
export function resolveServiceTier(
|
|
193
|
+
serviceTier: ServiceTier | null | undefined,
|
|
194
|
+
provider: Provider | undefined,
|
|
195
|
+
): ResolvedServiceTier | undefined {
|
|
196
|
+
if (!serviceTier) return undefined;
|
|
197
|
+
switch (serviceTier) {
|
|
198
|
+
case "openai-only":
|
|
199
|
+
return provider === "openai" || provider === "openai-codex" ? "priority" : undefined;
|
|
200
|
+
case "claude-only":
|
|
201
|
+
return provider === "anthropic" ? "priority" : undefined;
|
|
202
|
+
default:
|
|
203
|
+
return serviceTier;
|
|
174
204
|
}
|
|
175
|
-
return serviceTier === "flex" || serviceTier === "scale" || serviceTier === "priority";
|
|
176
205
|
}
|
|
177
206
|
|
|
178
207
|
/**
|
|
179
|
-
*
|
|
180
|
-
*
|
|
181
|
-
*
|
|
208
|
+
* True when the (possibly scoped) tier should be sent as OpenAI's
|
|
209
|
+
* `service_tier` request field for the given provider. Non-OpenAI
|
|
210
|
+
* providers, unsupported tiers (`"auto"`, `"default"`), and scope
|
|
211
|
+
* mismatches all return false.
|
|
212
|
+
*/
|
|
213
|
+
export function shouldSendServiceTier(
|
|
214
|
+
serviceTier: ServiceTier | null | undefined,
|
|
215
|
+
provider: Provider | undefined,
|
|
216
|
+
): boolean {
|
|
217
|
+
if (provider !== "openai" && provider !== "openai-codex") return false;
|
|
218
|
+
const resolved = resolveServiceTier(serviceTier, provider);
|
|
219
|
+
return resolved === "flex" || resolved === "scale" || resolved === "priority";
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
/**
|
|
223
|
+
* Premium-request weight contributed by sending priority to a provider
|
|
224
|
+
* that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
|
|
225
|
+
* so the "premium requests" stat aggregates priority traffic across the
|
|
226
|
+
* OpenAI family and Anthropic fast-mode realizations.
|
|
182
227
|
*
|
|
183
|
-
* Returns 1 per priority request, 0 otherwise.
|
|
184
|
-
* `scale`) and providers that ignore `service_tier` always return 0.
|
|
228
|
+
* Returns 1 per resolved priority request, 0 otherwise.
|
|
185
229
|
*/
|
|
186
|
-
export function getPriorityPremiumRequests(
|
|
187
|
-
|
|
230
|
+
export function getPriorityPremiumRequests(
|
|
231
|
+
serviceTier: ServiceTier | null | undefined,
|
|
232
|
+
provider: Provider | undefined,
|
|
233
|
+
): number {
|
|
234
|
+
if (resolveServiceTier(serviceTier, provider) !== "priority") return 0;
|
|
235
|
+
// Only providers that realize `priority` on the wire bill the user.
|
|
236
|
+
// Everywhere else, the field is silently dropped and nothing is charged.
|
|
237
|
+
return provider === "openai" || provider === "openai-codex" || provider === "anthropic" ? 1 : 0;
|
|
188
238
|
}
|
|
189
239
|
|
|
190
240
|
export interface ProviderSessionState {
|
|
@@ -502,6 +552,14 @@ export interface AssistantMessage {
|
|
|
502
552
|
errorMessage?: string;
|
|
503
553
|
/** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
|
|
504
554
|
errorStatus?: number;
|
|
555
|
+
/**
|
|
556
|
+
* Stable identifiers for request features the provider silently dropped
|
|
557
|
+
* during this turn (e.g. `"priority"`). Set when a server-side rejection
|
|
558
|
+
* triggered an in-provider fallback retry that succeeded without the
|
|
559
|
+
* feature. Callers can use this to sync user-facing toggles back to the
|
|
560
|
+
* server's actual state.
|
|
561
|
+
*/
|
|
562
|
+
disabledFeatures?: string[];
|
|
505
563
|
/** Provider-specific opaque payload used to reconstruct transport-native history. */
|
|
506
564
|
providerPayload?: ProviderPayload;
|
|
507
565
|
timestamp: number; // Unix timestamp in milliseconds
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
const FIREWORKS_WIRE_PREFIX = "accounts/fireworks/models/";
|
|
2
|
+
const FIREPASS_WIRE_PREFIX = "accounts/fireworks/routers/";
|
|
2
3
|
const VERSION_SEPARATOR_PATTERN = /(?<=\d)p(?=\d)/g;
|
|
3
4
|
const VERSION_DOT_PATTERN = /(?<=\d)\.(?=\d)/g;
|
|
4
5
|
|
|
@@ -11,3 +12,19 @@ export function toFireworksWireModelId(modelId: string): string {
|
|
|
11
12
|
const stripped = modelId.startsWith(FIREWORKS_WIRE_PREFIX) ? modelId.slice(FIREWORKS_WIRE_PREFIX.length) : modelId;
|
|
12
13
|
return `${FIREWORKS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
|
|
13
14
|
}
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Fire Pass exposes its Kimi K2.6 Turbo subscription through a dedicated router
|
|
18
|
+
* endpoint at `accounts/fireworks/routers/<id>` rather than the `models/` namespace.
|
|
19
|
+
* We keep a friendly public id (e.g. `kimi-k2.6-turbo`) in the catalog and translate
|
|
20
|
+
* to the wire form (`accounts/fireworks/routers/kimi-k2p6-turbo`) at request time.
|
|
21
|
+
*/
|
|
22
|
+
export function toFirepassPublicModelId(modelId: string): string {
|
|
23
|
+
const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
|
|
24
|
+
return stripped.replace(VERSION_SEPARATOR_PATTERN, ".");
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export function toFirepassWireModelId(modelId: string): string {
|
|
28
|
+
const stripped = modelId.startsWith(FIREPASS_WIRE_PREFIX) ? modelId.slice(FIREPASS_WIRE_PREFIX.length) : modelId;
|
|
29
|
+
return `${FIREPASS_WIRE_PREFIX}${stripped.replace(VERSION_DOT_PATTERN, "p")}`;
|
|
30
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fire Pass login flow.
|
|
3
|
+
*
|
|
4
|
+
* Fire Pass is a Fireworks subscription product whose dedicated `fpk_…` API
|
|
5
|
+
* keys are scoped to the `accounts/fireworks/routers/kimi-k2p6-turbo` router
|
|
6
|
+
* (Kimi K2.6 Turbo). The key does NOT authorize `/v1/models`, so validation
|
|
7
|
+
* pings the chat completions endpoint with the router id directly.
|
|
8
|
+
* See https://docs.fireworks.ai/firepass.
|
|
9
|
+
*/
|
|
10
|
+
import { createApiKeyLogin } from "./api-key-login";
|
|
11
|
+
|
|
12
|
+
export const loginFirepass = createApiKeyLogin({
|
|
13
|
+
providerLabel: "Fire Pass",
|
|
14
|
+
authUrl: "https://app.fireworks.ai/settings/users/api-keys",
|
|
15
|
+
instructions: "Create a dedicated Fire Pass API key in the Fireworks dashboard",
|
|
16
|
+
promptMessage: "Paste your Fire Pass API key",
|
|
17
|
+
placeholder: "fpk_...",
|
|
18
|
+
validation: {
|
|
19
|
+
kind: "chat-completions",
|
|
20
|
+
provider: "Fire Pass",
|
|
21
|
+
baseUrl: "https://api.fireworks.ai/inference/v1",
|
|
22
|
+
model: "accounts/fireworks/routers/kimi-k2p6-turbo",
|
|
23
|
+
},
|
|
24
|
+
});
|
package/src/utils/oauth/index.ts
CHANGED
|
@@ -55,6 +55,11 @@ const builtInOAuthProviders: OAuthProviderInfo[] = [
|
|
|
55
55
|
name: "Fireworks",
|
|
56
56
|
available: true,
|
|
57
57
|
},
|
|
58
|
+
{
|
|
59
|
+
id: "firepass",
|
|
60
|
+
name: "Fire Pass (Fireworks Kimi K2.6 Turbo subscription)",
|
|
61
|
+
available: true,
|
|
62
|
+
},
|
|
58
63
|
{
|
|
59
64
|
id: "github-copilot",
|
|
60
65
|
name: "GitHub Copilot",
|
|
@@ -301,6 +306,7 @@ export async function refreshOAuthToken(
|
|
|
301
306
|
case "opencode-go":
|
|
302
307
|
case "cerebras":
|
|
303
308
|
case "fireworks":
|
|
309
|
+
case "firepass":
|
|
304
310
|
case "nvidia":
|
|
305
311
|
case "nanogpt":
|
|
306
312
|
case "synthetic":
|
|
@@ -363,10 +369,14 @@ export async function getOAuthApiKey(
|
|
|
363
369
|
}
|
|
364
370
|
|
|
365
371
|
if (provider === "perplexity") {
|
|
372
|
+
// Perplexity JWTs usually omit `exp` (server-side sessions). Trust the JWT
|
|
373
|
+
// claim when present; otherwise treat the credential as non-expiring rather
|
|
374
|
+
// than honoring a stale stored `expires` (older logins wrote loginTime+1h).
|
|
375
|
+
const NEVER_EXPIRES = 8.64e15;
|
|
366
376
|
const normalizedExpires =
|
|
367
377
|
creds.expires > 0 && creds.expires < 10_000_000_000 ? creds.expires * 1000 : creds.expires;
|
|
368
378
|
const jwtExpiry = getPerplexityJwtExpiryMs(creds.access);
|
|
369
|
-
const expires = jwtExpiry
|
|
379
|
+
const expires = jwtExpiry ?? Math.max(normalizedExpires, NEVER_EXPIRES);
|
|
370
380
|
if (expires !== creds.expires) {
|
|
371
381
|
creds = { ...creds, expires };
|
|
372
382
|
}
|
|
@@ -24,20 +24,26 @@ const APP_USER_AGENT = "Perplexity/641 CFNetwork/1568 Darwin/25.2.0";
|
|
|
24
24
|
// JWT helpers
|
|
25
25
|
// ---------------------------------------------------------------------------
|
|
26
26
|
|
|
27
|
-
/**
|
|
27
|
+
/**
|
|
28
|
+
* Extract expiry from a JWT. Perplexity tokens generally lack an `exp` claim
|
|
29
|
+
* (their sessions are server-side and effectively non-expiring from the client's
|
|
30
|
+
* point of view), so we return a far-future sentinel when no `exp` is present.
|
|
31
|
+
* When `exp` IS present, subtract a 5-minute safety margin.
|
|
32
|
+
*/
|
|
33
|
+
const NEVER_EXPIRES = 8.64e15; // max safe Date value
|
|
28
34
|
function getJwtExpiry(token: string): number {
|
|
29
35
|
try {
|
|
30
36
|
const parts = token.split(".");
|
|
31
|
-
if (parts.length !== 3) return
|
|
37
|
+
if (parts.length !== 3) return NEVER_EXPIRES;
|
|
32
38
|
const payload = parts[1] ?? "";
|
|
33
39
|
const decoded = JSON.parse(atob(payload.replace(/-/g, "+").replace(/_/g, "/")));
|
|
34
|
-
if (
|
|
40
|
+
if (typeof decoded?.exp === "number" && Number.isFinite(decoded.exp)) {
|
|
35
41
|
return decoded.exp * 1000 - 5 * 60_000;
|
|
36
42
|
}
|
|
37
43
|
} catch {
|
|
38
44
|
// Ignore decode errors
|
|
39
45
|
}
|
|
40
|
-
return
|
|
46
|
+
return NEVER_EXPIRES;
|
|
41
47
|
}
|
|
42
48
|
|
|
43
49
|
/** Build OAuthCredentials from a Perplexity JWT string. */
|
package/src/utils/oauth/types.ts
CHANGED
|
@@ -243,8 +243,17 @@ function rewriteZodNode(node: JsonObject, seen: WeakSet<object>): unknown {
|
|
|
243
243
|
case "pipe":
|
|
244
244
|
case "transform": {
|
|
245
245
|
const inner = walk(unwrapInnerSchema(def), seen);
|
|
246
|
-
if (kind === "nullable" && isJsonObject(inner)
|
|
247
|
-
|
|
246
|
+
if (kind === "nullable" && isJsonObject(inner)) {
|
|
247
|
+
if (typeof inner.type === "string") {
|
|
248
|
+
return { ...inner, type: [inner.type, "null"] };
|
|
249
|
+
}
|
|
250
|
+
if (Array.isArray(inner.type)) {
|
|
251
|
+
return (inner.type as string[]).includes("null")
|
|
252
|
+
? inner
|
|
253
|
+
: { ...inner, type: [...(inner.type as string[]), "null"] };
|
|
254
|
+
}
|
|
255
|
+
// anyOf / allOf / $ref shapes — no scalar `type` field
|
|
256
|
+
return { anyOf: [inner, { type: "null" }] };
|
|
248
257
|
}
|
|
249
258
|
return inner;
|
|
250
259
|
}
|