@oh-my-pi/pi-ai 15.5.4 → 15.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +53 -0
- package/dist/types/auth-storage.d.ts +12 -0
- package/dist/types/model-manager.d.ts +2 -0
- package/dist/types/model-thinking.d.ts +15 -0
- package/dist/types/provider-models/descriptors.d.ts +2 -0
- package/dist/types/provider-models/openai-compat.d.ts +48 -0
- package/dist/types/providers/openai-completions.d.ts +19 -0
- package/dist/types/providers/openai-responses-shared.d.ts +10 -1
- package/dist/types/providers/openai-responses.d.ts +34 -0
- package/dist/types/providers/xai-responses.d.ts +23 -0
- package/dist/types/types.d.ts +11 -1
- package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts +1 -0
- package/dist/types/utils/oauth/openrouter.d.ts +1 -0
- package/dist/types/utils/oauth/types.d.ts +1 -1
- package/dist/types/utils/oauth/xai-oauth.d.ts +60 -0
- package/package.json +2 -2
- package/src/auth-storage.ts +33 -0
- package/src/model-manager.ts +31 -14
- package/src/model-thinking.ts +32 -0
- package/src/models.json +127 -0
- package/src/provider-models/descriptors.ts +15 -3
- package/src/provider-models/openai-compat.ts +292 -41
- package/src/providers/anthropic.ts +35 -5
- package/src/providers/openai-codex-responses.ts +283 -8
- package/src/providers/openai-completions.ts +40 -9
- package/src/providers/openai-responses-shared.ts +38 -9
- package/src/providers/openai-responses.ts +62 -8
- package/src/providers/pi-native-server.ts +1 -0
- package/src/providers/xai-responses.ts +82 -0
- package/src/stream.ts +17 -1
- package/src/types.ts +11 -0
- package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
- package/src/utils/oauth/index.ts +16 -0
- package/src/utils/oauth/openrouter.ts +20 -0
- package/src/utils/oauth/synthetic.ts +2 -3
- package/src/utils/oauth/types.ts +2 -0
- package/src/utils/oauth/xai-oauth.ts +342 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,59 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.5.7] - 2026-05-27
|
|
6
|
+
### Added
|
|
7
|
+
- `SimpleStreamOptions.openrouterVariant` (`"nitro"`, `"floor"`, `"online"`, `"exacto"`, …) — when set, appends `:<variant>` to OpenRouter model IDs at request time, leaving ids that already carry an explicit `:suffix` untouched. Plumbed through `openai-completions` and the pi-native gateway forwarder.
|
|
8
|
+
|
|
9
|
+
- xAI Grok OAuth (SuperGrok Subscription) provider in `/login`. Loopback PKCE flow on `127.0.0.1:56121`; the token unlocks Grok-4.x chat. Ported from NousResearch/hermes-agent (MIT).
|
|
10
|
+
- OpenRouter provider in `/login`. API-key paste flow validated against `https://openrouter.ai/api/v1/auth/key` (the `/models` endpoint is public and cannot validate auth). The pasted key is stored under the existing `openrouter` provider id used by `OPENROUTER_API_KEY`.
|
|
11
|
+
- `XAI_OAUTH_TOKEN` environment variable accepted as a headless fallback for the xAI Grok OAuth provider.
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- `OpenAIResponsesOptions` gains five optional, provider-agnostic fields that adapter wrappers can use to compose provider-specific behavior on top of the generic transport: `includeEncryptedReasoning` (gates `include: ["reasoning.encrypted_content"]`; default `true`, preserves current behavior), `filterReasoningHistory` (strips replayed `type: "reasoning"` items from conversation history; default `false`), `omitReasoningEffort` (suppresses the `reasoning.effort` wire param even when `reasoning` is requested; default `false`), `headers` (merged onto the client's default headers), and `extraBody` (merged into the request payload).
|
|
16
|
+
- The existing `XAI_API_KEY` path is unchanged — it continues to use the OpenAI-completions transport.
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
|
|
20
|
+
- Fixed OpenRouter DeepSeek V4 tool-call follow-up requests replaying normalized `reasoning` as-is instead of DeepSeek's required `reasoning_content`, which caused HTTP 400 errors in thinking mode. ([#1445](https://github.com/can1357/oh-my-pi/issues/1445))
|
|
21
|
+
|
|
22
|
+
## [15.5.6] - 2026-05-27
|
|
23
|
+
### Added
|
|
24
|
+
|
|
25
|
+
- Added `PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS` to control how long an idle Codex WebSocket stays eligible for reuse, with `0` disabling the check
|
|
26
|
+
|
|
27
|
+
### Fixed
|
|
28
|
+
|
|
29
|
+
- Fixed reused Codex WebSocket connections that had gone silent: a connection with no activity past the idle-reuse threshold is now dropped and replaced with a fresh handshake, preventing the next request from stalling
|
|
30
|
+
- Fixed stale response frames left in the websocket queue from a completed turn so subsequent requests no longer process terminal frames from the previous response
|
|
31
|
+
- Fixed websocket dead-socket detection to fail a stale connection when no inbound traffic or pong is observed after a ping timeout, improving recovery on runtimes that do not emit pong events
|
|
32
|
+
|
|
33
|
+
## [15.5.5] - 2026-05-27
|
|
34
|
+
|
|
35
|
+
### Added
|
|
36
|
+
|
|
37
|
+
- Added `PI_CODEX_WEBSOCKET_PING_INTERVAL_MS` to configure the interval for Codex WebSocket protocol ping heartbeats
|
|
38
|
+
- Added `PI_CODEX_WEBSOCKET_PONG_TIMEOUT_MS` to configure the Codex WebSocket pong timeout used to detect unresponsive connections
|
|
39
|
+
- Added `PI_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY` to configure the maximum buffered Codex WebSocket inbound queue size before transport fallback
|
|
40
|
+
|
|
41
|
+
### Changed
|
|
42
|
+
|
|
43
|
+
- Improved Codex WebSocket timeout diagnostics to include last event type and time since last progress event
|
|
44
|
+
- Enhanced Codex WebSocket error classification to recognize ping, pong, send, and queue-overflow failures as retryable
|
|
45
|
+
|
|
46
|
+
### Fixed
|
|
47
|
+
|
|
48
|
+
- Fixed Codex WebSocket send failures by wrapping socket.send() in try-catch and surfacing errors as retryable transport errors
|
|
49
|
+
- Fixed Codex WebSocket inbound queue overflow by adding capacity bounds and triggering fallback to SSE when exceeded
|
|
50
|
+
- Fixed Codex WebSocket pong timeout detection by tracking pong events and failing the connection when no pong is received within the configured timeout
|
|
51
|
+
- Fixed Anthropic streaming to suppress hallucinated meta-prompt thinking blocks (the recent "I don't see any current rewritten thinking..." regression). When the marker phrase `rewritten thinking` appears in a streamed thinking summary the block is collapsed to a plain `Thinking...` placeholder and its signature is dropped so subsequent turns can't re-anchor on the garbled chain.
|
|
52
|
+
- Fixed Codex WebSocket silent stalls by adding protocol pings, inbound queue bounding, clearer idle-timeout diagnostics, and SDK retry clamping for first-event timeouts.
|
|
53
|
+
|
|
54
|
+
### Fixed
|
|
55
|
+
|
|
56
|
+
- Fixed Synthetic model discovery to treat the provider `/models` response as authoritative so deprecated bundled IDs are pruned from the runtime cache, and changed Synthetic login validation to avoid probing a specific model ([#1417](https://github.com/can1357/oh-my-pi/issues/1417)).
|
|
57
|
+
|
|
5
58
|
## [15.5.0] - 2026-05-26
|
|
6
59
|
### Added
|
|
7
60
|
|
|
@@ -416,6 +416,18 @@ export declare class AuthStorage {
|
|
|
416
416
|
* Unlike getApiKey(), this doesn't refresh OAuth tokens.
|
|
417
417
|
*/
|
|
418
418
|
hasAuth(provider: string): boolean;
|
|
419
|
+
/**
|
|
420
|
+
* True iff a dedicated, non-env credential source is configured for this
|
|
421
|
+
* provider — i.e. anything in the cascade EXCEPT `getEnvApiKey(provider)`.
|
|
422
|
+
*
|
|
423
|
+
* Mirrors `hasAuth` minus the env-fallback leg. Useful for callers that
|
|
424
|
+
* need to distinguish "the user explicitly configured this provider"
|
|
425
|
+
* from "an env var happens to alias this provider via the cross-provider
|
|
426
|
+
* fallback map" (see e.g. `xai-oauth → XAI_OAUTH_TOKEN || XAI_API_KEY` in
|
|
427
|
+
* `stream.ts`). Without that distinction, an `XAI_API_KEY`-only setup
|
|
428
|
+
* silently satisfies xai-oauth and routes around `providers.xai.baseUrl`.
|
|
429
|
+
*/
|
|
430
|
+
hasNonEnvCredential(provider: string): boolean;
|
|
419
431
|
/**
|
|
420
432
|
* Check if OAuth credentials are configured for a provider.
|
|
421
433
|
*/
|
|
@@ -24,6 +24,8 @@ export interface ModelManagerOptions<TApi extends Api = Api, TModelsDevPayload =
|
|
|
24
24
|
cacheDbPath?: string;
|
|
25
25
|
/** Maximum cache age in milliseconds before considered stale. Default: 24h. */
|
|
26
26
|
cacheTtlMs?: number;
|
|
27
|
+
/** When true, a successful dynamic fetch is the complete provider catalog and prunes static-only models. */
|
|
28
|
+
dynamicModelsAuthoritative?: boolean;
|
|
27
29
|
/** Optional dynamic endpoint fetcher. */
|
|
28
30
|
fetchDynamicModels?: () => Promise<readonly Model<TApi>[] | null>;
|
|
29
31
|
/** Optional models.dev fallback hook. */
|
|
@@ -42,6 +42,21 @@ export declare function applyGeneratedModelPolicies(models: ApiModel<Api>[]): vo
|
|
|
42
42
|
* - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
|
|
43
43
|
*/
|
|
44
44
|
export declare function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void;
|
|
45
|
+
/**
|
|
46
|
+
* True when the model reasons natively but rejects the wire `reasoning.effort`
|
|
47
|
+
* param (compat.supportsReasoningEffort: false on openai-responses*). Callers
|
|
48
|
+
* are expected to omit the effort field; the wire-side omitReasoningEffort
|
|
49
|
+
* gate (providers/xai-responses.ts:78) is the actual strip, and this
|
|
50
|
+
* predicate is the upstream check that prevents a redundant
|
|
51
|
+
* requireSupportedEffort throw from defeating that gate.
|
|
52
|
+
*
|
|
53
|
+
* Scoped to openai-responses* because that's the only API surface where
|
|
54
|
+
* `compat.supportsReasoningEffort: false` is meaningful today. The
|
|
55
|
+
* `in`-narrowed access is necessary because Model.compat is
|
|
56
|
+
* `AnthropicCompat | OpenAICompat` and the api gate doesn't narrow the
|
|
57
|
+
* union for TS.
|
|
58
|
+
*/
|
|
59
|
+
export declare function modelOmitsReasoningEffort<TApi extends Api>(model: ApiModel<TApi>): boolean;
|
|
45
60
|
/**
|
|
46
61
|
* Returns the supported thinking efforts declared on the model metadata.
|
|
47
62
|
*
|
|
@@ -28,6 +28,8 @@ export interface ProviderDescriptor {
|
|
|
28
28
|
defaultModel: string;
|
|
29
29
|
/** When true, the runtime creates a model manager even without a valid API key (e.g. ollama). */
|
|
30
30
|
allowUnauthenticated?: boolean;
|
|
31
|
+
/** When true, successful runtime discovery replaces bundled provider models instead of merging fallback-only IDs. */
|
|
32
|
+
dynamicModelsAuthoritative?: boolean;
|
|
31
33
|
/** Catalog discovery configuration. Only providers with this field participate in generate-models.ts. */
|
|
32
34
|
catalogDiscovery?: CatalogDiscoveryConfig;
|
|
33
35
|
}
|
|
@@ -53,6 +53,53 @@ export interface XaiModelManagerConfig {
|
|
|
53
53
|
baseUrl?: string;
|
|
54
54
|
}
|
|
55
55
|
export declare function xaiModelManagerOptions(config?: XaiModelManagerConfig): ModelManagerOptions<"openai-completions">;
|
|
56
|
+
export interface XaiOAuthModelManagerConfig {
|
|
57
|
+
apiKey?: string;
|
|
58
|
+
baseUrl?: string;
|
|
59
|
+
}
|
|
60
|
+
interface XAICuratedModel {
|
|
61
|
+
id: string;
|
|
62
|
+
contextWindow: number;
|
|
63
|
+
name?: string;
|
|
64
|
+
/** Whether the model reasons natively. Defaults to true for Grok-4.x family. */
|
|
65
|
+
reasoning?: boolean;
|
|
66
|
+
/**
|
|
67
|
+
* Whether xAI accepts the `reasoning.effort` wire param for this model.
|
|
68
|
+
* Default true. When false: picker hides the effort dial (via
|
|
69
|
+
* getSupportedEfforts in model-thinking.ts) AND wire-side already omits
|
|
70
|
+
* the param via GROK_EFFORT_CAPABLE_PREFIXES in providers/xai-responses.ts.
|
|
71
|
+
* Must agree with that allowlist; two truths kept in sync by curated-catalog
|
|
72
|
+
* author convention until a follow-up Op: compress unifies them.
|
|
73
|
+
*/
|
|
74
|
+
supportsReasoningEffort?: boolean;
|
|
75
|
+
/**
|
|
76
|
+
* Input modalities this model accepts. Defaults to `["text"]` when absent.
|
|
77
|
+
* Vision-capable Grok models MUST list `"image"` here so the curated layer
|
|
78
|
+
* overrides `fetchOpenAICompatibleModels`' default of `["text"]` (which
|
|
79
|
+
* otherwise strips image capability on every online refresh).
|
|
80
|
+
*/
|
|
81
|
+
input?: ("text" | "image")[];
|
|
82
|
+
}
|
|
83
|
+
export declare const XAI_OAUTH_CURATED_MODELS: readonly XAICuratedModel[];
|
|
84
|
+
/**
|
|
85
|
+
* Render `XAI_OAUTH_CURATED_MODELS` as full `Model<"openai-responses">` entries.
|
|
86
|
+
*
|
|
87
|
+
* Single source of truth for the curated-to-`Model` fan-in, consumed by both
|
|
88
|
+
* - {@link xaiOAuthModelManagerOptions} (runtime static seed handed to the model
|
|
89
|
+
* manager so the picker is populated on a fresh login), and
|
|
90
|
+
* - `packages/ai/scripts/generate-models.ts` (bundles the same entries into
|
|
91
|
+
* `models.json`, so the synchronous `ModelRegistry.#loadModels()` boot path
|
|
92
|
+
* sees `xai-oauth` without waiting for a refresh — fixes the boot-time
|
|
93
|
+
* default-model reset when `modelRoles.default = "xai-oauth/<id>"`).
|
|
94
|
+
*
|
|
95
|
+
* `reasoning` defaults to `true` for the Grok-4.x family; the explicit
|
|
96
|
+
* `grok-4.20-0309-non-reasoning` entry opts out via `XAICuratedModel.reasoning`.
|
|
97
|
+
* `maxTokens` uses `UNK_MAX_TOKENS` so id-keyed overlays from a successful
|
|
98
|
+
* dynamic fetch merge cleanly. Mirrors
|
|
99
|
+
* `hermes-agent/hermes_cli/models.py:_XAI_STATIC_FALLBACK`.
|
|
100
|
+
*/
|
|
101
|
+
export declare function buildXaiOAuthStaticSeed(baseUrl?: string): Model<"openai-responses">[];
|
|
102
|
+
export declare function xaiOAuthModelManagerOptions(config?: XaiOAuthModelManagerConfig): ModelManagerOptions<"openai-responses">;
|
|
56
103
|
export interface DeepSeekModelManagerConfig {
|
|
57
104
|
apiKey?: string;
|
|
58
105
|
baseUrl?: string;
|
|
@@ -240,3 +287,4 @@ export interface ModelsDevProviderDescriptor {
|
|
|
240
287
|
export declare function mapModelsDevToModels(data: Record<string, unknown>, descriptors: readonly ModelsDevProviderDescriptor[]): Model<Api>[];
|
|
241
288
|
/** All provider descriptors for models.dev data mapping in generate-models.ts. */
|
|
242
289
|
export declare const MODELS_DEV_PROVIDER_DESCRIPTORS: readonly ModelsDevProviderDescriptor[];
|
|
290
|
+
export {};
|
|
@@ -21,7 +21,26 @@ export interface OpenAICompletionsOptions extends StreamOptions {
|
|
|
21
21
|
/** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
|
|
22
22
|
disableReasoning?: boolean;
|
|
23
23
|
serviceTier?: ServiceTier;
|
|
24
|
+
/**
|
|
25
|
+
* Routing-variant suffix appended to OpenRouter model IDs when none is
|
|
26
|
+
* already present (`anthropic/claude-haiku-latest` → `…:nitro`). Common
|
|
27
|
+
* values: `"nitro"`, `"floor"`, `"online"`, `"exacto"`. Ignored when the
|
|
28
|
+
* resolved `model.id` already contains a colon-suffix after the last
|
|
29
|
+
* provider segment (explicit `:nitro` in the selector or a catalog entry
|
|
30
|
+
* with the variant baked in).
|
|
31
|
+
*/
|
|
32
|
+
openrouterVariant?: string;
|
|
24
33
|
}
|
|
34
|
+
/**
|
|
35
|
+
* Append an OpenRouter routing-variant suffix (e.g. `:nitro`, `:floor`, `:online`, `:exacto`)
|
|
36
|
+
* to a model id when no explicit variant is already present. A variant is considered
|
|
37
|
+
* "already present" when `modelId` contains a colon after the last `/` separator —
|
|
38
|
+
* which covers both user-typed selectors (`anthropic/claude-haiku:nitro`) and catalog
|
|
39
|
+
* entries that bake the variant in (`deepseek/deepseek-v3.1-terminus:exacto`).
|
|
40
|
+
*
|
|
41
|
+
* Exported for unit testing.
|
|
42
|
+
*/
|
|
43
|
+
export declare function applyOpenRouterRoutingVariant(modelId: string, variant: string | undefined): string;
|
|
25
44
|
export declare const streamOpenAICompletions: StreamFunction<"openai-completions">;
|
|
26
45
|
export declare function parseChunkUsage(rawUsage: object, model: Model<"openai-completions">, premiumRequests: number | undefined): AssistantMessage["usage"];
|
|
27
46
|
export declare function convertMessages(model: Model<"openai-completions">, context: Context, compat: ResolvedOpenAICompat): ChatCompletionMessageParam[];
|
|
@@ -74,8 +74,17 @@ type ReasoningOptions = {
|
|
|
74
74
|
* Apply reasoning-related Responses parameters: enable encrypted reasoning content for replay,
|
|
75
75
|
* set effort/summary when requested, and otherwise inject the GPT-5 "Juice: 0" no-reasoning hack.
|
|
76
76
|
* Mutates `params` and may push a developer message into `messages`.
|
|
77
|
+
*
|
|
78
|
+
* @param omitReasoningEffort - When `true`, suppresses `params.reasoning.effort` from the wire
|
|
79
|
+
* body. Set by `xai-responses.ts` via {@link OpenAIResponsesOptions.omitReasoningEffort} for
|
|
80
|
+
* xAI Grok models that return HTTP 400 on any `reasoning.effort` value (e.g. grok-build,
|
|
81
|
+
* grok-4.20-0309-reasoning). When `true` and `options.reasoning` is set but
|
|
82
|
+
* `options.reasoningSummary` is absent, `params.reasoning` is intentionally omitted from the
|
|
83
|
+
* wire body entirely — these models reason natively at their own internal default effort level
|
|
84
|
+
* without needing explicit activation. Callers that pass `options.reasoning` for such models
|
|
85
|
+
* should expect this documented downgrade: the model will reason, but at its default effort.
|
|
77
86
|
*/
|
|
78
|
-
export declare function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(params: P, model: Model<Api>, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string): void;
|
|
87
|
+
export declare function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(params: P, model: Model<Api>, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string, includeEncryptedReasoning?: boolean, omitReasoningEffort?: boolean): void;
|
|
79
88
|
/** Populate `output.usage` from a Responses-API `response.usage` payload. Does not invoke `calculateCost`. */
|
|
80
89
|
export declare function populateResponsesUsageFromResponse(output: AssistantMessage, usage: {
|
|
81
90
|
input_tokens?: number | null;
|
|
@@ -12,11 +12,45 @@ export interface OpenAIResponsesOptions extends StreamOptions {
|
|
|
12
12
|
* Azure OpenAI and GitHub Copilot Responses paths require tool results to match prior tool calls.
|
|
13
13
|
*/
|
|
14
14
|
strictResponsesPairing?: boolean;
|
|
15
|
+
/**
|
|
16
|
+
* Pass `include: ["reasoning.encrypted_content"]` on requests when the
|
|
17
|
+
* model supports reasoning. Default: true (preserves current behavior).
|
|
18
|
+
* Set to false when the upstream Responses endpoint rejects replayed
|
|
19
|
+
* encrypted reasoning (e.g., xAI Grok under SuperGrok OAuth).
|
|
20
|
+
*/
|
|
21
|
+
includeEncryptedReasoning?: boolean;
|
|
22
|
+
/**
|
|
23
|
+
* Strip `type: "reasoning"` items from replayed conversation history
|
|
24
|
+
* before they hit the wire. Default: false (preserves current behavior).
|
|
25
|
+
* Set to true when the upstream rejects replayed reasoning wrappers.
|
|
26
|
+
*/
|
|
27
|
+
filterReasoningHistory?: boolean;
|
|
28
|
+
/**
|
|
29
|
+
* Suppress the `reasoning.effort` wire param when set, even if
|
|
30
|
+
* `options.reasoning` is requested. Default: false. xAI Grok models
|
|
31
|
+
* outside the effort-capable allowlist 400 with "Model X does not
|
|
32
|
+
* support parameter reasoningEffort" — the xAI Responses adapter sets
|
|
33
|
+
* this when the target model is not in GROK_EFFORT_CAPABLE_PREFIXES.
|
|
34
|
+
*/
|
|
35
|
+
omitReasoningEffort?: boolean;
|
|
36
|
+
/**
|
|
37
|
+
* Extra request headers merged onto the underlying client's
|
|
38
|
+
* defaultHeaders. Used by adapter wrappers to inject provider-specific
|
|
39
|
+
* routing or cache hints.
|
|
40
|
+
*/
|
|
41
|
+
headers?: Record<string, string>;
|
|
42
|
+
/**
|
|
43
|
+
* Extra body fields merged into the Responses request payload. Used by
|
|
44
|
+
* adapter wrappers to inject provider-specific body keys (e.g.,
|
|
45
|
+
* prompt_cache_key for prompt-cache routing).
|
|
46
|
+
*/
|
|
47
|
+
extraBody?: Record<string, unknown>;
|
|
15
48
|
}
|
|
16
49
|
/**
|
|
17
50
|
* Generate function for OpenAI Responses API
|
|
18
51
|
*/
|
|
19
52
|
export declare const streamOpenAIResponses: StreamFunction<"openai-responses">;
|
|
53
|
+
export declare function getOpenAIResponsesCacheSessionId(options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId" | "promptCacheKey"> | undefined): string | undefined;
|
|
20
54
|
export declare function supportsDeveloperRole(modelOrBaseUrl: Pick<Model, "provider" | "baseUrl"> | string): boolean;
|
|
21
55
|
/**
|
|
22
56
|
* Whether this model should get the OpenAI custom-tool grammar variant
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { StreamFunction } from "../types";
|
|
2
|
+
/**
|
|
3
|
+
* xAI Grok Responses adapter (SuperGrok OAuth path).
|
|
4
|
+
*
|
|
5
|
+
* Three xAI-specific behaviors vs the generic OpenAI Responses adapter:
|
|
6
|
+
*
|
|
7
|
+
* 1. `x-grok-conv-id` header + body `prompt_cache_key` route prompt-cache
|
|
8
|
+
* hits on xAI's edge. Hermes uses both (agent/transports/codex.py:182-193).
|
|
9
|
+
* The header is undocumented by xAI; `previous_response_id` is the
|
|
10
|
+
* documented alternative — switch if xAI deprecates the header.
|
|
11
|
+
* 2. includeEncryptedReasoning=false — xAI's /v1/responses rejects replayed
|
|
12
|
+
* `encrypted_content` blobs minted under SuperGrok OAuth.
|
|
13
|
+
* 3. filterReasoningHistory=true — strip `type: "reasoning"` items from
|
|
14
|
+
* replayed conversation history; the blob inside is non-replayable under
|
|
15
|
+
* OAuth and the wrapper item 404s without it (store=false; server cannot
|
|
16
|
+
* resolve by id).
|
|
17
|
+
*
|
|
18
|
+
* Everything else is the generic OpenAI Responses transport. The xAI bearer
|
|
19
|
+
* token arrives in `options.apiKey` via AuthStorage.getApiKey() upstream, and
|
|
20
|
+
* the xAI base URL (`https://api.x.ai/v1`) arrives via `model.baseUrl` from
|
|
21
|
+
* the provider registry — not routed through this wrapper.
|
|
22
|
+
*/
|
|
23
|
+
export declare const streamXAIResponses: StreamFunction<"openai-responses">;
|
package/dist/types/types.d.ts
CHANGED
|
@@ -48,7 +48,7 @@ export interface ThinkingConfig {
|
|
|
48
48
|
/** Provider-specific transport used to encode the selected effort. */
|
|
49
49
|
mode: ThinkingControlMode;
|
|
50
50
|
}
|
|
51
|
-
export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
|
|
51
|
+
export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "xai-oauth" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
|
|
52
52
|
export type Provider = KnownProvider | string;
|
|
53
53
|
import type { Effort } from "./model-thinking";
|
|
54
54
|
/** Token budgets for each thinking level (token-based providers only) */
|
|
@@ -294,6 +294,16 @@ export interface SimpleStreamOptions extends StreamOptions {
|
|
|
294
294
|
syntheticApiFormat?: "openai" | "anthropic";
|
|
295
295
|
/** Hint that websocket transport should be preferred when supported by the provider implementation. */
|
|
296
296
|
preferWebsockets?: boolean;
|
|
297
|
+
/**
|
|
298
|
+
* OpenRouter routing-variant suffix automatically appended to model IDs when
|
|
299
|
+
* the request targets OpenRouter (`model.provider === "openrouter"`). Common
|
|
300
|
+
* values: `"nitro"` (throughput), `"floor"` (cheapest), `"online"` (web
|
|
301
|
+
* search plugin), `"exacto"` (cherry-picked high-quality providers, only
|
|
302
|
+
* defined for some models). Ignored when the resolved model id already
|
|
303
|
+
* contains a `:<variant>` suffix (e.g. the user typed `:nitro` explicitly
|
|
304
|
+
* or the catalog entry already names the variant).
|
|
305
|
+
*/
|
|
306
|
+
openrouterVariant?: string;
|
|
297
307
|
}
|
|
298
308
|
export type StreamFunction<TApi extends Api> = (model: Model<TApi>, context: Context, options: OptionsForApi<TApi>) => AssistantMessageEventStream;
|
|
299
309
|
export interface TextSignatureV1 {
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const loginOpenRouter: (options: import("./types").OAuthController) => Promise<string>;
|
|
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
|
|
|
7
7
|
email?: string;
|
|
8
8
|
accountId?: string;
|
|
9
9
|
};
|
|
10
|
-
export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
|
|
10
|
+
export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "openrouter" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xai-oauth" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
|
|
11
11
|
export type OAuthProviderId = OAuthProvider | (string & {});
|
|
12
12
|
export type OAuthPrompt = {
|
|
13
13
|
message: string;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* xAI Grok (SuperGrok Subscription) OAuth flow.
|
|
3
|
+
*
|
|
4
|
+
* Loopback PKCE flow on `127.0.0.1:56121/callback`. One token unlocks Grok-4.x
|
|
5
|
+
* chat, Grok Imagine image generation, and Grok Voice TTS via subsequent
|
|
6
|
+
* commits. Endpoint discovery is hardened against MITM via
|
|
7
|
+
* {@link validateXAIEndpoint}: any non-HTTPS or non-`x.ai`/`*.x.ai` host is
|
|
8
|
+
* rejected on every call site, not just the first.
|
|
9
|
+
*/
|
|
10
|
+
import { OAuthCallbackFlow } from "./callback-server";
|
|
11
|
+
import type { OAuthController, OAuthCredentials } from "./types";
|
|
12
|
+
/**
|
|
13
|
+
* Validate an xAI OIDC discovery endpoint against scheme + host.
|
|
14
|
+
*
|
|
15
|
+
* Hermes `_xai_validate_oauth_endpoint` L2997-3035. The discovery response is
|
|
16
|
+
* long-lived and cached in {@link OAuthCredentials}; a single MITM during
|
|
17
|
+
* initial login could substitute a malicious `token_endpoint` that would then
|
|
18
|
+
* receive every future refresh_token. Rejecting non-HTTPS or non-`x.ai` /
|
|
19
|
+
* `*.x.ai` hosts pins the cached endpoint to the xAI auth origin.
|
|
20
|
+
*
|
|
21
|
+
* @throws Error with message `Invalid xAI <field>: <url>` when the URL fails
|
|
22
|
+
* either scheme or host validation.
|
|
23
|
+
*/
|
|
24
|
+
export declare function validateXAIEndpoint(url: string, field: string): string;
|
|
25
|
+
/**
|
|
26
|
+
* Check whether a JWT access token is at or past its `exp` claim (with an
|
|
27
|
+
* optional refresh-skew margin).
|
|
28
|
+
*
|
|
29
|
+
* Hermes `_xai_access_token_is_expiring` L2979-2994. Returns `false` for any
|
|
30
|
+
* malformed input — this is a refresh-trigger check, not a validation, so
|
|
31
|
+
* non-JWTs ("no token in cache") must NOT trigger a spurious refresh.
|
|
32
|
+
*/
|
|
33
|
+
export declare function isXAIAccessTokenExpiring(jwt: string, skewSeconds?: number): boolean;
|
|
34
|
+
/**
|
|
35
|
+
* xAI Grok OAuth loopback flow (Hermes `_xai_oauth_loopback_login` L5315-5469).
|
|
36
|
+
*
|
|
37
|
+
* Uses a fixed redirect URI so the callback server fails fast instead of
|
|
38
|
+
* falling back to a random port that xAI's redirect_uri allowlist rejects.
|
|
39
|
+
*/
|
|
40
|
+
export declare class XAIOAuthFlow extends OAuthCallbackFlow {
|
|
41
|
+
#private;
|
|
42
|
+
constructor(ctrl: OAuthController);
|
|
43
|
+
generateAuthUrl(state: string, redirectUri: string): Promise<{
|
|
44
|
+
url: string;
|
|
45
|
+
instructions?: string;
|
|
46
|
+
}>;
|
|
47
|
+
exchangeToken(code: string, _state: string, redirectUri: string): Promise<OAuthCredentials>;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Login with xAI Grok OAuth (SuperGrok Subscription).
|
|
51
|
+
*/
|
|
52
|
+
export declare function loginXAIOAuth(ctrl: OAuthController): Promise<OAuthCredentials>;
|
|
53
|
+
/**
|
|
54
|
+
* Refresh an xAI OAuth access token using a stored refresh_token.
|
|
55
|
+
*
|
|
56
|
+
* Hermes `refresh_xai_oauth_pure` L3087-3160. Re-runs OIDC discovery and
|
|
57
|
+
* re-validates the cached `token_endpoint` on the refresh hot path so a
|
|
58
|
+
* cached-but-poisoned endpoint cannot silently leak a refresh_token.
|
|
59
|
+
*/
|
|
60
|
+
export declare function refreshXAIOAuthToken(refreshToken: string): Promise<OAuthCredentials>;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.5.
|
|
4
|
+
"version": "15.5.7",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
"dependencies": {
|
|
41
41
|
"@anthropic-ai/sdk": "^0.94.0",
|
|
42
42
|
"@bufbuild/protobuf": "^2.12.0",
|
|
43
|
-
"@oh-my-pi/pi-utils": "15.5.
|
|
43
|
+
"@oh-my-pi/pi-utils": "15.5.7",
|
|
44
44
|
"openai": "^6.36.0",
|
|
45
45
|
"partial-json": "^0.1.7",
|
|
46
46
|
"zod": "4.4.3"
|
package/src/auth-storage.ts
CHANGED
|
@@ -1280,6 +1280,25 @@ export class AuthStorage {
|
|
|
1280
1280
|
return false;
|
|
1281
1281
|
}
|
|
1282
1282
|
|
|
1283
|
+
/**
|
|
1284
|
+
* True iff a dedicated, non-env credential source is configured for this
|
|
1285
|
+
* provider — i.e. anything in the cascade EXCEPT `getEnvApiKey(provider)`.
|
|
1286
|
+
*
|
|
1287
|
+
* Mirrors `hasAuth` minus the env-fallback leg. Useful for callers that
|
|
1288
|
+
* need to distinguish "the user explicitly configured this provider"
|
|
1289
|
+
* from "an env var happens to alias this provider via the cross-provider
|
|
1290
|
+
* fallback map" (see e.g. `xai-oauth → XAI_OAUTH_TOKEN || XAI_API_KEY` in
|
|
1291
|
+
* `stream.ts`). Without that distinction, an `XAI_API_KEY`-only setup
|
|
1292
|
+
* silently satisfies xai-oauth and routes around `providers.xai.baseUrl`.
|
|
1293
|
+
*/
|
|
1294
|
+
hasNonEnvCredential(provider: string): boolean {
|
|
1295
|
+
if (this.#runtimeOverrides.has(provider)) return true;
|
|
1296
|
+
if (this.#configOverrides.has(provider)) return true;
|
|
1297
|
+
if (this.#getCredentialsForProvider(provider).length > 0) return true;
|
|
1298
|
+
if (this.#fallbackResolver?.(provider)) return true;
|
|
1299
|
+
return false;
|
|
1300
|
+
}
|
|
1301
|
+
|
|
1283
1302
|
/**
|
|
1284
1303
|
* Check if OAuth credentials are configured for a provider.
|
|
1285
1304
|
*/
|
|
@@ -1378,6 +1397,14 @@ export class AuthStorage {
|
|
|
1378
1397
|
});
|
|
1379
1398
|
break;
|
|
1380
1399
|
}
|
|
1400
|
+
case "xai-oauth": {
|
|
1401
|
+
const { loginXAIOAuth } = await import("./utils/oauth/xai-oauth");
|
|
1402
|
+
credentials = await loginXAIOAuth({
|
|
1403
|
+
...ctrl,
|
|
1404
|
+
onManualCodeInput: ctrl.onManualCodeInput ?? manualCodeInput,
|
|
1405
|
+
});
|
|
1406
|
+
break;
|
|
1407
|
+
}
|
|
1381
1408
|
case "alibaba-coding-plan": {
|
|
1382
1409
|
const { loginAlibabaCodingPlan } = await import("./utils/oauth/alibaba-coding-plan");
|
|
1383
1410
|
const apiKey = await loginAlibabaCodingPlan(ctrl);
|
|
@@ -1586,6 +1613,12 @@ export class AuthStorage {
|
|
|
1586
1613
|
await saveApiKeyCredential(apiKey);
|
|
1587
1614
|
return;
|
|
1588
1615
|
}
|
|
1616
|
+
case "openrouter": {
|
|
1617
|
+
const { loginOpenRouter } = await import("./utils/oauth/openrouter");
|
|
1618
|
+
const apiKey = await loginOpenRouter(ctrl);
|
|
1619
|
+
await saveApiKeyCredential(apiKey);
|
|
1620
|
+
return;
|
|
1621
|
+
}
|
|
1589
1622
|
case "together": {
|
|
1590
1623
|
const { loginTogether } = await import("./utils/oauth/together");
|
|
1591
1624
|
const apiKey = await loginTogether(ctrl);
|
package/src/model-manager.ts
CHANGED
|
@@ -34,6 +34,8 @@ export interface ModelManagerOptions<TApi extends Api = Api, TModelsDevPayload =
|
|
|
34
34
|
cacheDbPath?: string;
|
|
35
35
|
/** Maximum cache age in milliseconds before considered stale. Default: 24h. */
|
|
36
36
|
cacheTtlMs?: number;
|
|
37
|
+
/** When true, a successful dynamic fetch is the complete provider catalog and prunes static-only models. */
|
|
38
|
+
dynamicModelsAuthoritative?: boolean;
|
|
37
39
|
/** Optional dynamic endpoint fetcher. */
|
|
38
40
|
fetchDynamicModels?: () => Promise<readonly Model<TApi>[] | null>;
|
|
39
41
|
/** Optional models.dev fallback hook. */
|
|
@@ -110,30 +112,27 @@ export async function resolveProviderModels<TApi extends Api = Api, TModelsDevPa
|
|
|
110
112
|
options.staticModels ?? getBundledModels(options.providerId as GeneratedProvider),
|
|
111
113
|
);
|
|
112
114
|
const cache = readModelCache<TApi>(options.providerId, ttlMs, now, dbPath);
|
|
115
|
+
const dynamicModelsAuthoritative = options.dynamicModelsAuthoritative ?? false;
|
|
116
|
+
const staticFingerprint = fingerprintStatic(staticModels, dynamicModelsAuthoritative);
|
|
117
|
+
const cacheFingerprintMatches = cache?.staticFingerprint === staticFingerprint && staticFingerprint.length > 0;
|
|
118
|
+
const hasUsableFreshCache = (cache?.fresh ?? false) && (!dynamicModelsAuthoritative || cacheFingerprintMatches);
|
|
113
119
|
const dynamicFetcher = options.fetchDynamicModels;
|
|
114
120
|
const hasDynamicFetcher = typeof dynamicFetcher === "function";
|
|
115
|
-
const hasAuthoritativeCache = (cache?.authoritative ?? false) || !hasDynamicFetcher;
|
|
121
|
+
const hasAuthoritativeCache = ((cache?.authoritative ?? false) && hasUsableFreshCache) || !hasDynamicFetcher;
|
|
116
122
|
const cacheAgeMs = cache ? now() - cache.updatedAt : Number.POSITIVE_INFINITY;
|
|
117
123
|
const shouldFetchFromNetwork = shouldFetchRemoteSources(
|
|
118
124
|
strategy,
|
|
119
|
-
|
|
125
|
+
hasUsableFreshCache,
|
|
120
126
|
hasAuthoritativeCache,
|
|
121
127
|
cacheAgeMs,
|
|
122
128
|
);
|
|
123
|
-
const staticFingerprint = fingerprintStatic(staticModels);
|
|
124
129
|
|
|
125
130
|
// Cold-start fast path: when a fresh, authoritative cache exists, the network
|
|
126
131
|
// fetch is skipped, AND the static catalog slice is byte-identical to what
|
|
127
132
|
// was merged in last time, the cache row IS the authoritative merge result.
|
|
128
133
|
// Re-running `mergeDynamicModels(static, cache)` would just rebuild the same
|
|
129
134
|
// objects (~800ms in the steady-state cold-start profile for `omp -p hi`).
|
|
130
|
-
if (
|
|
131
|
-
!shouldFetchFromNetwork &&
|
|
132
|
-
cache?.fresh &&
|
|
133
|
-
hasAuthoritativeCache &&
|
|
134
|
-
cache.staticFingerprint === staticFingerprint &&
|
|
135
|
-
cache.staticFingerprint.length > 0
|
|
136
|
-
) {
|
|
135
|
+
if (!shouldFetchFromNetwork && cache?.fresh && hasAuthoritativeCache && cacheFingerprintMatches) {
|
|
137
136
|
return { models: passModelList<TApi>(cache.models), stale: false };
|
|
138
137
|
}
|
|
139
138
|
|
|
@@ -142,16 +141,21 @@ export async function resolveProviderModels<TApi extends Api = Api, TModelsDevPa
|
|
|
142
141
|
: [null, null];
|
|
143
142
|
const modelsDevModels = normalizeModelList<TApi>(fetchedModelsDevModels ?? []);
|
|
144
143
|
const shouldUseFreshCacheAsAuthoritative =
|
|
145
|
-
strategy === "online-if-uncached" &&
|
|
144
|
+
strategy === "online-if-uncached" && hasUsableFreshCache && hasAuthoritativeCache;
|
|
146
145
|
const dynamicFetchSucceeded = fetchedDynamicModels !== null;
|
|
147
146
|
const cacheModels = dynamicFetchSucceeded ? [] : normalizeModelList<TApi>(cache?.models ?? []);
|
|
148
147
|
const dynamicModels = fetchedDynamicModels ?? [];
|
|
149
148
|
const mergedWithCache = mergeDynamicModels(mergeModelSources(staticModels, modelsDevModels), cacheModels);
|
|
150
|
-
const models = mergeDynamicModels(mergedWithCache, dynamicModels);
|
|
149
|
+
const mergedModels = mergeDynamicModels(mergedWithCache, dynamicModels);
|
|
150
|
+
const models =
|
|
151
|
+
dynamicModelsAuthoritative && dynamicFetchSucceeded ? retainModelIds(mergedModels, dynamicModels) : mergedModels;
|
|
151
152
|
const dynamicAuthoritative = !hasDynamicFetcher || dynamicFetchSucceeded || shouldUseFreshCacheAsAuthoritative;
|
|
152
153
|
if (shouldFetchFromNetwork) {
|
|
153
154
|
if (dynamicFetchSucceeded) {
|
|
154
|
-
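const snapshotModels = mergeDynamicModels(mergeModelSources(staticModels, modelsDevModels), dynamicModels);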
|
|
155
|
+
const mergedSnapshot = mergeDynamicModels(mergeModelSources(staticModels, modelsDevModels), dynamicModels);
|
|
156
|
+
const snapshotModels = dynamicModelsAuthoritative
|
|
157
|
+
? retainModelIds(mergedSnapshot, dynamicModels)
|
|
158
|
+
: mergedSnapshot;
|
|
155
159
|
writeModelCache(options.providerId, now(), snapshotModels, true, staticFingerprint, dbPath);
|
|
156
160
|
} else {
|
|
157
161
|
// Dynamic fetch failed — update cache with a non-authoritative snapshot so
|
|
@@ -270,6 +274,15 @@ function mergeDynamicModels<TApi extends Api>(
|
|
|
270
274
|
return Array.from(merged.values());
|
|
271
275
|
}
|
|
272
276
|
|
|
277
|
+
function retainModelIds<TApi extends Api>(
|
|
278
|
+
models: readonly Model<TApi>[],
|
|
279
|
+
retainedModels: readonly Model<TApi>[],
|
|
280
|
+
): Model<TApi>[] {
|
|
281
|
+
if (retainedModels.length === 0 || models.length === 0) return [];
|
|
282
|
+
const retainedIds = new Set(retainedModels.map(model => model.id));
|
|
283
|
+
return models.filter(model => retainedIds.has(model.id));
|
|
284
|
+
}
|
|
285
|
+
|
|
273
286
|
/**
|
|
274
287
|
* Stable, low-collision fingerprint of a static catalog slice. Cached by
|
|
275
288
|
* reference so repeat calls in the same process (e.g. multiple cold-start
|
|
@@ -278,8 +291,12 @@ function mergeDynamicModels<TApi extends Api>(
|
|
|
278
291
|
*/
|
|
279
292
|
const kStaticFingerprint = Symbol("model-manager.staticFingerprint");
|
|
280
293
|
type ModelArrayWithFingerprint = readonly Model<Api>[] & { [kStaticFingerprint]?: string };
|
|
281
|
-
function fingerprintStatic<TApi extends Api>(models: readonly Model<TApi>[]): string {
|
|
294
|
+
function fingerprintStatic<TApi extends Api>(
|
|
295
|
+
models: readonly Model<TApi>[],
|
|
296
|
+
dynamicModelsAuthoritative = false,
|
|
297
|
+
): string {
|
|
282
298
|
if (models.length === 0) return "empty";
|
|
299
|
+
if (dynamicModelsAuthoritative) return `authoritative:${fingerprintStatic(models)}`;
|
|
283
300
|
const tagged = models as ModelArrayWithFingerprint;
|
|
284
301
|
const cached = tagged[kStaticFingerprint];
|
|
285
302
|
if (cached !== undefined) return cached;
|
package/src/model-thinking.ts
CHANGED
|
@@ -198,6 +198,28 @@ export function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void {
|
|
|
198
198
|
}
|
|
199
199
|
}
|
|
200
200
|
|
|
201
|
+
/**
|
|
202
|
+
* True when the model reasons natively but rejects the wire `reasoning.effort`
|
|
203
|
+
* param (compat.supportsReasoningEffort: false on openai-responses*). Callers
|
|
204
|
+
* are expected to omit the effort field; the wire-side omitReasoningEffort
|
|
205
|
+
* gate (providers/xai-responses.ts:78) is the actual strip, and this
|
|
206
|
+
* predicate is the upstream check that prevents a redundant
|
|
207
|
+
* requireSupportedEffort throw from defeating that gate.
|
|
208
|
+
*
|
|
209
|
+
* Scoped to openai-responses* because that's the only API surface where
|
|
210
|
+
* `compat.supportsReasoningEffort: false` is meaningful today. The
|
|
211
|
+
* `in`-narrowed access is necessary because Model.compat is
|
|
212
|
+
* `AnthropicCompat | OpenAICompat` and the api gate doesn't narrow the
|
|
213
|
+
* union for TS.
|
|
214
|
+
*/
|
|
215
|
+
export function modelOmitsReasoningEffort<TApi extends Api>(model: ApiModel<TApi>): boolean {
|
|
216
|
+
if (model.api !== "openai-responses" && model.api !== "openai-codex-responses") {
|
|
217
|
+
return false;
|
|
218
|
+
}
|
|
219
|
+
const compat = model.compat;
|
|
220
|
+
return Boolean(compat && "supportsReasoningEffort" in compat && compat.supportsReasoningEffort === false);
|
|
221
|
+
}
|
|
222
|
+
|
|
201
223
|
/**
|
|
202
224
|
* Returns the supported thinking efforts declared on the model metadata.
|
|
203
225
|
*
|
|
@@ -211,6 +233,16 @@ export function getSupportedEfforts<TApi extends Api>(model: ApiModel<TApi>): re
|
|
|
211
233
|
if (!model.reasoning) {
|
|
212
234
|
return [];
|
|
213
235
|
}
|
|
236
|
+
// Models that reason natively but reject the `reasoning.effort` wire param
|
|
237
|
+
// (xAI Grok off the GROK_EFFORT_CAPABLE_PREFIXES allowlist in
|
|
238
|
+
// providers/xai-responses.ts: grok-build, grok-4.20-0309-reasoning) hide the
|
|
239
|
+
// picker's effort dial. Scoped to openai-responses* by
|
|
240
|
+
// `modelOmitsReasoningEffort` — openai-completions has its own
|
|
241
|
+
// supportsReasoningEffort consultation at inferFallbackEfforts L536 and
|
|
242
|
+
// changing that path's semantics is out-of-scope.
|
|
243
|
+
if (modelOmitsReasoningEffort(model)) {
|
|
244
|
+
return [];
|
|
245
|
+
}
|
|
214
246
|
if (!model.thinking) {
|
|
215
247
|
throw new Error(`Model ${model.provider}/${model.id} is missing thinking metadata`);
|
|
216
248
|
}
|