@oh-my-pi/pi-ai 15.5.4 → 15.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/CHANGELOG.md +53 -0
  2. package/dist/types/auth-storage.d.ts +12 -0
  3. package/dist/types/model-manager.d.ts +2 -0
  4. package/dist/types/model-thinking.d.ts +15 -0
  5. package/dist/types/provider-models/descriptors.d.ts +2 -0
  6. package/dist/types/provider-models/openai-compat.d.ts +48 -0
  7. package/dist/types/providers/openai-completions.d.ts +19 -0
  8. package/dist/types/providers/openai-responses-shared.d.ts +10 -1
  9. package/dist/types/providers/openai-responses.d.ts +34 -0
  10. package/dist/types/providers/xai-responses.d.ts +23 -0
  11. package/dist/types/types.d.ts +11 -1
  12. package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  13. package/dist/types/utils/oauth/openrouter.d.ts +1 -0
  14. package/dist/types/utils/oauth/types.d.ts +1 -1
  15. package/dist/types/utils/oauth/xai-oauth.d.ts +60 -0
  16. package/package.json +2 -2
  17. package/src/auth-storage.ts +33 -0
  18. package/src/model-manager.ts +31 -14
  19. package/src/model-thinking.ts +32 -0
  20. package/src/models.json +127 -0
  21. package/src/provider-models/descriptors.ts +15 -3
  22. package/src/provider-models/openai-compat.ts +292 -41
  23. package/src/providers/anthropic.ts +35 -5
  24. package/src/providers/openai-codex-responses.ts +283 -8
  25. package/src/providers/openai-completions.ts +40 -9
  26. package/src/providers/openai-responses-shared.ts +38 -9
  27. package/src/providers/openai-responses.ts +62 -8
  28. package/src/providers/pi-native-server.ts +1 -0
  29. package/src/providers/xai-responses.ts +82 -0
  30. package/src/stream.ts +17 -1
  31. package/src/types.ts +11 -0
  32. package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
  33. package/src/utils/oauth/index.ts +16 -0
  34. package/src/utils/oauth/openrouter.ts +20 -0
  35. package/src/utils/oauth/synthetic.ts +2 -3
  36. package/src/utils/oauth/types.ts +2 -0
  37. package/src/utils/oauth/xai-oauth.ts +342 -0
package/CHANGELOG.md CHANGED
@@ -2,6 +2,59 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.5.7] - 2026-05-27
6
+ ### Added
7
+ - `SimpleStreamOptions.openrouterVariant` (`"nitro"`, `"floor"`, `"online"`, `"exacto"`, …) — when set, appends `:<variant>` to OpenRouter model IDs at request time, leaving ids that already carry an explicit `:suffix` untouched. Plumbed through `openai-completions` and the pi-native gateway forwarder.
8
+
9
+ - xAI Grok OAuth (SuperGrok Subscription) provider in `/login`. Loopback PKCE flow on `127.0.0.1:56121`; the token unlocks Grok-4.x chat. Ported from NousResearch/hermes-agent (MIT).
10
+ - OpenRouter provider in `/login`. API-key paste flow validated against `https://openrouter.ai/api/v1/auth/key` (the `/models` endpoint is public and cannot validate auth). The pasted key is stored under the existing `openrouter` provider id used by `OPENROUTER_API_KEY`.
11
+ - `XAI_OAUTH_TOKEN` environment variable accepted as a headless fallback for the xAI Grok OAuth provider.
12
+
13
+ ### Changed
14
+
15
+ - `OpenAIResponsesOptions` gains four optional, provider-agnostic fields that adapter wrappers can use to compose provider-specific behavior on top of the generic transport: `includeEncryptedReasoning` (gates `include: ["reasoning.encrypted_content"]`; default `true`, preserves current behavior), `filterReasoningHistory` (strips replayed `type: "reasoning"` items from conversation history; default `false`), `headers` (merged onto the client's default headers), and `extraBody` (merged into the request payload).
16
+ - The existing `XAI_API_KEY` path is unchanged — it continues to use the OpenAI-completions transport.
17
+
18
+ ### Fixed
19
+
20
+ - Fixed OpenRouter DeepSeek V4 tool-call follow-up requests replaying normalized `reasoning` as-is instead of DeepSeek's required `reasoning_content`, which caused HTTP 400 errors in thinking mode. ([#1445](https://github.com/can1357/oh-my-pi/issues/1445))
21
+
22
+ ## [15.5.6] - 2026-05-27
23
+ ### Added
24
+
25
+ - Added `PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS` to control how long an idle Codex WebSocket stays eligible for reuse, with `0` disabling the check
26
+
27
+ ### Fixed
28
+
29
+ - Fixed stalled requests on reused Codex WebSocket connections that had gone silent: a connection idle past the idle-reuse threshold is now dropped and replaced with a fresh handshake before the next request
30
+ - Fixed stale response frames from a completed turn being left in the websocket queue, so subsequent requests no longer process terminal frames from the previous response
31
+ - Fixed websocket dead-socket detection to fail a stale connection when no inbound traffic or pong is observed after a ping timeout, improving recovery on runtimes that do not emit pong events
32
+
33
+ ## [15.5.5] - 2026-05-27
34
+
35
+ ### Added
36
+
37
+ - Added `PI_CODEX_WEBSOCKET_PING_INTERVAL_MS` to configure the interval for Codex WebSocket protocol ping heartbeats
38
+ - Added `PI_CODEX_WEBSOCKET_PONG_TIMEOUT_MS` to configure the Codex WebSocket pong timeout used to detect unresponsive connections
39
+ - Added `PI_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY` to configure the maximum buffered Codex WebSocket inbound queue size before transport fallback
40
+
41
+ ### Changed
42
+
43
+ - Improved Codex WebSocket timeout diagnostics to include last event type and time since last progress event
44
+ - Enhanced Codex WebSocket error classification to recognize ping, pong, send, and queue-overflow failures as retryable
45
+
46
+ ### Fixed
47
+
48
+ - Fixed Codex WebSocket send failures by wrapping socket.send() in try-catch and surfacing errors as retryable transport errors
49
+ - Fixed Codex WebSocket inbound queue overflow by adding capacity bounds and triggering fallback to SSE when exceeded
50
+ - Fixed Codex WebSocket pong timeout detection by tracking pong events and failing the connection when no pong is received within the configured timeout
51
+ - Fixed Anthropic streaming to suppress hallucinated meta-prompt thinking blocks (the recent "I don't see any current rewritten thinking..." regression). When the marker phrase `rewritten thinking` appears in a streamed thinking summary the block is collapsed to a plain `Thinking...` placeholder and its signature is dropped so subsequent turns can't re-anchor on the garbled chain.
52
+ - Fixed Codex WebSocket silent stalls by adding protocol pings, inbound queue bounding, clearer idle-timeout diagnostics, and SDK retry clamping for first-event timeouts.
53
+
54
+ ### Fixed
55
+
56
+ - Fixed Synthetic model discovery to treat the provider `/models` response as authoritative so deprecated bundled IDs are pruned from the runtime cache, and changed Synthetic login validation to avoid probing a specific model ([#1417](https://github.com/can1357/oh-my-pi/issues/1417)).
57
+
5
58
  ## [15.5.0] - 2026-05-26
6
59
  ### Added
7
60
 
@@ -416,6 +416,18 @@ export declare class AuthStorage {
416
416
  * Unlike getApiKey(), this doesn't refresh OAuth tokens.
417
417
  */
418
418
  hasAuth(provider: string): boolean;
419
+ /**
420
+ * True iff a dedicated, non-env credential source is configured for this
421
+ * provider — i.e. anything in the cascade EXCEPT `getEnvApiKey(provider)`.
422
+ *
423
+ * Mirrors `hasAuth` minus the env-fallback leg. Useful for callers that
424
+ * need to distinguish "the user explicitly configured this provider"
425
+ * from "an env var happens to alias this provider via the cross-provider
426
+ * fallback map" (see e.g. `xai-oauth → XAI_OAUTH_TOKEN || XAI_API_KEY` in
427
+ * `stream.ts`). Without that distinction, an `XAI_API_KEY`-only setup
428
+ * silently satisfies xai-oauth and routes around `providers.xai.baseUrl`.
429
+ */
430
+ hasNonEnvCredential(provider: string): boolean;
419
431
  /**
420
432
  * Check if OAuth credentials are configured for a provider.
421
433
  */
@@ -24,6 +24,8 @@ export interface ModelManagerOptions<TApi extends Api = Api, TModelsDevPayload =
24
24
  cacheDbPath?: string;
25
25
  /** Maximum cache age in milliseconds before considered stale. Default: 24h. */
26
26
  cacheTtlMs?: number;
27
+ /** When true, a successful dynamic fetch is the complete provider catalog and prunes static-only models. */
28
+ dynamicModelsAuthoritative?: boolean;
27
29
  /** Optional dynamic endpoint fetcher. */
28
30
  fetchDynamicModels?: () => Promise<readonly Model<TApi>[] | null>;
29
31
  /** Optional models.dev fallback hook. */
@@ -42,6 +42,21 @@ export declare function applyGeneratedModelPolicies(models: ApiModel<Api>[]): vo
42
42
  * - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
43
43
  */
44
44
  export declare function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void;
45
+ /**
46
+ * True when the model reasons natively but rejects the wire `reasoning.effort`
47
+ * param (compat.supportsReasoningEffort: false on openai-responses*). Callers
48
+ * are expected to omit the effort field; the wire-side omitReasoningEffort
49
+ * gate (providers/xai-responses.ts:78) is the actual strip, and this
50
+ * predicate is the upstream check that prevents a redundant
51
+ * requireSupportedEffort throw from defeating that gate.
52
+ *
53
+ * Scoped to openai-responses* because that's the only API surface where
54
+ * `compat.supportsReasoningEffort: false` is meaningful today. The
55
+ * `in`-narrowed access is necessary because Model.compat is
56
+ * `AnthropicCompat | OpenAICompat` and the api gate doesn't narrow the
57
+ * union for TS.
58
+ */
59
+ export declare function modelOmitsReasoningEffort<TApi extends Api>(model: ApiModel<TApi>): boolean;
45
60
  /**
46
61
  * Returns the supported thinking efforts declared on the model metadata.
47
62
  *
@@ -28,6 +28,8 @@ export interface ProviderDescriptor {
28
28
  defaultModel: string;
29
29
  /** When true, the runtime creates a model manager even without a valid API key (e.g. ollama). */
30
30
  allowUnauthenticated?: boolean;
31
+ /** When true, successful runtime discovery replaces bundled provider models instead of merging fallback-only IDs. */
32
+ dynamicModelsAuthoritative?: boolean;
31
33
  /** Catalog discovery configuration. Only providers with this field participate in generate-models.ts. */
32
34
  catalogDiscovery?: CatalogDiscoveryConfig;
33
35
  }
@@ -53,6 +53,53 @@ export interface XaiModelManagerConfig {
53
53
  baseUrl?: string;
54
54
  }
55
55
  export declare function xaiModelManagerOptions(config?: XaiModelManagerConfig): ModelManagerOptions<"openai-completions">;
56
+ export interface XaiOAuthModelManagerConfig {
57
+ apiKey?: string;
58
+ baseUrl?: string;
59
+ }
60
+ interface XAICuratedModel {
61
+ id: string;
62
+ contextWindow: number;
63
+ name?: string;
64
+ /** Whether the model reasons natively. Defaults to true for Grok-4.x family. */
65
+ reasoning?: boolean;
66
+ /**
67
+ * Whether xAI accepts the `reasoning.effort` wire param for this model.
68
+ * Default true. When false: picker hides the effort dial (via
69
+ * getSupportedEfforts in model-thinking.ts) AND wire-side already omits
70
+ * the param via GROK_EFFORT_CAPABLE_PREFIXES in providers/xai-responses.ts.
71
+ * Must agree with that allowlist; two truths kept in sync by curated-catalog
72
+ * author convention until a follow-up Op: compress unifies them.
73
+ */
74
+ supportsReasoningEffort?: boolean;
75
+ /**
76
+ * Input modalities this model accepts. Defaults to `["text"]` when absent.
77
+ * Vision-capable Grok models MUST list `"image"` here so the curated layer
78
+ * overrides `fetchOpenAICompatibleModels`' default of `["text"]` (which
79
+ * otherwise strips image capability on every online refresh).
80
+ */
81
+ input?: ("text" | "image")[];
82
+ }
83
+ export declare const XAI_OAUTH_CURATED_MODELS: readonly XAICuratedModel[];
84
+ /**
85
+ * Render `XAI_OAUTH_CURATED_MODELS` as full `Model<"openai-responses">` entries.
86
+ *
87
+ * Single source of truth for the curated to Model fan-in, consumed by both
88
+ * - {@link xaiOAuthModelManagerOptions} (runtime static seed handed to the model
89
+ * manager so the picker is populated on a fresh login), and
90
+ * - `packages/ai/scripts/generate-models.ts` (bundles the same entries into
91
+ * `models.json`, so the synchronous `ModelRegistry.#loadModels()` boot path
92
+ * sees `xai-oauth` without waiting for a refresh — fixes the boot-time
93
+ * default-model reset when `modelRoles.default = "xai-oauth/<id>"`).
94
+ *
95
+ * `reasoning` defaults to `true` for the Grok-4.x family; the explicit
96
+ * `grok-4.20-0309-non-reasoning` entry opts out via `XAICuratedModel.reasoning`.
97
+ * `maxTokens` uses `UNK_MAX_TOKENS` so id-keyed overlays from a successful
98
+ * dynamic fetch merge cleanly. Mirrors
99
+ * `hermes-agent/hermes_cli/models.py:_XAI_STATIC_FALLBACK`.
100
+ */
101
+ export declare function buildXaiOAuthStaticSeed(baseUrl?: string): Model<"openai-responses">[];
102
+ export declare function xaiOAuthModelManagerOptions(config?: XaiOAuthModelManagerConfig): ModelManagerOptions<"openai-responses">;
56
103
  export interface DeepSeekModelManagerConfig {
57
104
  apiKey?: string;
58
105
  baseUrl?: string;
@@ -240,3 +287,4 @@ export interface ModelsDevProviderDescriptor {
240
287
  export declare function mapModelsDevToModels(data: Record<string, unknown>, descriptors: readonly ModelsDevProviderDescriptor[]): Model<Api>[];
241
288
  /** All provider descriptors for models.dev data mapping in generate-models.ts. */
242
289
  export declare const MODELS_DEV_PROVIDER_DESCRIPTORS: readonly ModelsDevProviderDescriptor[];
290
+ export {};
@@ -21,7 +21,26 @@ export interface OpenAICompletionsOptions extends StreamOptions {
21
21
  /** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
22
22
  disableReasoning?: boolean;
23
23
  serviceTier?: ServiceTier;
24
+ /**
25
+ * Routing-variant suffix appended to OpenRouter model IDs when none is
26
+ * already present (`anthropic/claude-haiku-latest` → `…:nitro`). Common
27
+ * values: `"nitro"`, `"floor"`, `"online"`, `"exacto"`. Ignored when the
28
+ * resolved `model.id` already contains a colon-suffix after the last
29
+ * provider segment (explicit `:nitro` in the selector or a catalog entry
30
+ * with the variant baked in).
31
+ */
32
+ openrouterVariant?: string;
24
33
  }
34
+ /**
35
+ * Append an OpenRouter routing-variant suffix (e.g. `:nitro`, `:floor`, `:online`, `:exacto`)
36
+ * to a model id when no explicit variant is already present. A variant is considered
37
+ * "already present" when `modelId` contains a colon after the last `/` separator —
38
+ * which covers both user-typed selectors (`anthropic/claude-haiku:nitro`) and catalog
39
+ * entries that bake the variant in (`deepseek/deepseek-v3.1-terminus:exacto`).
40
+ *
41
+ * Exported for unit testing.
42
+ */
43
+ export declare function applyOpenRouterRoutingVariant(modelId: string, variant: string | undefined): string;
25
44
  export declare const streamOpenAICompletions: StreamFunction<"openai-completions">;
26
45
  export declare function parseChunkUsage(rawUsage: object, model: Model<"openai-completions">, premiumRequests: number | undefined): AssistantMessage["usage"];
27
46
  export declare function convertMessages(model: Model<"openai-completions">, context: Context, compat: ResolvedOpenAICompat): ChatCompletionMessageParam[];
@@ -74,8 +74,17 @@ type ReasoningOptions = {
74
74
  * Apply reasoning-related Responses parameters: enable encrypted reasoning content for replay,
75
75
  * set effort/summary when requested, and otherwise inject the GPT-5 "Juice: 0" no-reasoning hack.
76
76
  * Mutates `params` and may push a developer message into `messages`.
77
+ *
78
+ * @param omitReasoningEffort - When `true`, suppresses `params.reasoning.effort` from the wire
79
+ * body. Set by `xai-responses.ts` via {@link OpenAIResponsesOptions.omitReasoningEffort} for
80
+ * xAI Grok models that return HTTP 400 on any `reasoning.effort` value (e.g. grok-build,
81
+ * grok-4.20-0309-reasoning). When `true` and `options.reasoning` is set but
82
+ * `options.reasoningSummary` is absent, `params.reasoning` is intentionally omitted from the
83
+ * wire body entirely — these models reason natively at their own internal default effort level
84
+ * without needing explicit activation. Callers that pass `options.reasoning` for such models
85
+ * should expect this documented downgrade: the model will reason, but at its default effort.
77
86
  */
78
- export declare function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(params: P, model: Model<Api>, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string): void;
87
+ export declare function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(params: P, model: Model<Api>, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string, includeEncryptedReasoning?: boolean, omitReasoningEffort?: boolean): void;
79
88
  /** Populate `output.usage` from a Responses-API `response.usage` payload. Does not invoke `calculateCost`. */
80
89
  export declare function populateResponsesUsageFromResponse(output: AssistantMessage, usage: {
81
90
  input_tokens?: number | null;
@@ -12,11 +12,45 @@ export interface OpenAIResponsesOptions extends StreamOptions {
12
12
  * Azure OpenAI and GitHub Copilot Responses paths require tool results to match prior tool calls.
13
13
  */
14
14
  strictResponsesPairing?: boolean;
15
+ /**
16
+ * Pass `include: ["reasoning.encrypted_content"]` on requests when the
17
+ * model supports reasoning. Default: true (preserves current behavior).
18
+ * Set to false when the upstream Responses endpoint rejects replayed
19
+ * encrypted reasoning (e.g., xAI Grok under SuperGrok OAuth).
20
+ */
21
+ includeEncryptedReasoning?: boolean;
22
+ /**
23
+ * Strip `type: "reasoning"` items from replayed conversation history
24
+ * before they hit the wire. Default: false (preserves current behavior).
25
+ * Set to true when the upstream rejects replayed reasoning wrappers.
26
+ */
27
+ filterReasoningHistory?: boolean;
28
+ /**
29
+ * Suppress the `reasoning.effort` wire param when set, even if
30
+ * `options.reasoning` is requested. Default: false. xAI Grok models
31
+ * outside the effort-capable allowlist 400 with "Model X does not
32
+ * support parameter reasoningEffort" — the xAI Responses adapter sets
33
+ * this when the target model is not in GROK_EFFORT_CAPABLE_PREFIXES.
34
+ */
35
+ omitReasoningEffort?: boolean;
36
+ /**
37
+ * Extra request headers merged onto the underlying client's
38
+ * defaultHeaders. Used by adapter wrappers to inject provider-specific
39
+ * routing or cache hints.
40
+ */
41
+ headers?: Record<string, string>;
42
+ /**
43
+ * Extra body fields merged into the Responses request payload. Used by
44
+ * adapter wrappers to inject provider-specific body keys (e.g.,
45
+ * prompt_cache_key for prompt-cache routing).
46
+ */
47
+ extraBody?: Record<string, unknown>;
15
48
  }
16
49
  /**
17
50
  * Generate function for OpenAI Responses API
18
51
  */
19
52
  export declare const streamOpenAIResponses: StreamFunction<"openai-responses">;
53
+ export declare function getOpenAIResponsesCacheSessionId(options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId" | "promptCacheKey"> | undefined): string | undefined;
20
54
  export declare function supportsDeveloperRole(modelOrBaseUrl: Pick<Model, "provider" | "baseUrl"> | string): boolean;
21
55
  /**
22
56
  * Whether this model should get the OpenAI custom-tool grammar variant
@@ -0,0 +1,23 @@
1
+ import type { StreamFunction } from "../types";
2
+ /**
3
+ * xAI Grok Responses adapter (SuperGrok OAuth path).
4
+ *
5
+ * Three xAI-specific behaviors vs the generic OpenAI Responses adapter:
6
+ *
7
+ * 1. `x-grok-conv-id` header + body `prompt_cache_key` route prompt-cache
8
+ * hits on xAI's edge. Hermes uses both (agent/transports/codex.py:182-193).
9
+ * The header is undocumented by xAI; `previous_response_id` is the
10
+ * documented alternative — switch if xAI deprecates the header.
11
+ * 2. includeEncryptedReasoning=false — xAI's /v1/responses rejects replayed
12
+ * `encrypted_content` blobs minted under SuperGrok OAuth.
13
+ * 3. filterReasoningHistory=true — strip `type: "reasoning"` items from
14
+ * replayed conversation history; the blob inside is non-replayable under
15
+ * OAuth and the wrapper item 404s without it (store=false; server cannot
16
+ * resolve by id).
17
+ *
18
+ * Everything else is the generic OpenAI Responses transport. The xAI bearer
19
+ * token arrives in `options.apiKey` via AuthStorage.getApiKey() upstream, and
20
+ * the xAI base URL (`https://api.x.ai/v1`) arrives via `model.baseUrl` from
21
+ * the provider registry — not routed through this wrapper.
22
+ */
23
+ export declare const streamXAIResponses: StreamFunction<"openai-responses">;
@@ -48,7 +48,7 @@ export interface ThinkingConfig {
48
48
  /** Provider-specific transport used to encode the selected effort. */
49
49
  mode: ThinkingControlMode;
50
50
  }
51
- export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
51
+ export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "xai-oauth" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
52
52
  export type Provider = KnownProvider | string;
53
53
  import type { Effort } from "./model-thinking";
54
54
  /** Token budgets for each thinking level (token-based providers only) */
@@ -294,6 +294,16 @@ export interface SimpleStreamOptions extends StreamOptions {
294
294
  syntheticApiFormat?: "openai" | "anthropic";
295
295
  /** Hint that websocket transport should be preferred when supported by the provider implementation. */
296
296
  preferWebsockets?: boolean;
297
+ /**
298
+ * OpenRouter routing-variant suffix automatically appended to model IDs when
299
+ * the request targets OpenRouter (`model.provider === "openrouter"`). Common
300
+ * values: `"nitro"` (throughput), `"floor"` (cheapest), `"online"` (web
301
+ * search plugin), `"exacto"` (cherry-picked high-quality providers, only
302
+ * defined for some models). Ignored when the resolved model id already
303
+ * contains a `:<variant>` suffix (e.g. the user typed `:nitro` explicitly
304
+ * or the catalog entry already names the variant).
305
+ */
306
+ openrouterVariant?: string;
297
307
  }
298
308
  export type StreamFunction<TApi extends Api> = (model: Model<TApi>, context: Context, options: OptionsForApi<TApi>) => AssistantMessageEventStream;
299
309
  export interface TextSignatureV1 {
@@ -0,0 +1 @@
1
+ export declare const loginOpenRouter: (options: import("./types").OAuthController) => Promise<string>;
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
7
7
  email?: string;
8
8
  accountId?: string;
9
9
  };
10
- export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
10
+ export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "openrouter" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xai-oauth" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
11
11
  export type OAuthProviderId = OAuthProvider | (string & {});
12
12
  export type OAuthPrompt = {
13
13
  message: string;
@@ -0,0 +1,60 @@
1
+ /**
2
+ * xAI Grok (SuperGrok Subscription) OAuth flow.
3
+ *
4
+ * Loopback PKCE flow on `127.0.0.1:56121/callback`. One token unlocks Grok-4.x
5
+ * chat, Grok Imagine image generation, and Grok Voice TTS via subsequent
6
+ * commits. Endpoint discovery is hardened against MITM via
7
+ * {@link validateXAIEndpoint}: any non-HTTPS or non-`x.ai`/`*.x.ai` host is
8
+ * rejected on every call site, not just the first.
9
+ */
10
+ import { OAuthCallbackFlow } from "./callback-server";
11
+ import type { OAuthController, OAuthCredentials } from "./types";
12
+ /**
13
+ * Validate an xAI OIDC discovery endpoint against scheme + host.
14
+ *
15
+ * Hermes `_xai_validate_oauth_endpoint` L2997-3035. The discovery response is
16
+ * long-lived and cached in {@link OAuthCredentials}; a single MITM during
17
+ * initial login could substitute a malicious `token_endpoint` that would then
18
+ * receive every future refresh_token. Rejecting non-HTTPS or non-`x.ai` /
19
+ * `*.x.ai` hosts pins the cached endpoint to the xAI auth origin.
20
+ *
21
+ * @throws Error with message `Invalid xAI <field>: <url>` when the URL fails
22
+ * either scheme or host validation.
23
+ */
24
+ export declare function validateXAIEndpoint(url: string, field: string): string;
25
+ /**
26
+ * Check whether a JWT access token is at or past its `exp` claim (with an
27
+ * optional refresh-skew margin).
28
+ *
29
+ * Hermes `_xai_access_token_is_expiring` L2979-2994. Returns `false` for any
30
+ * malformed input — this is a refresh-trigger check, not a validation, so
31
+ * non-JWTs ("no token in cache") must NOT trigger a spurious refresh.
32
+ */
33
+ export declare function isXAIAccessTokenExpiring(jwt: string, skewSeconds?: number): boolean;
34
+ /**
35
+ * xAI Grok OAuth loopback flow (Hermes `_xai_oauth_loopback_login` L5315-5469).
36
+ *
37
+ * Uses a fixed redirect URI so the callback server fails fast instead of
38
+ * falling back to a random port that xAI's redirect_uri allowlist rejects.
39
+ */
40
+ export declare class XAIOAuthFlow extends OAuthCallbackFlow {
41
+ #private;
42
+ constructor(ctrl: OAuthController);
43
+ generateAuthUrl(state: string, redirectUri: string): Promise<{
44
+ url: string;
45
+ instructions?: string;
46
+ }>;
47
+ exchangeToken(code: string, _state: string, redirectUri: string): Promise<OAuthCredentials>;
48
+ }
49
+ /**
50
+ * Login with xAI Grok OAuth (SuperGrok Subscription).
51
+ */
52
+ export declare function loginXAIOAuth(ctrl: OAuthController): Promise<OAuthCredentials>;
53
+ /**
54
+ * Refresh an xAI OAuth access token using a stored refresh_token.
55
+ *
56
+ * Hermes `refresh_xai_oauth_pure` L3087-3160. Re-runs OIDC discovery and
57
+ * re-validates the cached `token_endpoint` on the refresh hot path so a
58
+ * cached-but-poisoned endpoint cannot silently leak a refresh_token.
59
+ */
60
+ export declare function refreshXAIOAuthToken(refreshToken: string): Promise<OAuthCredentials>;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.5.4",
4
+ "version": "15.5.7",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -40,7 +40,7 @@
40
40
  "dependencies": {
41
41
  "@anthropic-ai/sdk": "^0.94.0",
42
42
  "@bufbuild/protobuf": "^2.12.0",
43
- "@oh-my-pi/pi-utils": "15.5.4",
43
+ "@oh-my-pi/pi-utils": "15.5.7",
44
44
  "openai": "^6.36.0",
45
45
  "partial-json": "^0.1.7",
46
46
  "zod": "4.4.3"
@@ -1280,6 +1280,25 @@ export class AuthStorage {
1280
1280
  return false;
1281
1281
  }
1282
1282
 
1283
+ /**
1284
+ * True iff a dedicated, non-env credential source is configured for this
1285
+ * provider — i.e. anything in the cascade EXCEPT `getEnvApiKey(provider)`.
1286
+ *
1287
+ * Mirrors `hasAuth` minus the env-fallback leg. Useful for callers that
1288
+ * need to distinguish "the user explicitly configured this provider"
1289
+ * from "an env var happens to alias this provider via the cross-provider
1290
+ * fallback map" (see e.g. `xai-oauth → XAI_OAUTH_TOKEN || XAI_API_KEY` in
1291
+ * `stream.ts`). Without that distinction, an `XAI_API_KEY`-only setup
1292
+ * silently satisfies xai-oauth and routes around `providers.xai.baseUrl`.
1293
+ */
1294
+ hasNonEnvCredential(provider: string): boolean {
1295
+ if (this.#runtimeOverrides.has(provider)) return true;
1296
+ if (this.#configOverrides.has(provider)) return true;
1297
+ if (this.#getCredentialsForProvider(provider).length > 0) return true;
1298
+ if (this.#fallbackResolver?.(provider)) return true;
1299
+ return false;
1300
+ }
1301
+
1283
1302
  /**
1284
1303
  * Check if OAuth credentials are configured for a provider.
1285
1304
  */
@@ -1378,6 +1397,14 @@ export class AuthStorage {
1378
1397
  });
1379
1398
  break;
1380
1399
  }
1400
+ case "xai-oauth": {
1401
+ const { loginXAIOAuth } = await import("./utils/oauth/xai-oauth");
1402
+ credentials = await loginXAIOAuth({
1403
+ ...ctrl,
1404
+ onManualCodeInput: ctrl.onManualCodeInput ?? manualCodeInput,
1405
+ });
1406
+ break;
1407
+ }
1381
1408
  case "alibaba-coding-plan": {
1382
1409
  const { loginAlibabaCodingPlan } = await import("./utils/oauth/alibaba-coding-plan");
1383
1410
  const apiKey = await loginAlibabaCodingPlan(ctrl);
@@ -1586,6 +1613,12 @@ export class AuthStorage {
1586
1613
  await saveApiKeyCredential(apiKey);
1587
1614
  return;
1588
1615
  }
1616
+ case "openrouter": {
1617
+ const { loginOpenRouter } = await import("./utils/oauth/openrouter");
1618
+ const apiKey = await loginOpenRouter(ctrl);
1619
+ await saveApiKeyCredential(apiKey);
1620
+ return;
1621
+ }
1589
1622
  case "together": {
1590
1623
  const { loginTogether } = await import("./utils/oauth/together");
1591
1624
  const apiKey = await loginTogether(ctrl);
@@ -34,6 +34,8 @@ export interface ModelManagerOptions<TApi extends Api = Api, TModelsDevPayload =
34
34
  cacheDbPath?: string;
35
35
  /** Maximum cache age in milliseconds before considered stale. Default: 24h. */
36
36
  cacheTtlMs?: number;
37
+ /** When true, a successful dynamic fetch is the complete provider catalog and prunes static-only models. */
38
+ dynamicModelsAuthoritative?: boolean;
37
39
  /** Optional dynamic endpoint fetcher. */
38
40
  fetchDynamicModels?: () => Promise<readonly Model<TApi>[] | null>;
39
41
  /** Optional models.dev fallback hook. */
@@ -110,30 +112,27 @@ export async function resolveProviderModels<TApi extends Api = Api, TModelsDevPa
110
112
  options.staticModels ?? getBundledModels(options.providerId as GeneratedProvider),
111
113
  );
112
114
  const cache = readModelCache<TApi>(options.providerId, ttlMs, now, dbPath);
115
+ const dynamicModelsAuthoritative = options.dynamicModelsAuthoritative ?? false;
116
+ const staticFingerprint = fingerprintStatic(staticModels, dynamicModelsAuthoritative);
117
+ const cacheFingerprintMatches = cache?.staticFingerprint === staticFingerprint && staticFingerprint.length > 0;
118
+ const hasUsableFreshCache = (cache?.fresh ?? false) && (!dynamicModelsAuthoritative || cacheFingerprintMatches);
113
119
  const dynamicFetcher = options.fetchDynamicModels;
114
120
  const hasDynamicFetcher = typeof dynamicFetcher === "function";
115
- const hasAuthoritativeCache = (cache?.authoritative ?? false) || !hasDynamicFetcher;
121
+ const hasAuthoritativeCache = ((cache?.authoritative ?? false) && hasUsableFreshCache) || !hasDynamicFetcher;
116
122
  const cacheAgeMs = cache ? now() - cache.updatedAt : Number.POSITIVE_INFINITY;
117
123
  const shouldFetchFromNetwork = shouldFetchRemoteSources(
118
124
  strategy,
119
- cache?.fresh ?? false,
125
+ hasUsableFreshCache,
120
126
  hasAuthoritativeCache,
121
127
  cacheAgeMs,
122
128
  );
123
- const staticFingerprint = fingerprintStatic(staticModels);
124
129
 
125
130
  // Cold-start fast path: when a fresh, authoritative cache exists, the network
126
131
  // fetch is skipped, AND the static catalog slice is byte-identical to what
127
132
  // was merged in last time, the cache row IS the authoritative merge result.
128
133
  // Re-running `mergeDynamicModels(static, cache)` would just rebuild the same
129
134
  // objects (~800ms in the steady-state cold-start profile for `omp -p hi`).
130
- if (
131
- !shouldFetchFromNetwork &&
132
- cache?.fresh &&
133
- hasAuthoritativeCache &&
134
- cache.staticFingerprint === staticFingerprint &&
135
- cache.staticFingerprint.length > 0
136
- ) {
135
+ if (!shouldFetchFromNetwork && cache?.fresh && hasAuthoritativeCache && cacheFingerprintMatches) {
137
136
  return { models: passModelList<TApi>(cache.models), stale: false };
138
137
  }
139
138
 
@@ -142,16 +141,21 @@ export async function resolveProviderModels<TApi extends Api = Api, TModelsDevPa
142
141
  : [null, null];
143
142
  const modelsDevModels = normalizeModelList<TApi>(fetchedModelsDevModels ?? []);
144
143
  const shouldUseFreshCacheAsAuthoritative =
145
- strategy === "online-if-uncached" && (cache?.fresh ?? false) && hasAuthoritativeCache;
144
+ strategy === "online-if-uncached" && hasUsableFreshCache && hasAuthoritativeCache;
146
145
  const dynamicFetchSucceeded = fetchedDynamicModels !== null;
147
146
  const cacheModels = dynamicFetchSucceeded ? [] : normalizeModelList<TApi>(cache?.models ?? []);
148
147
  const dynamicModels = fetchedDynamicModels ?? [];
149
148
  const mergedWithCache = mergeDynamicModels(mergeModelSources(staticModels, modelsDevModels), cacheModels);
150
- const models = mergeDynamicModels(mergedWithCache, dynamicModels);
149
+ const mergedModels = mergeDynamicModels(mergedWithCache, dynamicModels);
150
+ const models =
151
+ dynamicModelsAuthoritative && dynamicFetchSucceeded ? retainModelIds(mergedModels, dynamicModels) : mergedModels;
151
152
  const dynamicAuthoritative = !hasDynamicFetcher || dynamicFetchSucceeded || shouldUseFreshCacheAsAuthoritative;
152
153
  if (shouldFetchFromNetwork) {
153
154
  if (dynamicFetchSucceeded) {
154
- const snapshotModels = mergeDynamicModels(mergeModelSources(staticModels, modelsDevModels), dynamicModels);
155
+ const mergedSnapshot = mergeDynamicModels(mergeModelSources(staticModels, modelsDevModels), dynamicModels);
156
+ const snapshotModels = dynamicModelsAuthoritative
157
+ ? retainModelIds(mergedSnapshot, dynamicModels)
158
+ : mergedSnapshot;
155
159
  writeModelCache(options.providerId, now(), snapshotModels, true, staticFingerprint, dbPath);
156
160
  } else {
157
161
  // Dynamic fetch failed — update cache with a non-authoritative snapshot so
@@ -270,6 +274,15 @@ function mergeDynamicModels<TApi extends Api>(
270
274
  return Array.from(merged.values());
271
275
  }
272
276
 
277
/**
 * Keeps only the entries of `models` whose `id` also appears in
 * `retainedModels`, preserving the order (and object identity) of `models`.
 *
 * @param models - Candidate list to filter.
 * @param retainedModels - List whose ids define what survives.
 * @returns A new array; empty when either input is empty.
 */
function retainModelIds<TApi extends Api>(
	models: readonly Model<TApi>[],
	retainedModels: readonly Model<TApi>[],
): Model<TApi>[] {
	const kept: Model<TApi>[] = [];
	if (models.length === 0 || retainedModels.length === 0) {
		return kept;
	}

	// Collect the allowed ids once so each candidate lookup is a set probe.
	const allowedIds = new Set<Model<TApi>["id"]>();
	for (const retained of retainedModels) {
		allowedIds.add(retained.id);
	}

	for (const candidate of models) {
		if (allowedIds.has(candidate.id)) {
			kept.push(candidate);
		}
	}
	return kept;
}
285
+
273
286
  /**
274
287
  * Stable, low-collision fingerprint of a static catalog slice. Cached by
275
288
  * reference so repeat calls in the same process (e.g. multiple cold-start
@@ -278,8 +291,12 @@ function mergeDynamicModels<TApi extends Api>(
278
291
  */
279
292
  const kStaticFingerprint = Symbol("model-manager.staticFingerprint");
280
293
  type ModelArrayWithFingerprint = readonly Model<Api>[] & { [kStaticFingerprint]?: string };
281
- function fingerprintStatic<TApi extends Api>(models: readonly Model<TApi>[]): string {
294
+ function fingerprintStatic<TApi extends Api>(
295
+ models: readonly Model<TApi>[],
296
+ dynamicModelsAuthoritative = false,
297
+ ): string {
282
298
  if (models.length === 0) return "empty";
299
+ if (dynamicModelsAuthoritative) return `authoritative:${fingerprintStatic(models)}`;
283
300
  const tagged = models as ModelArrayWithFingerprint;
284
301
  const cached = tagged[kStaticFingerprint];
285
302
  if (cached !== undefined) return cached;
@@ -198,6 +198,28 @@ export function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void {
198
198
  }
199
199
  }
200
200
 
201
/**
 * Reports whether a model reasons natively yet must not be sent the wire
 * `reasoning.effort` parameter, i.e. its metadata carries
 * `compat.supportsReasoningEffort === false` on an `openai-responses` or
 * `openai-codex-responses` API.
 *
 * Callers are expected to leave the effort field out for such models. The
 * actual stripping happens on the wire side (the omitReasoningEffort gate in
 * providers/xai-responses.ts); this predicate is the upstream check that
 * keeps a redundant requireSupportedEffort throw from defeating that gate.
 *
 * Only the responses-style APIs are considered because that is the only
 * surface where `compat.supportsReasoningEffort: false` is meaningful today.
 * The property is probed with `in` because `Model.compat` is
 * `AnthropicCompat | OpenAICompat` and checking `api` does not narrow that
 * union for TypeScript.
 *
 * @param model - Model metadata to inspect.
 * @returns `true` only when the API matches and the flag is exactly `false`.
 */
export function modelOmitsReasoningEffort<TApi extends Api>(model: ApiModel<TApi>): boolean {
	const isResponsesApi = model.api === "openai-responses" || model.api === "openai-codex-responses";
	if (!isResponsesApi) {
		return false;
	}

	const { compat } = model;
	if (!compat || !("supportsReasoningEffort" in compat)) {
		return false;
	}
	// An absent or `true` flag means the effort param is accepted.
	return compat.supportsReasoningEffort === false;
}
222
+
201
223
  /**
202
224
  * Returns the supported thinking efforts declared on the model metadata.
203
225
  *
@@ -211,6 +233,16 @@ export function getSupportedEfforts<TApi extends Api>(model: ApiModel<TApi>): re
211
233
  if (!model.reasoning) {
212
234
  return [];
213
235
  }
236
+ // Models that reason natively but reject the `reasoning.effort` wire param
237
+ // (xAI Grok off the GROK_EFFORT_CAPABLE_PREFIXES allowlist in
238
+ // providers/xai-responses.ts: grok-build, grok-4.20-0309-reasoning) hide the
239
+ // picker's effort dial. Scoped to openai-responses* by
240
+ // `modelOmitsReasoningEffort` — openai-completions has its own
241
+ // supportsReasoningEffort consultation at inferFallbackEfforts L536 and
242
+ // changing that path's semantics is out-of-scope.
243
+ if (modelOmitsReasoningEffort(model)) {
244
+ return [];
245
+ }
214
246
  if (!model.thinking) {
215
247
  throw new Error(`Model ${model.provider}/${model.id} is missing thinking metadata`);
216
248
  }