npm - @oh-my-pi/pi-ai - Versions diffs - 15.5.4 → 15.5.7 - Mend

@oh-my-pi/pi-ai 15.5.4 → 15.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/CHANGELOG.md +53 -0
package/dist/types/auth-storage.d.ts +12 -0
package/dist/types/model-manager.d.ts +2 -0
package/dist/types/model-thinking.d.ts +15 -0
package/dist/types/provider-models/descriptors.d.ts +2 -0
package/dist/types/provider-models/openai-compat.d.ts +48 -0
package/dist/types/providers/openai-completions.d.ts +19 -0
package/dist/types/providers/openai-responses-shared.d.ts +10 -1
package/dist/types/providers/openai-responses.d.ts +34 -0
package/dist/types/providers/xai-responses.d.ts +23 -0
package/dist/types/types.d.ts +11 -1
package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts +1 -0
package/dist/types/utils/oauth/openrouter.d.ts +1 -0
package/dist/types/utils/oauth/types.d.ts +1 -1
package/dist/types/utils/oauth/xai-oauth.d.ts +60 -0
package/package.json +2 -2
package/src/auth-storage.ts +33 -0
package/src/model-manager.ts +31 -14
package/src/model-thinking.ts +32 -0
package/src/models.json +127 -0
package/src/provider-models/descriptors.ts +15 -3
package/src/provider-models/openai-compat.ts +292 -41
package/src/providers/anthropic.ts +35 -5
package/src/providers/openai-codex-responses.ts +283 -8
package/src/providers/openai-completions.ts +40 -9
package/src/providers/openai-responses-shared.ts +38 -9
package/src/providers/openai-responses.ts +62 -8
package/src/providers/pi-native-server.ts +1 -0
package/src/providers/xai-responses.ts +82 -0
package/src/stream.ts +17 -1
package/src/types.ts +11 -0
package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
package/src/utils/oauth/index.ts +16 -0
package/src/utils/oauth/openrouter.ts +20 -0
package/src/utils/oauth/synthetic.ts +2 -3
package/src/utils/oauth/types.ts +2 -0
package/src/utils/oauth/xai-oauth.ts +342 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,59 @@
 ## [Unreleased]
+## [15.5.7] - 2026-05-27
+### Added
+- `SimpleStreamOptions.openrouterVariant` (`"nitro"`, `"floor"`, `"online"`, `"exacto"`, …) — when set, appends `:<variant>` to OpenRouter model IDs at request time, leaving ids that already carry an explicit `:suffix` untouched. Plumbed through `openai-completions` and the pi-native gateway forwarder.
+- xAI Grok OAuth (SuperGrok Subscription) provider in `/login`. Loopback PKCE flow on `127.0.0.1:56121`; the token unlocks Grok-4.x chat. Ported from NousResearch/hermes-agent (MIT).
+- OpenRouter provider in `/login`. API-key paste flow validated against `https://openrouter.ai/api/v1/auth/key` (the `/models` endpoint is public and cannot validate auth). The pasted key is stored under the existing `openrouter` provider id used by `OPENROUTER_API_KEY`.
+- `XAI_OAUTH_TOKEN` environment variable accepted as a headless fallback for the xAI Grok OAuth provider.
+### Changed
+- `OpenAIResponsesOptions` gains five optional, provider-agnostic fields that adapter wrappers can use to compose provider-specific behavior on top of the generic transport: `includeEncryptedReasoning` (gates `include: ["reasoning.encrypted_content"]`; default `true`, preserves current behavior), `filterReasoningHistory` (strips replayed `type: "reasoning"` items from conversation history; default `false`), `omitReasoningEffort` (suppresses the `reasoning.effort` wire param even when reasoning is requested; default `false`), `headers` (merged onto the client's default headers), and `extraBody` (merged into the request payload).
+- The existing `XAI_API_KEY` path is unchanged — it continues to use the OpenAI-completions transport.
+### Fixed
+- Fixed OpenRouter DeepSeek V4 tool-call follow-up requests replaying normalized `reasoning` as-is instead of DeepSeek's required `reasoning_content`, which caused HTTP 400 errors in thinking mode. ([#1445](https://github.com/can1357/oh-my-pi/issues/1445))
+## [15.5.6] - 2026-05-27
+### Added
+- Added `PI_CODEX_WEBSOCKET_MAX_IDLE_REUSE_MS` to control how long an idle Codex WebSocket stays eligible for reuse, with `0` disabling the check
+### Fixed
+- Fixed reuse of Codex WebSocket connections that had gone silent: a connection idle past the idle-reuse threshold is now dropped and replaced with a fresh handshake, preventing the next request from stalling
+- Fixed stale response frames left in the websocket queue from a completed turn so subsequent requests no longer process terminal frames from the previous response
+- Fixed websocket dead-socket detection to fail a stale connection when no inbound traffic or pong is observed after a ping timeout, improving recovery on runtimes that do not emit pong events
+## [15.5.5] - 2026-05-27
+### Added
+- Added `PI_CODEX_WEBSOCKET_PING_INTERVAL_MS` to configure the interval for Codex WebSocket protocol ping heartbeats
+- Added `PI_CODEX_WEBSOCKET_PONG_TIMEOUT_MS` to configure the Codex WebSocket pong timeout used to detect unresponsive connections
+- Added `PI_CODEX_WEBSOCKET_MESSAGE_QUEUE_CAPACITY` to configure the maximum buffered Codex WebSocket inbound queue size before transport fallback
+### Changed
+- Improved Codex WebSocket timeout diagnostics to include last event type and time since last progress event
+- Enhanced Codex WebSocket error classification to recognize ping, pong, send, and queue-overflow failures as retryable
+### Fixed
+- Fixed Codex WebSocket send failures by wrapping socket.send() in try-catch and surfacing errors as retryable transport errors
+- Fixed Codex WebSocket inbound queue overflow by adding capacity bounds and triggering fallback to SSE when exceeded
+- Fixed Codex WebSocket pong timeout detection by tracking pong events and failing the connection when no pong is received within the configured timeout
+- Fixed Anthropic streaming to suppress hallucinated meta-prompt thinking blocks (the recent "I don't see any current rewritten thinking..." regression). When the marker phrase `rewritten thinking` appears in a streamed thinking summary the block is collapsed to a plain `Thinking...` placeholder and its signature is dropped so subsequent turns can't re-anchor on the garbled chain.
+- Fixed Codex WebSocket silent stalls by adding protocol pings, inbound queue bounding, clearer idle-timeout diagnostics, and SDK retry clamping for first-event timeouts.
+### Fixed
+- Fixed Synthetic model discovery to treat the provider `/models` response as authoritative so deprecated bundled IDs are pruned from the runtime cache, and changed Synthetic login validation to avoid probing a specific model ([#1417](https://github.com/can1357/oh-my-pi/issues/1417)).
 ## [15.5.0] - 2026-05-26
 ### Added

package/dist/types/auth-storage.d.ts CHANGED Viewed

@@ -416,6 +416,18 @@ export declare class AuthStorage {
      * Unlike getApiKey(), this doesn't refresh OAuth tokens.
      */
     hasAuth(provider: string): boolean;
+    /**
+     * True iff a dedicated, non-env credential source is configured for this
+     * provider — i.e. anything in the cascade EXCEPT `getEnvApiKey(provider)`.
+     *
+     * Mirrors `hasAuth` minus the env-fallback leg. Useful for callers that
+     * need to distinguish "the user explicitly configured this provider"
+     * from "an env var happens to alias this provider via the cross-provider
+     * fallback map" (see e.g. `xai-oauth → XAI_OAUTH_TOKEN || XAI_API_KEY` in
+     * `stream.ts`). Without that distinction, an `XAI_API_KEY`-only setup
+     * silently satisfies xai-oauth and routes around `providers.xai.baseUrl`.
+     */
+    hasNonEnvCredential(provider: string): boolean;
     /**
      * Check if OAuth credentials are configured for a provider.
      */

package/dist/types/model-manager.d.ts CHANGED Viewed

@@ -24,6 +24,8 @@ export interface ModelManagerOptions<TApi extends Api = Api, TModelsDevPayload =
     cacheDbPath?: string;
     /** Maximum cache age in milliseconds before considered stale. Default: 24h. */
     cacheTtlMs?: number;
+    /** When true, a successful dynamic fetch is the complete provider catalog and prunes static-only models. */
+    dynamicModelsAuthoritative?: boolean;
     /** Optional dynamic endpoint fetcher. */
     fetchDynamicModels?: () => Promise<readonly Model<TApi>[] | null>;
     /** Optional models.dev fallback hook. */

package/dist/types/model-thinking.d.ts CHANGED Viewed

@@ -42,6 +42,21 @@ export declare function applyGeneratedModelPolicies(models: ApiModel<Api>[]): vo
  * - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
  */
 export declare function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void;
+/**
+ * True when the model reasons natively but rejects the wire `reasoning.effort`
+ * param (compat.supportsReasoningEffort: false on openai-responses*). Callers
+ * are expected to omit the effort field; the wire-side omitReasoningEffort
+ * gate (providers/xai-responses.ts:78) is the actual strip, and this
+ * predicate is the upstream check that prevents a redundant
+ * requireSupportedEffort throw from defeating that gate.
+ *
+ * Scoped to openai-responses* because that's the only API surface where
+ * `compat.supportsReasoningEffort: false` is meaningful today. The
+ * `in`-narrowed access is necessary because Model.compat is
+ * `AnthropicCompat | OpenAICompat` and the api gate doesn't narrow the
+ * union for TS.
+ */
+export declare function modelOmitsReasoningEffort<TApi extends Api>(model: ApiModel<TApi>): boolean;
 /**
  * Returns the supported thinking efforts declared on the model metadata.
  *

package/dist/types/provider-models/descriptors.d.ts CHANGED Viewed

@@ -28,6 +28,8 @@ export interface ProviderDescriptor {
     defaultModel: string;
     /** When true, the runtime creates a model manager even without a valid API key (e.g. ollama). */
     allowUnauthenticated?: boolean;
+    /** When true, successful runtime discovery replaces bundled provider models instead of merging fallback-only IDs. */
+    dynamicModelsAuthoritative?: boolean;
     /** Catalog discovery configuration. Only providers with this field participate in generate-models.ts. */
     catalogDiscovery?: CatalogDiscoveryConfig;
 }

package/dist/types/provider-models/openai-compat.d.ts CHANGED Viewed

@@ -53,6 +53,53 @@ export interface XaiModelManagerConfig {
     baseUrl?: string;
 }
 export declare function xaiModelManagerOptions(config?: XaiModelManagerConfig): ModelManagerOptions<"openai-completions">;
+export interface XaiOAuthModelManagerConfig {
+    apiKey?: string;
+    baseUrl?: string;
+}
+interface XAICuratedModel {
+    id: string;
+    contextWindow: number;
+    name?: string;
+    /** Whether the model reasons natively. Defaults to true for Grok-4.x family. */
+    reasoning?: boolean;
+    /**
+     * Whether xAI accepts the `reasoning.effort` wire param for this model.
+     * Default true. When false: picker hides the effort dial (via
+     * getSupportedEfforts in model-thinking.ts) AND wire-side already omits
+     * the param via GROK_EFFORT_CAPABLE_PREFIXES in providers/xai-responses.ts.
+     * Must agree with that allowlist; the two sources of truth are kept in sync
+     * by curated-catalog author convention until a follow-up change unifies them.
+     */
+    supportsReasoningEffort?: boolean;
+    /**
+     * Input modalities this model accepts. Defaults to `["text"]` when absent.
+     * Vision-capable Grok models MUST list `"image"` here so the curated layer
+     * overrides `fetchOpenAICompatibleModels`' default of `["text"]` (which
+     * otherwise strips image capability on every online refresh).
+     */
+    input?: ("text" | "image")[];
+}
+export declare const XAI_OAUTH_CURATED_MODELS: readonly XAICuratedModel[];
+/**
+ * Render `XAI_OAUTH_CURATED_MODELS` as full `Model<"openai-responses">` entries.
+ *
+ * Single source of truth for converting curated entries into `Model` objects, consumed by both
+ * - {@link xaiOAuthModelManagerOptions} (runtime static seed handed to the model
+ *   manager so the picker is populated on a fresh login), and
+ * - `packages/ai/scripts/generate-models.ts` (bundles the same entries into
+ *   `models.json`, so the synchronous `ModelRegistry.#loadModels()` boot path
+ *   sees `xai-oauth` without waiting for a refresh — fixes the boot-time
+ *   default-model reset when `modelRoles.default = "xai-oauth/<id>"`).
+ *
+ * `reasoning` defaults to `true` for the Grok-4.x family; the explicit
+ * `grok-4.20-0309-non-reasoning` entry opts out via `XAICuratedModel.reasoning`.
+ * `maxTokens` uses `UNK_MAX_TOKENS` so id-keyed overlays from a successful
+ * dynamic fetch merge cleanly. Mirrors
+ * `hermes-agent/hermes_cli/models.py:_XAI_STATIC_FALLBACK`.
+ */
+export declare function buildXaiOAuthStaticSeed(baseUrl?: string): Model<"openai-responses">[];
+export declare function xaiOAuthModelManagerOptions(config?: XaiOAuthModelManagerConfig): ModelManagerOptions<"openai-responses">;
 export interface DeepSeekModelManagerConfig {
     apiKey?: string;
     baseUrl?: string;
@@ -240,3 +287,4 @@ export interface ModelsDevProviderDescriptor {
 export declare function mapModelsDevToModels(data: Record<string, unknown>, descriptors: readonly ModelsDevProviderDescriptor[]): Model<Api>[];
 /** All provider descriptors for models.dev data mapping in generate-models.ts. */
 export declare const MODELS_DEV_PROVIDER_DESCRIPTORS: readonly ModelsDevProviderDescriptor[];
+export {};

package/dist/types/providers/openai-completions.d.ts CHANGED Viewed

@@ -21,7 +21,26 @@ export interface OpenAICompletionsOptions extends StreamOptions {
     /** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
     disableReasoning?: boolean;
     serviceTier?: ServiceTier;
+    /**
+     * Routing-variant suffix appended to OpenRouter model IDs when none is
+     * already present (`anthropic/claude-haiku-latest` → `…:nitro`). Common
+     * values: `"nitro"`, `"floor"`, `"online"`, `"exacto"`. Ignored when the
+     * resolved `model.id` already contains a colon-suffix after the last
+     * provider segment (explicit `:nitro` in the selector or a catalog entry
+     * with the variant baked in).
+     */
+    openrouterVariant?: string;
 }
+/**
+ * Append an OpenRouter routing-variant suffix (e.g. `:nitro`, `:floor`, `:online`, `:exacto`)
+ * to a model id when no explicit variant is already present. A variant is considered
+ * "already present" when `modelId` contains a colon after the last `/` separator —
+ * which covers both user-typed selectors (`anthropic/claude-haiku:nitro`) and catalog
+ * entries that bake the variant in (`deepseek/deepseek-v3.1-terminus:exacto`).
+ *
+ * Exported for unit testing.
+ */
+export declare function applyOpenRouterRoutingVariant(modelId: string, variant: string | undefined): string;
 export declare const streamOpenAICompletions: StreamFunction<"openai-completions">;
 export declare function parseChunkUsage(rawUsage: object, model: Model<"openai-completions">, premiumRequests: number | undefined): AssistantMessage["usage"];
 export declare function convertMessages(model: Model<"openai-completions">, context: Context, compat: ResolvedOpenAICompat): ChatCompletionMessageParam[];

package/dist/types/providers/openai-responses-shared.d.ts CHANGED Viewed

@@ -74,8 +74,17 @@ type ReasoningOptions = {
  * Apply reasoning-related Responses parameters: enable encrypted reasoning content for replay,
  * set effort/summary when requested, and otherwise inject the GPT-5 "Juice: 0" no-reasoning hack.
  * Mutates `params` and may push a developer message into `messages`.
+ *
+ * @param omitReasoningEffort - When `true`, suppresses `params.reasoning.effort` from the wire
+ *   body. Set by `xai-responses.ts` via {@link OpenAIResponsesOptions.omitReasoningEffort} for
+ *   xAI Grok models that return HTTP 400 on any `reasoning.effort` value (e.g. grok-build,
+ *   grok-4.20-0309-reasoning). When `true` and `options.reasoning` is set but
+ *   `options.reasoningSummary` is absent, `params.reasoning` is intentionally omitted from the
+ *   wire body entirely — these models reason natively at their own internal default effort level
+ *   without needing explicit activation. Callers that pass `options.reasoning` for such models
+ *   should expect this documented downgrade: the model will reason, but at its default effort.
  */
-export declare function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(params: P, model: Model<Api>, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string): void;
+export declare function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(params: P, model: Model<Api>, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string, includeEncryptedReasoning?: boolean, omitReasoningEffort?: boolean): void;
 /** Populate `output.usage` from a Responses-API `response.usage` payload. Does not invoke `calculateCost`. */
 export declare function populateResponsesUsageFromResponse(output: AssistantMessage, usage: {
     input_tokens?: number | null;

package/dist/types/providers/openai-responses.d.ts CHANGED Viewed

@@ -12,11 +12,45 @@ export interface OpenAIResponsesOptions extends StreamOptions {
      * Azure OpenAI and GitHub Copilot Responses paths require tool results to match prior tool calls.
      */
     strictResponsesPairing?: boolean;
+    /**
+     * Pass `include: ["reasoning.encrypted_content"]` on requests when the
+     * model supports reasoning. Default: true (preserves current behavior).
+     * Set to false when the upstream Responses endpoint rejects replayed
+     * encrypted reasoning (e.g., xAI Grok under SuperGrok OAuth).
+     */
+    includeEncryptedReasoning?: boolean;
+    /**
+     * Strip `type: "reasoning"` items from replayed conversation history
+     * before they hit the wire. Default: false (preserves current behavior).
+     * Set to true when the upstream rejects replayed reasoning wrappers.
+     */
+    filterReasoningHistory?: boolean;
+    /**
+     * Suppress the `reasoning.effort` wire param when set, even if
+     * `options.reasoning` is requested. Default: false. xAI Grok models
+     * outside the effort-capable allowlist 400 with "Model X does not
+     * support parameter reasoningEffort" — the xAI Responses adapter sets
+     * this when the target model is not in GROK_EFFORT_CAPABLE_PREFIXES.
+     */
+    omitReasoningEffort?: boolean;
+    /**
+     * Extra request headers merged onto the underlying client's
+     * defaultHeaders. Used by adapter wrappers to inject provider-specific
+     * routing or cache hints.
+     */
+    headers?: Record<string, string>;
+    /**
+     * Extra body fields merged into the Responses request payload. Used by
+     * adapter wrappers to inject provider-specific body keys (e.g.,
+     * prompt_cache_key for prompt-cache routing).
+     */
+    extraBody?: Record<string, unknown>;
 }
 /**
  * Generate function for OpenAI Responses API
  */
 export declare const streamOpenAIResponses: StreamFunction<"openai-responses">;
+export declare function getOpenAIResponsesCacheSessionId(options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId" | "promptCacheKey"> | undefined): string | undefined;
 export declare function supportsDeveloperRole(modelOrBaseUrl: Pick<Model, "provider" | "baseUrl"> | string): boolean;
 /**
  * Whether this model should get the OpenAI custom-tool grammar variant

package/dist/types/providers/xai-responses.d.ts ADDED Viewed

@@ -0,0 +1,23 @@
+import type { StreamFunction } from "../types";
+/**
+ * xAI Grok Responses adapter (SuperGrok OAuth path).
+ *
+ * Three xAI-specific behaviors vs the generic OpenAI Responses adapter:
+ *
+ *  1. `x-grok-conv-id` header + body `prompt_cache_key` route prompt-cache
+ *     hits on xAI's edge. Hermes uses both (agent/transports/codex.py:182-193).
+ *     The header is undocumented by xAI; `previous_response_id` is the
+ *     documented alternative — switch if xAI deprecates the header.
+ *  2. includeEncryptedReasoning=false — xAI's /v1/responses rejects replayed
+ *     `encrypted_content` blobs minted under SuperGrok OAuth.
+ *  3. filterReasoningHistory=true — strip `type: "reasoning"` items from
+ *     replayed conversation history; the blob inside is non-replayable under
+ *     OAuth and the wrapper item 404s without it (store=false; server cannot
+ *     resolve by id).
+ *
+ * Everything else is the generic OpenAI Responses transport. The xAI bearer
+ * token arrives in `options.apiKey` via AuthStorage.getApiKey() upstream, and
+ * the xAI base URL (`https://api.x.ai/v1`) arrives via `model.baseUrl` from
+ * the provider registry — not routed through this wrapper.
+ */
+export declare const streamXAIResponses: StreamFunction<"openai-responses">;

package/dist/types/types.d.ts CHANGED Viewed

@@ -48,7 +48,7 @@ export interface ThinkingConfig {
     /** Provider-specific transport used to encode the selected effort. */
     mode: ThinkingControlMode;
 }
-export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
+export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "xai-oauth" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
 export type Provider = KnownProvider | string;
 import type { Effort } from "./model-thinking";
 /** Token budgets for each thinking level (token-based providers only) */
@@ -294,6 +294,16 @@ export interface SimpleStreamOptions extends StreamOptions {
     syntheticApiFormat?: "openai" | "anthropic";
     /** Hint that websocket transport should be preferred when supported by the provider implementation. */
     preferWebsockets?: boolean;
+    /**
+     * OpenRouter routing-variant suffix automatically appended to model IDs when
+     * the request targets OpenRouter (`model.provider === "openrouter"`). Common
+     * values: `"nitro"` (throughput), `"floor"` (cheapest), `"online"` (web
+     * search plugin), `"exacto"` (cherry-picked high-quality providers, only
+     * defined for some models). Ignored when the resolved model id already
+     * contains a `:<variant>` suffix (e.g. the user typed `:nitro` explicitly
+     * or the catalog entry already names the variant).
+     */
+    openrouterVariant?: string;
 }
 export type StreamFunction<TApi extends Api> = (model: Model<TApi>, context: Context, options: OptionsForApi<TApi>) => AssistantMessageEventStream;
 export interface TextSignatureV1 {

package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/types/utils/oauth/openrouter.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare const loginOpenRouter: (options: import("./types").OAuthController) => Promise<string>;

package/dist/types/utils/oauth/types.d.ts CHANGED Viewed

@@ -7,7 +7,7 @@ export type OAuthCredentials = {
     email?: string;
     accountId?: string;
 };
-export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
+export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "openrouter" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xai-oauth" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
 export type OAuthProviderId = OAuthProvider | (string & {});
 export type OAuthPrompt = {
     message: string;

package/dist/types/utils/oauth/xai-oauth.d.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * xAI Grok (SuperGrok Subscription) OAuth flow.
+ *
+ * Loopback PKCE flow on `127.0.0.1:56121/callback`. One token unlocks Grok-4.x
+ * chat, Grok Imagine image generation, and Grok Voice TTS via subsequent
+ * commits. Endpoint discovery is hardened against MITM via
+ * {@link validateXAIEndpoint}: any non-HTTPS or non-`x.ai`/`*.x.ai` host is
+ * rejected on every call site, not just the first.
+ */
+import { OAuthCallbackFlow } from "./callback-server";
+import type { OAuthController, OAuthCredentials } from "./types";
+/**
+ * Validate an xAI OIDC discovery endpoint against scheme + host.
+ *
+ * Hermes `_xai_validate_oauth_endpoint` L2997-3035. The discovery response is
+ * long-lived and cached in {@link OAuthCredentials}; a single MITM during
+ * initial login could substitute a malicious `token_endpoint` that would then
+ * receive every future refresh_token. Rejecting non-HTTPS or non-`x.ai` /
+ * `*.x.ai` hosts pins the cached endpoint to the xAI auth origin.
+ *
+ * @throws Error with message `Invalid xAI <field>: <url>` when the URL fails
+ *         either scheme or host validation.
+ */
+export declare function validateXAIEndpoint(url: string, field: string): string;
+/**
+ * Check whether a JWT access token is at or past its `exp` claim (with an
+ * optional refresh-skew margin).
+ *
+ * Hermes `_xai_access_token_is_expiring` L2979-2994. Returns `false` for any
+ * malformed input — this is a refresh-trigger check, not a validation, so
+ * non-JWTs ("no token in cache") must NOT trigger a spurious refresh.
+ */
+export declare function isXAIAccessTokenExpiring(jwt: string, skewSeconds?: number): boolean;
+/**
+ * xAI Grok OAuth loopback flow (Hermes `_xai_oauth_loopback_login` L5315-5469).
+ *
+ * Uses a fixed redirect URI so the callback server fails fast instead of
+ * falling back to a random port that xAI's redirect_uri allowlist rejects.
+ */
+export declare class XAIOAuthFlow extends OAuthCallbackFlow {
+    #private;
+    constructor(ctrl: OAuthController);
+    generateAuthUrl(state: string, redirectUri: string): Promise<{
+        url: string;
+        instructions?: string;
+    }>;
+    exchangeToken(code: string, _state: string, redirectUri: string): Promise<OAuthCredentials>;
+}
+/**
+ * Login with xAI Grok OAuth (SuperGrok Subscription).
+ */
+export declare function loginXAIOAuth(ctrl: OAuthController): Promise<OAuthCredentials>;
+/**
+ * Refresh an xAI OAuth access token using a stored refresh_token.
+ *
+ * Hermes `refresh_xai_oauth_pure` L3087-3160. Re-runs OIDC discovery and
+ * re-validates the cached `token_endpoint` on the refresh hot path so a
+ * cached-but-poisoned endpoint cannot silently leak a refresh_token.
+ */
+export declare function refreshXAIOAuthToken(refreshToken: string): Promise<OAuthCredentials>;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "15.5.4",
+	"version": "15.5.7",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -40,7 +40,7 @@
 	"dependencies": {
 		"@anthropic-ai/sdk": "^0.94.0",
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-utils": "15.5.4",
+		"@oh-my-pi/pi-utils": "15.5.7",
 		"openai": "^6.36.0",
 		"partial-json": "^0.1.7",
 		"zod": "4.4.3"

package/src/auth-storage.ts CHANGED Viewed

@@ -1280,6 +1280,25 @@ export class AuthStorage {
 		return false;
 	}
+	/**
+	 * True iff a dedicated, non-env credential source is configured for this
+	 * provider — i.e. anything in the cascade EXCEPT `getEnvApiKey(provider)`.
+	 *
+	 * Mirrors `hasAuth` minus the env-fallback leg. Useful for callers that
+	 * need to distinguish "the user explicitly configured this provider"
+	 * from "an env var happens to alias this provider via the cross-provider
+	 * fallback map" (see e.g. `xai-oauth → XAI_OAUTH_TOKEN || XAI_API_KEY` in
+	 * `stream.ts`). Without that distinction, an `XAI_API_KEY`-only setup
+	 * silently satisfies xai-oauth and routes around `providers.xai.baseUrl`.
+	 */
+	hasNonEnvCredential(provider: string): boolean {
+		if (this.#runtimeOverrides.has(provider)) return true;
+		if (this.#configOverrides.has(provider)) return true;
+		if (this.#getCredentialsForProvider(provider).length > 0) return true;
+		if (this.#fallbackResolver?.(provider)) return true;
+		return false;
+	}
 	/**
 	 * Check if OAuth credentials are configured for a provider.
 	 */
@@ -1378,6 +1397,14 @@ export class AuthStorage {
 				});
 				break;
 			}
+			case "xai-oauth": {
+				const { loginXAIOAuth } = await import("./utils/oauth/xai-oauth");
+				credentials = await loginXAIOAuth({
+					...ctrl,
+					onManualCodeInput: ctrl.onManualCodeInput ?? manualCodeInput,
+				});
+				break;
+			}
 			case "alibaba-coding-plan": {
 				const { loginAlibabaCodingPlan } = await import("./utils/oauth/alibaba-coding-plan");
 				const apiKey = await loginAlibabaCodingPlan(ctrl);
@@ -1586,6 +1613,12 @@ export class AuthStorage {
 				await saveApiKeyCredential(apiKey);
 				return;
 			}
+			case "openrouter": {
+				const { loginOpenRouter } = await import("./utils/oauth/openrouter");
+				const apiKey = await loginOpenRouter(ctrl);
+				await saveApiKeyCredential(apiKey);
+				return;
+			}
 			case "together": {
 				const { loginTogether } = await import("./utils/oauth/together");
 				const apiKey = await loginTogether(ctrl);

package/src/model-manager.ts CHANGED Viewed

@@ -34,6 +34,8 @@ export interface ModelManagerOptions<TApi extends Api = Api, TModelsDevPayload =
 	cacheDbPath?: string;
 	/** Maximum cache age in milliseconds before considered stale. Default: 24h. */
 	cacheTtlMs?: number;
+	/** When true, a successful dynamic fetch is the complete provider catalog and prunes static-only models. */
+	dynamicModelsAuthoritative?: boolean;
 	/** Optional dynamic endpoint fetcher. */
 	fetchDynamicModels?: () => Promise<readonly Model<TApi>[] | null>;
 	/** Optional models.dev fallback hook. */
@@ -110,30 +112,27 @@ export async function resolveProviderModels<TApi extends Api = Api, TModelsDevPa
 		options.staticModels ?? getBundledModels(options.providerId as GeneratedProvider),
 	);
 	const cache = readModelCache<TApi>(options.providerId, ttlMs, now, dbPath);
+	const dynamicModelsAuthoritative = options.dynamicModelsAuthoritative ?? false;
+	const staticFingerprint = fingerprintStatic(staticModels, dynamicModelsAuthoritative);
+	const cacheFingerprintMatches = cache?.staticFingerprint === staticFingerprint && staticFingerprint.length > 0;
+	const hasUsableFreshCache = (cache?.fresh ?? false) && (!dynamicModelsAuthoritative || cacheFingerprintMatches);
 	const dynamicFetcher = options.fetchDynamicModels;
 	const hasDynamicFetcher = typeof dynamicFetcher === "function";
-	const hasAuthoritativeCache = (cache?.authoritative ?? false) || !hasDynamicFetcher;
+	const hasAuthoritativeCache = ((cache?.authoritative ?? false) && hasUsableFreshCache) || !hasDynamicFetcher;
 	const cacheAgeMs = cache ? now() - cache.updatedAt : Number.POSITIVE_INFINITY;
 	const shouldFetchFromNetwork = shouldFetchRemoteSources(
 		strategy,
-		cache?.fresh ?? false,
+		hasUsableFreshCache,
 		hasAuthoritativeCache,
 		cacheAgeMs,
 	);
-	const staticFingerprint = fingerprintStatic(staticModels);
 	// Cold-start fast path: when a fresh, authoritative cache exists, the network
 	// fetch is skipped, AND the static catalog slice is byte-identical to what
 	// was merged in last time, the cache row IS the authoritative merge result.
 	// Re-running `mergeDynamicModels(static, cache)` would just rebuild the same
 	// objects (~800ms in the steady-state cold-start profile for `omp -p hi`).
-	if (
-		!shouldFetchFromNetwork &&
-		cache?.fresh &&
-		hasAuthoritativeCache &&
-		cache.staticFingerprint === staticFingerprint &&
-		cache.staticFingerprint.length > 0
-	) {
+	if (!shouldFetchFromNetwork && cache?.fresh && hasAuthoritativeCache && cacheFingerprintMatches) {
 		return { models: passModelList<TApi>(cache.models), stale: false };
 	}
@@ -142,16 +141,21 @@ export async function resolveProviderModels<TApi extends Api = Api, TModelsDevPa
 		: [null, null];
 	const modelsDevModels = normalizeModelList<TApi>(fetchedModelsDevModels ?? []);
 	const shouldUseFreshCacheAsAuthoritative =
-		strategy === "online-if-uncached" && (cache?.fresh ?? false) && hasAuthoritativeCache;
+		strategy === "online-if-uncached" && hasUsableFreshCache && hasAuthoritativeCache;
 	const dynamicFetchSucceeded = fetchedDynamicModels !== null;
 	const cacheModels = dynamicFetchSucceeded ? [] : normalizeModelList<TApi>(cache?.models ?? []);
 	const dynamicModels = fetchedDynamicModels ?? [];
 	const mergedWithCache = mergeDynamicModels(mergeModelSources(staticModels, modelsDevModels), cacheModels);
-	const models = mergeDynamicModels(mergedWithCache, dynamicModels);
+	const mergedModels = mergeDynamicModels(mergedWithCache, dynamicModels);
+	const models =
+		dynamicModelsAuthoritative && dynamicFetchSucceeded ? retainModelIds(mergedModels, dynamicModels) : mergedModels;
 	const dynamicAuthoritative = !hasDynamicFetcher || dynamicFetchSucceeded || shouldUseFreshCacheAsAuthoritative;
 	if (shouldFetchFromNetwork) {
 		if (dynamicFetchSucceeded) {
-			const snapshotModels = mergeDynamicModels(mergeModelSources(staticModels, modelsDevModels), dynamicModels);
+			const mergedSnapshot = mergeDynamicModels(mergeModelSources(staticModels, modelsDevModels), dynamicModels);
+			const snapshotModels = dynamicModelsAuthoritative
+				? retainModelIds(mergedSnapshot, dynamicModels)
+				: mergedSnapshot;
 			writeModelCache(options.providerId, now(), snapshotModels, true, staticFingerprint, dbPath);
 		} else {
 			// Dynamic fetch failed — update cache with a non-authoritative snapshot so
@@ -270,6 +274,15 @@ function mergeDynamicModels<TApi extends Api>(
 	return Array.from(merged.values());
 }
+function retainModelIds<TApi extends Api>(
+	models: readonly Model<TApi>[],
+	retainedModels: readonly Model<TApi>[],
+): Model<TApi>[] {
+	if (retainedModels.length === 0 || models.length === 0) return [];
+	const retainedIds = new Set(retainedModels.map(model => model.id));
+	return models.filter(model => retainedIds.has(model.id));
+}
 /**
  * Stable, low-collision fingerprint of a static catalog slice. Cached by
  * reference so repeat calls in the same process (e.g. multiple cold-start
@@ -278,8 +291,12 @@ function mergeDynamicModels<TApi extends Api>(
  */
 const kStaticFingerprint = Symbol("model-manager.staticFingerprint");
 type ModelArrayWithFingerprint = readonly Model<Api>[] & { [kStaticFingerprint]?: string };
-function fingerprintStatic<TApi extends Api>(models: readonly Model<TApi>[]): string {
+function fingerprintStatic<TApi extends Api>(
+	models: readonly Model<TApi>[],
+	dynamicModelsAuthoritative = false,
+): string {
 	if (models.length === 0) return "empty";
+	if (dynamicModelsAuthoritative) return `authoritative:${fingerprintStatic(models)}`;
 	const tagged = models as ModelArrayWithFingerprint;
 	const cached = tagged[kStaticFingerprint];
 	if (cached !== undefined) return cached;

package/src/model-thinking.ts CHANGED Viewed

@@ -198,6 +198,28 @@ export function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void {
 	}
 }
+/**
+ * True when the model reasons natively but rejects the `reasoning.effort` wire
+ * param (`compat.supportsReasoningEffort: false` on `openai-responses` or
+ * `openai-codex-responses`). Callers are expected to omit the effort field;
+ * the wire-side omitReasoningEffort gate (providers/xai-responses.ts:78)
+ * performs the actual strip, and this predicate is the upstream check that
+ * stops a redundant requireSupportedEffort throw from defeating that gate.
+ *
+ * Scoped to `openai-responses` and `openai-codex-responses` because those
+ * are the only API surfaces where `compat.supportsReasoningEffort: false`
+ * is meaningful today. The `in`-narrowed access is necessary because
+ * Model.compat is `AnthropicCompat | OpenAICompat` and the api gate
+ * doesn't narrow the union for TS.
+ */
+export function modelOmitsReasoningEffort<TApi extends Api>(model: ApiModel<TApi>): boolean {
+	if (model.api !== "openai-responses" && model.api !== "openai-codex-responses") {
+		return false;
+	}
+	const compat = model.compat;
+	return Boolean(compat && "supportsReasoningEffort" in compat && compat.supportsReasoningEffort === false);
+}
 /**
  * Returns the supported thinking efforts declared on the model metadata.
  *
@@ -211,6 +233,16 @@ export function getSupportedEfforts<TApi extends Api>(model: ApiModel<TApi>): re
 	if (!model.reasoning) {
 		return [];
 	}
+	// Models that reason natively but reject the `reasoning.effort` wire param
+	// (xAI Grok models off the GROK_EFFORT_CAPABLE_PREFIXES allowlist in
+	// providers/xai-responses.ts: grok-build, grok-4.20-0309-reasoning) report
+	// no supported efforts, which hides the picker's effort dial. Scoped to the
+	// Responses APIs by `modelOmitsReasoningEffort`; openai-completions has its
+	// own supportsReasoningEffort consultation at inferFallbackEfforts L536, and
+	// changing that path's semantics is out of scope.
+	if (modelOmitsReasoningEffort(model)) {
+		return [];
+	}
 	if (!model.thinking) {
 		throw new Error(`Model ${model.provider}/${model.id} is missing thinking metadata`);
 	}