npm - @oh-my-pi/pi-ai - Versions diffs - 15.5.6 → 15.5.8 - Mend

@oh-my-pi/pi-ai 15.5.6 → 15.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

package/CHANGELOG.md +51 -0
package/README.md +2 -0
package/dist/types/auth-gateway/server.d.ts +19 -0
package/dist/types/auth-storage.d.ts +93 -0
package/dist/types/model-manager.d.ts +2 -0
package/dist/types/model-thinking.d.ts +15 -0
package/dist/types/provider-models/descriptors.d.ts +2 -0
package/dist/types/provider-models/google.d.ts +1 -1
package/dist/types/provider-models/openai-compat.d.ts +54 -0
package/dist/types/providers/openai-chat-server-schema.d.ts +3 -0
package/dist/types/providers/openai-completions.d.ts +19 -0
package/dist/types/providers/openai-responses-shared.d.ts +10 -1
package/dist/types/providers/openai-responses.d.ts +34 -0
package/dist/types/providers/xai-responses.d.ts +23 -0
package/dist/types/types.d.ts +11 -1
package/dist/types/utils/discovery/index.d.ts +0 -1
package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts +1 -0
package/dist/types/utils/oauth/openrouter.d.ts +1 -0
package/dist/types/utils/oauth/types.d.ts +1 -1
package/dist/types/utils/oauth/wafer.d.ts +2 -0
package/dist/types/utils/oauth/xai-oauth.d.ts +60 -0
package/dist/types/utils/stream-markup-healing.d.ts +83 -0
package/package.json +2 -2
package/src/auth-gateway/server.ts +155 -57
package/src/auth-storage.ts +218 -32
package/src/model-manager.ts +31 -14
package/src/model-thinking.ts +32 -0
package/src/models.json +1332 -224
package/src/provider-models/descriptors.ts +29 -3
package/src/provider-models/google.ts +2 -38
package/src/provider-models/openai-compat.ts +476 -41
package/src/providers/anthropic.ts +104 -1
package/src/providers/ollama.ts +136 -43
package/src/providers/openai-chat-server-schema.ts +9 -0
package/src/providers/openai-chat-server.ts +21 -2
package/src/providers/openai-completions.ts +91 -120
package/src/providers/openai-responses-shared.ts +38 -9
package/src/providers/openai-responses.ts +60 -4
package/src/providers/pi-native-server.ts +1 -0
package/src/providers/xai-responses.ts +82 -0
package/src/stream.ts +132 -10
package/src/types.ts +13 -0
package/src/utils/discovery/index.ts +0 -1
package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
package/src/utils/oauth/index.ts +28 -0
package/src/utils/oauth/openrouter.ts +20 -0
package/src/utils/oauth/synthetic.ts +2 -3
package/src/utils/oauth/types.ts +4 -0
package/src/utils/oauth/wafer.ts +50 -0
package/src/utils/oauth/xai-oauth.ts +342 -0
package/src/utils/stream-markup-healing.ts +759 -0
package/dist/types/utils/discovery/vertex.d.ts +0 -25
package/dist/types/utils/tool-call-healing.d.ts +0 -71
package/src/utils/discovery/vertex.ts +0 -210
package/src/utils/tool-call-healing.ts +0 -271

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,53 @@
 ## [Unreleased]
+## [15.5.8] - 2026-05-28
+### Added
+- Added `CheckCredentialsOptions.completionProbe` (and `completionTimeoutMs`) so `AuthStorage.checkCredentials` can additionally exercise each credential against the provider's chat-completion endpoint after refresh-on-expiry. Result lands on `CredentialHealthResult.completion` ({ok, reason?, modelId?, latencyMs?}) without disturbing the usage `ok` field. Public types: `CompletionProbe`, `CompletionProbeInput`, `CompletionProbeCredential`, `CredentialCompletionResult`. The probe is invoked even when no `UsageProvider` is registered for the row, and is skipped when OAuth refresh fails (the stale bytes would only mask the upstream failure).
+- Added Wafer Pass and Wafer Serverless providers (`wafer-pass`, `wafer-serverless`). OpenAI-compatible (`https://pass.wafer.ai/v1`), bearer auth, `wfr_…` keys. `/login wafer-pass` and `/login wafer-serverless` paste-and-validate the key against `/v1/models`. `WAFER_PASS_API_KEY` and `WAFER_SERVERLESS_API_KEY` environment variables wired into `getEnvApiKey`. Bundled catalog seeds `wafer-pass/{GLM-5.1, Qwen3.5-397B-A17B}` and `wafer-serverless/{GLM-5.1, Kimi-K2.6, Qwen3.5-397B-A17B, Qwen3.6-35B-A3B, qwen3.7-max, deepseek-v4-flash, deepseek-v4-pro}`; dynamic discovery via `/v1/models` overlays additional models at runtime. Pass-tier discovery filters `wafer.tier === "pass_included"`. Pass-SKU costs are seeded at `0` (flat-rate subscription, no per-token charge — matches `kimi-code`/`firepass`/`alibaba-coding-plan`). Serverless costs are the wafer.ai retail rate, derived from the `*_cents_per_million` envelope via `value × 125 / 10000` (e.g. GLM-5.1 `120` → $1.50/M, Kimi-K2.6 `88` → $1.10/M). Reasoning entries get a thinking compat picked from the `wafer.provider` envelope: `zai`/`moonshotai` → zai-style `thinking: { type }`, `qwen` → top-level `enable_thinking`, `deepseek` and unknown upstreams stay unset so `detectOpenAICompat` can pick `reasoning_effort` from the id pattern at request time.
+### Changed
+- Changed auth-gateway credential resolution to use per-conversation `promptCacheKey`/`sessionId` when calling `AuthStorage.getApiKey`, so repeated turns can keep the same credential until it becomes unavailable
+- Changed auth-gateway and pi-native request handling to align `sessionId` with prompt/context identity before credential lookup
+- Changed Anthropic prompt preparation to downscale image blocks over 2000px when a request includes 20+ images, reducing oversized payloads automatically
+- Changed OpenAI chat request parsing to accept `name` on `tool` messages and fall back to the matching assistant `tool_calls` name, so parsed tool results now carry a proper tool name when the wire omits it
+- Changed `checkCredentials` to skip running `completionProbe` when OAuth refresh fails, so stale bearer tokens are never probed and the refresh failure remains the returned `reason`
+- Changed completion reporting to return `completion: { ok: null, reason: ... }` when a credential has no usable bearer bytes instead of attempting the probe
+- Refactored `AuthStorage.checkCredentials` so OAuth refresh-on-expiry runs up-front and the refreshed credential is shared between the usage probe and the new completion probe; rows without a registered `UsageProvider` no longer short-circuit before the completion probe runs.
+### Fixed
+- Fixed DeepSeek DSML tool-call envelope leaks on Ollama Cloud and OpenAI-compatible streams by healing leaked envelopes into structured tool calls without displaying raw DSML markers. ([#1462](https://github.com/can1357/oh-my-pi/issues/1462))
+- Fixed auth-gateway to classify usage-limit messages such as `usage_limit_reached`, `resource_exhausted`, and Codex-style `Try again in ~X min` text as 429 `rate_limit_error` responses
+- Fixed auth-gateway usage-limit handling to honor parsed retry hints and switch to a sibling credential via `markUsageLimitReached` instead of invalidating the rate-limited credential
+- Fixed `streamSimple` to retry on usage-limit errors (including message-only error events) before any content is emitted, so `onAuthError` can rotate credentials automatically
+- Fixed auth-gateway error classification to extract embedded status codes and use word-boundary matching, so `GenerateContentRequest` and similar messages are no longer misreported as rate-limit errors
+- Fixed `checkCredentials` to handle `completionProbe` exceptions by recording the failure in `CredentialHealthResult.completion.reason` while still returning the usage probe result
+### Fixed
+- Fixed Google Vertex's bundled model list to use the authoritative models.dev catalog, including MaaS entries such as `deepseek-ai/deepseek-v3.2-maas` and removing retired Gemini 1.5 fallbacks. ([#1456](https://github.com/can1357/oh-my-pi/issues/1456))
+## [15.5.7] - 2026-05-27
+### Added
+- `SimpleStreamOptions.openrouterVariant` (`"nitro"`, `"floor"`, `"online"`, `"exacto"`, …) — when set, appends `:<variant>` to OpenRouter model IDs at request time, leaving ids that already carry an explicit `:suffix` untouched. Plumbed through `openai-completions` and the pi-native gateway forwarder.
+- xAI Grok OAuth (SuperGrok Subscription) provider in `/login`. Loopback PKCE flow on `127.0.0.1:56121`; the token unlocks Grok-4.x chat. Ported from NousResearch/hermes-agent (MIT).
+- OpenRouter provider in `/login`. API-key paste flow validated against `https://openrouter.ai/api/v1/auth/key` (the `/models` endpoint is public and cannot validate auth). The pasted key is stored under the existing `openrouter` provider id used by `OPENROUTER_API_KEY`.
+- `XAI_OAUTH_TOKEN` environment variable accepted as a headless fallback for the xAI Grok OAuth provider.
+### Changed
+- `OpenAIResponsesOptions` gains four optional, provider-agnostic fields that adapter wrappers can use to compose provider-specific behavior on top of the generic transport: `includeEncryptedReasoning` (gates `include: ["reasoning.encrypted_content"]`; default `true`, preserves current behavior), `filterReasoningHistory` (strips replayed `type: "reasoning"` items from conversation history; default `false`), `headers` (merged onto the client's default headers), and `extraBody` (merged into the request payload).
+- The existing `XAI_API_KEY` path is unchanged — it continues to use the OpenAI-completions transport.
+### Fixed
+- Fixed OpenRouter DeepSeek V4 tool-call follow-up requests replaying normalized `reasoning` as-is instead of DeepSeek's required `reasoning_content`, which caused HTTP 400 errors in thinking mode. ([#1445](https://github.com/can1357/oh-my-pi/issues/1445))
 ## [15.5.6] - 2026-05-27
 ### Added
@@ -34,6 +81,10 @@
 - Fixed Anthropic streaming to suppress hallucinated meta-prompt thinking blocks (the recent "I don't see any current rewritten thinking..." regression). When the marker phrase `rewritten thinking` appears in a streamed thinking summary the block is collapsed to a plain `Thinking...` placeholder and its signature is dropped so subsequent turns can't re-anchor on the garbled chain.
 - Fixed Codex WebSocket silent stalls by adding protocol pings, inbound queue bounding, clearer idle-timeout diagnostics, and SDK retry clamping for first-event timeouts.
+### Fixed
+- Fixed Synthetic model discovery to treat the provider `/models` response as authoritative so deprecated bundled IDs are pruned from the runtime cache, and changed Synthetic login validation to avoid probing a specific model ([#1417](https://github.com/can1357/oh-my-pi/issues/1417)).
 ## [15.5.0] - 2026-05-26
 ### Added

package/README.md CHANGED Viewed

@@ -62,6 +62,8 @@ Unified LLM API with automatic model discovery, provider configuration, token an
 - **Hugging Face Inference**
 - **xAI**
 - **Venice** (requires `VENICE_API_KEY`)
+- **Wafer Pass** (requires `WAFER_PASS_API_KEY`; flat-rate subscription, includes GLM-5.1 and Qwen3.5-397B-A17B)
+- **Wafer Serverless** (requires `WAFER_SERVERLESS_API_KEY`; pay-as-you-go)
 - **OpenRouter**
 - **Kilo Gateway** (supports OAuth `/login kilo` or `KILO_API_KEY`)
 - **LiteLLM** (requires `LITELLM_API_KEY`)

package/dist/types/auth-gateway/server.d.ts CHANGED Viewed

@@ -14,4 +14,23 @@ export interface AuthGatewayBootOptions extends AuthGatewayServerOptions {
     /** Optional supplier for `/v1/models` listing. Returns the full model array. */
     listModels?: () => Iterable<Model<Api>>;
 }
+/**
+ * Classify an upstream / gateway-internal error into a status code and a
+ * format-neutral type. The order is intentional:
+ *
+ *  1. Honour an explicit numeric `status` property on the thrown error.
+ *  2. Parse a status code embedded in the message string. Provider errors
+ *     virtually always carry one (`Google API error (400): …`, `HTTP 429`,
+ *     `status=503`) and the embedded value is authoritative.
+ *  3. Fall through to **word-boundaried** substring heuristics. The old
+ *     `lower.includes("rate")` test famously matched
+ *     `GenerateContentRequest`, surfacing every Google 400 as a 429
+ *     `rate_limit_error`. The patterns here all require boundaries so they
+ *     don't collide with provider field names.
+ */
+export declare function classifyGatewayError(err: unknown): {
+    status: number;
+    type: string;
+    message: string;
+};
 export declare function startAuthGateway(opts: AuthGatewayBootOptions): AuthGatewayServerHandle;

package/dist/types/auth-storage.d.ts CHANGED Viewed

@@ -75,13 +75,88 @@ export interface CredentialHealthResult {
     reason?: string;
     /** Probe usage report (raw payload stripped) when `ok === true`. */
     report?: Omit<UsageReport, "raw">;
+    /**
+     * Result of the optional end-to-end completion probe (see
+     * {@link CheckCredentialsOptions.completionProbe}). Absent when no probe was
+     * supplied. The completion probe exercises the provider's chat-completion
+     * endpoint with the credential's bearer bytes, which is a stricter signal
+     * than the usage endpoint (some providers happily 200 a `/usage` call while
+     * the chat endpoint 401s the same bearer).
+     */
+    completion?: CredentialCompletionResult;
 }
+/**
+ * Outcome of the end-to-end completion probe. `ok: null` means the probe was
+ * skipped (no bearer bytes were available — e.g. OAuth refresh failed
+ * upstream of the probe).
+ */
+export interface CredentialCompletionResult {
+    ok: boolean | null;
+    /** Failure / unverifiable reason; absent when `ok === true`. */
+    reason?: string;
+    /** Probe model id used (carried back from the caller for display). */
+    modelId?: string;
+    /** Round-trip latency in milliseconds. */
+    latencyMs?: number;
+}
+/**
+ * Credential payload handed to {@link CompletionProbe}. For API-key
+ * credentials only the bytes are exposed; for OAuth, every identity field
+ * carried by the refreshed credential is included so the probe can compose
+ * provider-specific apiKey shapes (e.g. GitHub Copilot / Google Gemini CLI
+ * expect a JSON blob with `token` + `projectId`, not the raw access token).
+ *
+ * `refreshToken` may be {@link REMOTE_REFRESH_SENTINEL} when the credential
+ * lives behind a broker; the chat endpoint never reads it, so the probe can
+ * forward it verbatim into the structured shape without harm.
+ */
+export type CompletionProbeCredential = {
+    type: "api_key";
+    apiKey: string;
+} | {
+    type: "oauth";
+    accessToken: string;
+    refreshToken?: string;
+    expiresAt?: number;
+    accountId?: string;
+    projectId?: string;
+    email?: string;
+    enterpriseUrl?: string;
+};
+/**
+ * Caller-supplied bearer probe. Receives the post-refresh credential for a
+ * single row and reports whether a real chat-completion round-trip succeeds.
+ * The check-credentials pipeline calls this AFTER any OAuth refresh so the
+ * bytes match what a live request would send.
+ */
+export interface CompletionProbeInput {
+    provider: Provider;
+    credentialId: number;
+    credential: CompletionProbeCredential;
+    signal: AbortSignal;
+}
+export type CompletionProbe = (input: CompletionProbeInput) => Promise<CredentialCompletionResult>;
 export interface CheckCredentialsOptions {
     signal?: AbortSignal;
     /** Per-credential probe timeout (ms). Defaults to the configured usage request timeout. */
     timeoutMs?: number;
     /** Provider → base URL override, same shape as {@link AuthStorage.fetchUsageReports}. */
     baseUrlResolver?: (provider: Provider) => string | undefined;
+    /**
+     * Optional end-to-end probe. When provided, `checkCredentials` invokes it
+     * for every credential where a usable bearer is available (API key, or
+     * OAuth access token after refresh-on-expiry succeeded). The result lands
+     * on {@link CredentialHealthResult.completion}.
+     *
+     * The probe runs INDEPENDENTLY of whether a {@link UsageProvider} is
+     * configured: providers without a usage endpoint still benefit from the
+     * extra signal. The probe is NOT invoked when OAuth refresh fails — the
+     * bytes would be stale anyway and the upstream failure is already captured
+     * on `reason`.
+     */
+    completionProbe?: CompletionProbe;
+    /** Per-credential completion probe timeout (ms). Defaults to `timeoutMs`. */
+    completionTimeoutMs?: number;
 }
 /**
  * Sentinel value placed in OAuth `refresh` fields when a credential is shared
@@ -416,6 +491,18 @@ export declare class AuthStorage {
      * Unlike getApiKey(), this doesn't refresh OAuth tokens.
      */
     hasAuth(provider: string): boolean;
+    /**
+     * True iff a dedicated, non-env credential source is configured for this
+     * provider — i.e. anything in the cascade EXCEPT `getEnvApiKey(provider)`.
+     *
+     * Mirrors `hasAuth` minus the env-fallback leg. Useful for callers that
+     * need to distinguish "the user explicitly configured this provider"
+     * from "an env var happens to alias this provider via the cross-provider
+     * fallback map" (see e.g. `xai-oauth → XAI_OAUTH_TOKEN || XAI_API_KEY` in
+     * `stream.ts`). Without that distinction, an `XAI_API_KEY`-only setup
+     * silently satisfies xai-oauth and routes around `providers.xai.baseUrl`.
+     */
+    hasNonEnvCredential(provider: string): boolean;
     /**
      * Check if OAuth credentials are configured for a provider.
      */
@@ -479,6 +566,12 @@ export declare class AuthStorage {
      * soft-disabled rows are already known-bad and don't need a network probe.
      * Environment-variable API keys are not enumerated — the caller's intent
      * here is "which of my stored credentials is broken".
+     *
+     * Pass {@link CheckCredentialsOptions.completionProbe} to additionally
+     * exercise each credential against the provider's chat-completion endpoint
+     * (strict mode). The result lands on
+     * {@link CredentialHealthResult.completion}; the usage `ok` field is
+     * unchanged so callers can tell the two signals apart.
      */
     checkCredentials(options?: CheckCredentialsOptions): Promise<CredentialHealthResult[]>;
     /**

package/dist/types/model-manager.d.ts CHANGED Viewed

@@ -24,6 +24,8 @@ export interface ModelManagerOptions<TApi extends Api = Api, TModelsDevPayload =
     cacheDbPath?: string;
     /** Maximum cache age in milliseconds before considered stale. Default: 24h. */
     cacheTtlMs?: number;
+    /** When true, a successful dynamic fetch is the complete provider catalog and prunes static-only models. */
+    dynamicModelsAuthoritative?: boolean;
     /** Optional dynamic endpoint fetcher. */
     fetchDynamicModels?: () => Promise<readonly Model<TApi>[] | null>;
     /** Optional models.dev fallback hook. */

package/dist/types/model-thinking.d.ts CHANGED Viewed

@@ -42,6 +42,21 @@ export declare function applyGeneratedModelPolicies(models: ApiModel<Api>[]): vo
  * - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
  */
 export declare function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void;
+/**
+ * True when the model reasons natively but rejects the wire `reasoning.effort`
+ * param (compat.supportsReasoningEffort: false on openai-responses*). Callers
+ * are expected to omit the effort field; the wire-side omitReasoningEffort
+ * gate (providers/xai-responses.ts:78) is the actual strip, and this
+ * predicate is the upstream check that prevents a redundant
+ * requireSupportedEffort throw from defeating that gate.
+ *
+ * Scoped to openai-responses* because that's the only API surface where
+ * `compat.supportsReasoningEffort: false` is meaningful today. The
+ * `in`-narrowed access is necessary because Model.compat is
+ * `AnthropicCompat | OpenAICompat` and the api gate doesn't narrow the
+ * union for TS.
+ */
+export declare function modelOmitsReasoningEffort<TApi extends Api>(model: ApiModel<TApi>): boolean;
 /**
  * Returns the supported thinking efforts declared on the model metadata.
  *

package/dist/types/provider-models/descriptors.d.ts CHANGED Viewed

@@ -28,6 +28,8 @@ export interface ProviderDescriptor {
     defaultModel: string;
     /** When true, the runtime creates a model manager even without a valid API key (e.g. ollama). */
     allowUnauthenticated?: boolean;
+    /** When true, successful runtime discovery replaces bundled provider models instead of merging fallback-only IDs. */
+    dynamicModelsAuthoritative?: boolean;
     /** Catalog discovery configuration. Only providers with this field participate in generate-models.ts. */
     catalogDiscovery?: CatalogDiscoveryConfig;
 }

package/dist/types/provider-models/google.d.ts CHANGED Viewed

@@ -19,6 +19,6 @@ export interface GoogleGeminiCliModelManagerConfig {
     endpoint?: string;
 }
 export declare function googleModelManagerOptions(config?: GoogleModelManagerConfig): ModelManagerOptions<"google-generative-ai">;
-export declare function googleVertexModelManagerOptions(config?: GoogleVertexModelManagerConfig): ModelManagerOptions;
+export declare function googleVertexModelManagerOptions(_config?: GoogleVertexModelManagerConfig): ModelManagerOptions;
 export declare function googleAntigravityModelManagerOptions(config?: GoogleAntigravityModelManagerConfig): ModelManagerOptions<"google-gemini-cli">;
 export declare function googleGeminiCliModelManagerOptions(config?: GoogleGeminiCliModelManagerConfig): ModelManagerOptions<"google-gemini-cli">;

package/dist/types/provider-models/openai-compat.d.ts CHANGED Viewed

@@ -53,6 +53,53 @@ export interface XaiModelManagerConfig {
     baseUrl?: string;
 }
 export declare function xaiModelManagerOptions(config?: XaiModelManagerConfig): ModelManagerOptions<"openai-completions">;
+export interface XaiOAuthModelManagerConfig {
+    apiKey?: string;
+    baseUrl?: string;
+}
+interface XAICuratedModel {
+    id: string;
+    contextWindow: number;
+    name?: string;
+    /** Whether the model reasons natively. Defaults to true for Grok-4.x family. */
+    reasoning?: boolean;
+    /**
+     * Whether xAI accepts the `reasoning.effort` wire param for this model.
+     * Default true. When false: picker hides the effort dial (via
+     * getSupportedEfforts in model-thinking.ts) AND wire-side already omits
+     * the param via GROK_EFFORT_CAPABLE_PREFIXES in providers/xai-responses.ts.
+     * Must agree with that allowlist; two truths kept in sync by curated-catalog
+     * author convention until a follow-up ("Op: compress") unifies them.
+     */
+    supportsReasoningEffort?: boolean;
+    /**
+     * Input modalities this model accepts. Defaults to `["text"]` when absent.
+     * Vision-capable Grok models MUST list `"image"` here so the curated layer
+     * overrides `fetchOpenAICompatibleModels`' default of `["text"]` (which
+     * otherwise strips image capability on every online refresh).
+     */
+    input?: ("text" | "image")[];
+}
+export declare const XAI_OAUTH_CURATED_MODELS: readonly XAICuratedModel[];
+/**
+ * Render `XAI_OAUTH_CURATED_MODELS` as full `Model<"openai-responses">` entries.
+ *
+ * Single source of truth for the curated-entry-to-`Model` fan-in, consumed by both
+ * - {@link xaiOAuthModelManagerOptions} (runtime static seed handed to the model
+ *   manager so the picker is populated on a fresh login), and
+ * - `packages/ai/scripts/generate-models.ts` (bundles the same entries into
+ *   `models.json`, so the synchronous `ModelRegistry.#loadModels()` boot path
+ *   sees `xai-oauth` without waiting for a refresh — fixes the boot-time
+ *   default-model reset when `modelRoles.default = "xai-oauth/<id>"`).
+ *
+ * `reasoning` defaults to `true` for the Grok-4.x family; the explicit
+ * `grok-4.20-0309-non-reasoning` entry opts out via `XAICuratedModel.reasoning`.
+ * `maxTokens` uses `UNK_MAX_TOKENS` so id-keyed overlays from a successful
+ * dynamic fetch merge cleanly. Mirrors
+ * `hermes-agent/hermes_cli/models.py:_XAI_STATIC_FALLBACK`.
+ */
+export declare function buildXaiOAuthStaticSeed(baseUrl?: string): Model<"openai-responses">[];
+export declare function xaiOAuthModelManagerOptions(config?: XaiOAuthModelManagerConfig): ModelManagerOptions<"openai-responses">;
 export interface DeepSeekModelManagerConfig {
     apiKey?: string;
     baseUrl?: string;
@@ -80,6 +127,12 @@ export interface FirepassModelManagerConfig {
  * See https://docs.fireworks.ai/firepass.
  */
 export declare function firepassModelManagerOptions(_config?: FirepassModelManagerConfig): ModelManagerOptions<"openai-completions">;
+export interface WaferModelManagerConfig {
+    apiKey?: string;
+    baseUrl?: string;
+}
+export declare function waferPassModelManagerOptions(config?: WaferModelManagerConfig): ModelManagerOptions<"openai-completions">;
+export declare function waferServerlessModelManagerOptions(config?: WaferModelManagerConfig): ModelManagerOptions<"openai-completions">;
 export interface MistralModelManagerConfig {
     apiKey?: string;
     baseUrl?: string;
@@ -240,3 +293,4 @@ export interface ModelsDevProviderDescriptor {
 export declare function mapModelsDevToModels(data: Record<string, unknown>, descriptors: readonly ModelsDevProviderDescriptor[]): Model<Api>[];
 /** All provider descriptors for models.dev data mapping in generate-models.ts. */
 export declare const MODELS_DEV_PROVIDER_DESCRIPTORS: readonly ModelsDevProviderDescriptor[];
+export {};

package/dist/types/providers/openai-chat-server-schema.d.ts CHANGED Viewed

@@ -322,6 +322,7 @@ export declare const toolMessageSchema: z.ZodObject<{
         type: z.ZodString;
     }, z.core.$loose>]>>]>>;
     tool_call_id: z.ZodOptional<z.ZodString>;
+    name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
 }, z.core.$strip>;
 /**
  * Legacy `function` role (pre-tools API). Translated to a `tool` role
@@ -526,6 +527,7 @@ export declare const messageSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
         type: z.ZodString;
     }, z.core.$loose>]>>]>>;
     tool_call_id: z.ZodOptional<z.ZodString>;
+    name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
 }, z.core.$strip>, z.ZodObject<{
     role: z.ZodLiteral<"function">;
     name: z.ZodString;
@@ -736,6 +738,7 @@ export declare const openaiChatRequestSchema: z.ZodObject<{
             type: z.ZodString;
         }, z.core.$loose>]>>]>>;
         tool_call_id: z.ZodOptional<z.ZodString>;
+        name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
     }, z.core.$strip>, z.ZodObject<{
         role: z.ZodLiteral<"function">;
         name: z.ZodString;

package/dist/types/providers/openai-completions.d.ts CHANGED Viewed

@@ -21,7 +21,26 @@ export interface OpenAICompletionsOptions extends StreamOptions {
     /** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
     disableReasoning?: boolean;
     serviceTier?: ServiceTier;
+    /**
+     * Routing-variant suffix appended to OpenRouter model IDs when none is
+     * already present (`anthropic/claude-haiku-latest` → `…:nitro`). Common
+     * values: `"nitro"`, `"floor"`, `"online"`, `"exacto"`. Ignored when the
+     * resolved `model.id` already contains a colon-suffix after the last
+     * provider segment (explicit `:nitro` in the selector or a catalog entry
+     * with the variant baked in).
+     */
+    openrouterVariant?: string;
 }
+/**
+ * Append an OpenRouter routing-variant suffix (e.g. `:nitro`, `:floor`, `:online`, `:exacto`)
+ * to a model id when no explicit variant is already present. A variant is considered
+ * "already present" when `modelId` contains a colon after the last `/` separator —
+ * which covers both user-typed selectors (`anthropic/claude-haiku:nitro`) and catalog
+ * entries that bake the variant in (`deepseek/deepseek-v3.1-terminus:exacto`).
+ *
+ * Exported for unit testing.
+ */
+export declare function applyOpenRouterRoutingVariant(modelId: string, variant: string | undefined): string;
 export declare const streamOpenAICompletions: StreamFunction<"openai-completions">;
 export declare function parseChunkUsage(rawUsage: object, model: Model<"openai-completions">, premiumRequests: number | undefined): AssistantMessage["usage"];
 export declare function convertMessages(model: Model<"openai-completions">, context: Context, compat: ResolvedOpenAICompat): ChatCompletionMessageParam[];

package/dist/types/providers/openai-responses-shared.d.ts CHANGED Viewed

@@ -74,8 +74,17 @@ type ReasoningOptions = {
  * Apply reasoning-related Responses parameters: enable encrypted reasoning content for replay,
  * set effort/summary when requested, and otherwise inject the GPT-5 "Juice: 0" no-reasoning hack.
  * Mutates `params` and may push a developer message into `messages`.
+ *
+ * @param omitReasoningEffort - When `true`, suppresses `params.reasoning.effort` from the wire
+ *   body. Set by `xai-responses.ts` via {@link OpenAIResponsesOptions.omitReasoningEffort} for
+ *   xAI Grok models that return HTTP 400 on any `reasoning.effort` value (e.g. grok-build,
+ *   grok-4.20-0309-reasoning). When `true` and `options.reasoning` is set but
+ *   `options.reasoningSummary` is absent, `params.reasoning` is intentionally omitted from the
+ *   wire body entirely — these models reason natively at their own internal default effort level
+ *   without needing explicit activation. Callers that pass `options.reasoning` for such models
+ *   should expect this documented downgrade: the model will reason, but at its default effort.
  */
-export declare function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(params: P, model: Model<Api>, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string): void;
+export declare function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(params: P, model: Model<Api>, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string, includeEncryptedReasoning?: boolean, omitReasoningEffort?: boolean): void;
 /** Populate `output.usage` from a Responses-API `response.usage` payload. Does not invoke `calculateCost`. */
 export declare function populateResponsesUsageFromResponse(output: AssistantMessage, usage: {
     input_tokens?: number | null;

package/dist/types/providers/openai-responses.d.ts CHANGED Viewed

@@ -12,11 +12,45 @@ export interface OpenAIResponsesOptions extends StreamOptions {
      * Azure OpenAI and GitHub Copilot Responses paths require tool results to match prior tool calls.
      */
     strictResponsesPairing?: boolean;
+    /**
+     * Pass `include: ["reasoning.encrypted_content"]` on requests when the
+     * model supports reasoning. Default: true (preserves current behavior).
+     * Set to false when the upstream Responses endpoint rejects replayed
+     * encrypted reasoning (e.g., xAI Grok under SuperGrok OAuth).
+     */
+    includeEncryptedReasoning?: boolean;
+    /**
+     * Strip `type: "reasoning"` items from replayed conversation history
+     * before they hit the wire. Default: false (preserves current behavior).
+     * Set to true when the upstream rejects replayed reasoning wrappers.
+     */
+    filterReasoningHistory?: boolean;
+    /**
+     * Suppress the `reasoning.effort` wire param when set, even if
+     * `options.reasoning` is requested. Default: false. xAI Grok models
+     * outside the effort-capable allowlist 400 with "Model X does not
+     * support parameter reasoningEffort" — the xAI Responses adapter sets
+     * this when the target model is not in GROK_EFFORT_CAPABLE_PREFIXES.
+     */
+    omitReasoningEffort?: boolean;
+    /**
+     * Extra request headers merged onto the underlying client's
+     * defaultHeaders. Used by adapter wrappers to inject provider-specific
+     * routing or cache hints.
+     */
+    headers?: Record<string, string>;
+    /**
+     * Extra body fields merged into the Responses request payload. Used by
+     * adapter wrappers to inject provider-specific body keys (e.g.,
+     * prompt_cache_key for prompt-cache routing).
+     */
+    extraBody?: Record<string, unknown>;
 }
 /**
  * Generate function for OpenAI Responses API
  */
 export declare const streamOpenAIResponses: StreamFunction<"openai-responses">;
+export declare function getOpenAIResponsesCacheSessionId(options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId" | "promptCacheKey"> | undefined): string | undefined;
 export declare function supportsDeveloperRole(modelOrBaseUrl: Pick<Model, "provider" | "baseUrl"> | string): boolean;
 /**
  * Whether this model should get the OpenAI custom-tool grammar variant

package/dist/types/providers/xai-responses.d.ts ADDED Viewed

@@ -0,0 +1,23 @@
+import type { StreamFunction } from "../types";
+/**
+ * xAI Grok Responses adapter (SuperGrok OAuth path).
+ *
+ * Three xAI-specific behaviors vs the generic OpenAI Responses adapter:
+ *
+ *  1. `x-grok-conv-id` header + body `prompt_cache_key` route prompt-cache
+ *     hits on xAI's edge. Hermes uses both (agent/transports/codex.py:182-193).
+ *     The header is undocumented by xAI; `previous_response_id` is the
+ *     documented alternative — switch if xAI deprecates the header.
+ *  2. includeEncryptedReasoning=false — xAI's /v1/responses rejects replayed
+ *     `encrypted_content` blobs minted under SuperGrok OAuth.
+ *  3. filterReasoningHistory=true — strip `type: "reasoning"` items from
+ *     replayed conversation history; the blob inside is non-replayable under
+ *     OAuth and the wrapper item 404s without it (store=false; server cannot
+ *     resolve by id).
+ *
+ * Everything else is the generic OpenAI Responses transport. The xAI bearer
+ * token arrives in `options.apiKey` via AuthStorage.getApiKey() upstream, and
+ * the xAI base URL (`https://api.x.ai/v1`) arrives via `model.baseUrl` from
+ * the provider registry — not routed through this wrapper.
+ */
+export declare const streamXAIResponses: StreamFunction<"openai-responses">;

package/dist/types/types.d.ts CHANGED Viewed

@@ -48,7 +48,7 @@ export interface ThinkingConfig {
     /** Provider-specific transport used to encode the selected effort. */
     mode: ThinkingControlMode;
 }
-export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
+export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "xai-oauth" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "wafer-pass" | "wafer-serverless" | "zenmux" | "lm-studio";
 export type Provider = KnownProvider | string;
 import type { Effort } from "./model-thinking";
 /** Token budgets for each thinking level (token-based providers only) */
@@ -294,6 +294,16 @@ export interface SimpleStreamOptions extends StreamOptions {
     syntheticApiFormat?: "openai" | "anthropic";
     /** Hint that websocket transport should be preferred when supported by the provider implementation. */
     preferWebsockets?: boolean;
+    /**
+     * OpenRouter routing-variant suffix automatically appended to model IDs when
+     * the request targets OpenRouter (`model.provider === "openrouter"`). Common
+     * values: `"nitro"` (throughput), `"floor"` (cheapest), `"online"` (web
+     * search plugin), `"exacto"` (cherry-picked high-quality providers, only
+     * defined for some models). Ignored when the resolved model id already
+     * contains a `:<variant>` suffix (e.g. the user typed `:nitro` explicitly
+     * or the catalog entry already names the variant).
+     */
+    openrouterVariant?: string;
 }
 export type StreamFunction<TApi extends Api> = (model: Model<TApi>, context: Context, options: OptionsForApi<TApi>) => AssistantMessageEventStream;
 export interface TextSignatureV1 {

package/dist/types/utils/discovery/index.d.ts CHANGED Viewed

@@ -2,4 +2,3 @@ export * from "./antigravity";
 export * from "./codex";
 export * from "./gemini";
 export * from "./openai-compatible";
-export * from "./vertex";

package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/types/utils/oauth/openrouter.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export declare const loginOpenRouter: (options: import("./types").OAuthController) => Promise<string>;

package/dist/types/utils/oauth/types.d.ts CHANGED Viewed

@@ -7,7 +7,7 @@ export type OAuthCredentials = {
     email?: string;
     accountId?: string;
 };
-export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
+export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "openrouter" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "wafer-pass" | "wafer-serverless" | "vllm" | "xai-oauth" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
 export type OAuthProviderId = OAuthProvider | (string & {});
 export type OAuthPrompt = {
     message: string;

package/dist/types/utils/oauth/wafer.d.ts ADDED Viewed

@@ -0,0 +1,2 @@
+export declare const loginWaferPass: (options: import("./types").OAuthController) => Promise<string>;
+export declare const loginWaferServerless: (options: import("./types").OAuthController) => Promise<string>;

package/dist/types/utils/oauth/xai-oauth.d.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * xAI Grok (SuperGrok Subscription) OAuth flow.
+ *
+ * Loopback PKCE flow on `127.0.0.1:56121/callback`. One token unlocks Grok-4.x
+ * chat, Grok Imagine image generation, and Grok Voice TTS via subsequent
+ * commits. Endpoint discovery is hardened against MITM via
+ * {@link validateXAIEndpoint}: any URL that is not HTTPS, or whose host is not
+ * `x.ai`/`*.x.ai`, is rejected at every call site, not just the first.
+ */
+import { OAuthCallbackFlow } from "./callback-server";
+import type { OAuthController, OAuthCredentials } from "./types";
+/**
+ * Validate an xAI OIDC discovery endpoint against scheme + host.
+ *
+ * Hermes `_xai_validate_oauth_endpoint` L2997-3035. The discovery response is
+ * long-lived and cached in {@link OAuthCredentials}; a single MITM during
+ * initial login could substitute a malicious `token_endpoint` that would then
+ * receive every future refresh_token. Rejecting non-HTTPS or non-`x.ai` /
+ * `*.x.ai` hosts pins the cached endpoint to the xAI auth origin.
+ *
+ * @throws Error with message `Invalid xAI <field>: <url>` when the URL fails
+ *         either scheme or host validation.
+ */
+export declare function validateXAIEndpoint(url: string, field: string): string;
+/**
+ * Check whether a JWT access token is at or past its `exp` claim (with an
+ * optional refresh-skew margin).
+ *
+ * Hermes `_xai_access_token_is_expiring` L2979-2994. Returns `false` for any
+ * malformed input — this is a refresh-trigger check, not a validation, so
+ * non-JWTs ("no token in cache") must NOT trigger a spurious refresh.
+ */
+export declare function isXAIAccessTokenExpiring(jwt: string, skewSeconds?: number): boolean;
+/**
+ * xAI Grok OAuth loopback flow (Hermes `_xai_oauth_loopback_login` L5315-5469).
+ *
+ * Uses a fixed redirect URI so the callback server fails fast instead of
+ * falling back to a random port that xAI's redirect_uri allowlist rejects.
+ */
+export declare class XAIOAuthFlow extends OAuthCallbackFlow {
+    #private;
+    constructor(ctrl: OAuthController);
+    generateAuthUrl(state: string, redirectUri: string): Promise<{
+        url: string;
+        instructions?: string;
+    }>;
+    exchangeToken(code: string, _state: string, redirectUri: string): Promise<OAuthCredentials>;
+}
+/**
+ * Login with xAI Grok OAuth (SuperGrok Subscription).
+ */
+export declare function loginXAIOAuth(ctrl: OAuthController): Promise<OAuthCredentials>;
+/**
+ * Refresh an xAI OAuth access token using a stored refresh_token.
+ *
+ * Hermes `refresh_xai_oauth_pure` L3087-3160. Re-runs OIDC discovery and
+ * re-validates the cached `token_endpoint` on the refresh hot path so a
+ * cached-but-poisoned endpoint cannot silently leak a refresh_token.
+ */
+export declare function refreshXAIOAuthToken(refreshToken: string): Promise<OAuthCredentials>;