@oh-my-pi/pi-ai 15.5.6 → 15.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +51 -0
  2. package/README.md +2 -0
  3. package/dist/types/auth-gateway/server.d.ts +19 -0
  4. package/dist/types/auth-storage.d.ts +93 -0
  5. package/dist/types/model-manager.d.ts +2 -0
  6. package/dist/types/model-thinking.d.ts +15 -0
  7. package/dist/types/provider-models/descriptors.d.ts +2 -0
  8. package/dist/types/provider-models/google.d.ts +1 -1
  9. package/dist/types/provider-models/openai-compat.d.ts +54 -0
  10. package/dist/types/providers/openai-chat-server-schema.d.ts +3 -0
  11. package/dist/types/providers/openai-completions.d.ts +19 -0
  12. package/dist/types/providers/openai-responses-shared.d.ts +10 -1
  13. package/dist/types/providers/openai-responses.d.ts +34 -0
  14. package/dist/types/providers/xai-responses.d.ts +23 -0
  15. package/dist/types/types.d.ts +11 -1
  16. package/dist/types/utils/discovery/index.d.ts +0 -1
  17. package/dist/types/utils/oauth/__tests__/xai-oauth.test.d.ts +1 -0
  18. package/dist/types/utils/oauth/openrouter.d.ts +1 -0
  19. package/dist/types/utils/oauth/types.d.ts +1 -1
  20. package/dist/types/utils/oauth/wafer.d.ts +2 -0
  21. package/dist/types/utils/oauth/xai-oauth.d.ts +60 -0
  22. package/dist/types/utils/stream-markup-healing.d.ts +83 -0
  23. package/package.json +2 -2
  24. package/src/auth-gateway/server.ts +155 -57
  25. package/src/auth-storage.ts +218 -32
  26. package/src/model-manager.ts +31 -14
  27. package/src/model-thinking.ts +32 -0
  28. package/src/models.json +1332 -224
  29. package/src/provider-models/descriptors.ts +29 -3
  30. package/src/provider-models/google.ts +2 -38
  31. package/src/provider-models/openai-compat.ts +476 -41
  32. package/src/providers/anthropic.ts +104 -1
  33. package/src/providers/ollama.ts +136 -43
  34. package/src/providers/openai-chat-server-schema.ts +9 -0
  35. package/src/providers/openai-chat-server.ts +21 -2
  36. package/src/providers/openai-completions.ts +91 -120
  37. package/src/providers/openai-responses-shared.ts +38 -9
  38. package/src/providers/openai-responses.ts +60 -4
  39. package/src/providers/pi-native-server.ts +1 -0
  40. package/src/providers/xai-responses.ts +82 -0
  41. package/src/stream.ts +132 -10
  42. package/src/types.ts +13 -0
  43. package/src/utils/discovery/index.ts +0 -1
  44. package/src/utils/oauth/__tests__/xai-oauth.test.ts +107 -0
  45. package/src/utils/oauth/index.ts +28 -0
  46. package/src/utils/oauth/openrouter.ts +20 -0
  47. package/src/utils/oauth/synthetic.ts +2 -3
  48. package/src/utils/oauth/types.ts +4 -0
  49. package/src/utils/oauth/wafer.ts +50 -0
  50. package/src/utils/oauth/xai-oauth.ts +342 -0
  51. package/src/utils/stream-markup-healing.ts +759 -0
  52. package/dist/types/utils/discovery/vertex.d.ts +0 -25
  53. package/dist/types/utils/tool-call-healing.d.ts +0 -71
  54. package/src/utils/discovery/vertex.ts +0 -210
  55. package/src/utils/tool-call-healing.ts +0 -271
package/CHANGELOG.md CHANGED
@@ -2,6 +2,53 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.5.8] - 2026-05-28
6
+
7
+ ### Added
8
+
9
+ - Added `CheckCredentialsOptions.completionProbe` (and `completionTimeoutMs`) so `AuthStorage.checkCredentials` can additionally exercise each credential against the provider's chat-completion endpoint after refresh-on-expiry. Result lands on `CredentialHealthResult.completion` ({ok, reason?, modelId?, latencyMs?}) without disturbing the usage `ok` field. Public types: `CompletionProbe`, `CompletionProbeInput`, `CompletionProbeCredential`, `CredentialCompletionResult`. The probe is invoked even when no `UsageProvider` is registered for the row, and is skipped when OAuth refresh fails (the stale bytes would only mask the upstream failure).
10
+ - Added Wafer Pass and Wafer Serverless providers (`wafer-pass`, `wafer-serverless`). OpenAI-compatible (`https://pass.wafer.ai/v1`), bearer auth, `wfr_…` keys. `/login wafer-pass` and `/login wafer-serverless` paste-and-validate the key against `/v1/models`. `WAFER_PASS_API_KEY` and `WAFER_SERVERLESS_API_KEY` environment variables wired into `getEnvApiKey`. Bundled catalog seeds `wafer-pass/{GLM-5.1, Qwen3.5-397B-A17B}` and `wafer-serverless/{GLM-5.1, Kimi-K2.6, Qwen3.5-397B-A17B, Qwen3.6-35B-A3B, qwen3.7-max, deepseek-v4-flash, deepseek-v4-pro}`; dynamic discovery via `/v1/models` overlays additional models at runtime. Pass-tier discovery filters `wafer.tier === "pass_included"`. Pass-SKU costs are seeded at `0` (flat-rate subscription, no per-token charge — matches `kimi-code`/`firepass`/`alibaba-coding-plan`). Serverless costs are the wafer.ai retail rate, derived from the `*_cents_per_million` envelope via `value × 125 / 10000` (e.g. GLM-5.1 `120` → $1.50/M, Kimi-K2.6 `88` → $1.10/M). Reasoning entries get a thinking compat picked from the `wafer.provider` envelope: `zai`/`moonshotai` → zai-style `thinking: { type }`, `qwen` → top-level `enable_thinking`, `deepseek` and unknown upstreams stay unset so `detectOpenAICompat` can pick `reasoning_effort` from the id pattern at request time.
11
+
12
+ ### Changed
13
+
14
+ - Changed auth-gateway credential resolution to use per-conversation `promptCacheKey`/`sessionId` when calling `AuthStorage.getApiKey`, so repeated turns can keep the same credential until it becomes unavailable
15
+ - Changed auth-gateway and pi-native request handling to align `sessionId` with prompt/context identity before credential lookup
16
+ - Changed Anthropic prompt preparation to downscale image blocks over 2000px when a request includes 20+ images, reducing oversized payloads automatically
17
+ - Changed OpenAI chat request parsing to accept `name` on `tool` messages and fall back to the matching assistant `tool_calls` name, so parsed tool results now carry a proper tool name when the wire omits it
18
+ - Changed `checkCredentials` to skip running `completionProbe` when OAuth refresh fails, so stale bearer tokens are never probed and the refresh failure remains the returned `reason`
19
+ - Changed completion reporting to return `completion: { ok: null, reason: ... }` when a credential has no usable bearer bytes instead of attempting the probe
20
+ - Refactored `AuthStorage.checkCredentials` so OAuth refresh-on-expiry runs up-front and the refreshed credential is shared between the usage probe and the new completion probe; rows without a registered `UsageProvider` no longer short-circuit before the completion probe runs.
21
+
22
+ ### Fixed
23
+
24
+ - Fixed DeepSeek DSML tool-call envelope leaks on Ollama Cloud and OpenAI-compatible streams by healing leaked envelopes into structured tool calls without displaying raw DSML markers. ([#1462](https://github.com/can1357/oh-my-pi/issues/1462))
25
+ - Fixed auth-gateway to classify usage-limit messages such as `usage_limit_reached`, `resource_exhausted`, and Codex-style `Try again in ~X min` text as 429 `rate_limit_error` responses
26
+ - Fixed auth-gateway usage-limit handling to honor parsed retry hints and switch to a sibling credential via `markUsageLimitReached` instead of invalidating the rate-limited credential
27
+ - Fixed `streamSimple` to retry on usage-limit errors (including message-only error events) before any content is emitted, so `onAuthError` can rotate credentials automatically
28
+ - Fixed auth-gateway error classification to extract embedded status codes and use word-boundary matching, so `GenerateContentRequest` and similar messages are no longer misreported as rate-limit errors
29
+ - Fixed `checkCredentials` to handle `completionProbe` exceptions by recording the failure in `CredentialHealthResult.completion.reason` while still returning the usage probe result
30
+
31
+ ### Fixed
32
+
33
+ - Fixed Google Vertex's bundled model list to use the authoritative models.dev catalog, including MaaS entries such as `deepseek-ai/deepseek-v3.2-maas` and removing retired Gemini 1.5 fallbacks. ([#1456](https://github.com/can1357/oh-my-pi/issues/1456))
34
+
35
+ ## [15.5.7] - 2026-05-27
36
+ ### Added
37
+ - `SimpleStreamOptions.openrouterVariant` (`"nitro"`, `"floor"`, `"online"`, `"exacto"`, …) — when set, appends `:<variant>` to OpenRouter model IDs at request time, leaving ids that already carry an explicit `:suffix` untouched. Plumbed through `openai-completions` and the pi-native gateway forwarder.
38
+
39
+ - xAI Grok OAuth (SuperGrok Subscription) provider in `/login`. Loopback PKCE flow on `127.0.0.1:56121`; the token unlocks Grok-4.x chat. Ported from NousResearch/hermes-agent (MIT).
40
+ - OpenRouter provider in `/login`. API-key paste flow validated against `https://openrouter.ai/api/v1/auth/key` (the `/models` endpoint is public and cannot validate auth). The pasted key is stored under the existing `openrouter` provider id used by `OPENROUTER_API_KEY`.
41
+ - `XAI_OAUTH_TOKEN` environment variable accepted as a headless fallback for the xAI Grok OAuth provider.
42
+
43
+ ### Changed
44
+
45
+ - `OpenAIResponsesOptions` gains four optional, provider-agnostic fields that adapter wrappers can use to compose provider-specific behavior on top of the generic transport: `includeEncryptedReasoning` (gates `include: ["reasoning.encrypted_content"]`; default `true`, preserves current behavior), `filterReasoningHistory` (strips replayed `type: "reasoning"` items from conversation history; default `false`), `headers` (merged onto the client's default headers), and `extraBody` (merged into the request payload).
46
+ - The existing `XAI_API_KEY` path is unchanged — it continues to use the OpenAI-completions transport.
47
+
48
+ ### Fixed
49
+
50
+ - Fixed OpenRouter DeepSeek V4 tool-call follow-up requests replaying normalized `reasoning` as-is instead of DeepSeek's required `reasoning_content`, which caused HTTP 400 errors in thinking mode. ([#1445](https://github.com/can1357/oh-my-pi/issues/1445))
51
+
5
52
  ## [15.5.6] - 2026-05-27
6
53
  ### Added
7
54
 
@@ -34,6 +81,10 @@
34
81
  - Fixed Anthropic streaming to suppress hallucinated meta-prompt thinking blocks (the recent "I don't see any current rewritten thinking..." regression). When the marker phrase `rewritten thinking` appears in a streamed thinking summary the block is collapsed to a plain `Thinking...` placeholder and its signature is dropped so subsequent turns can't re-anchor on the garbled chain.
35
82
  - Fixed Codex WebSocket silent stalls by adding protocol pings, inbound queue bounding, clearer idle-timeout diagnostics, and SDK retry clamping for first-event timeouts.
36
83
 
84
+ ### Fixed
85
+
86
+ - Fixed Synthetic model discovery to treat the provider `/models` response as authoritative so deprecated bundled IDs are pruned from the runtime cache, and changed Synthetic login validation to avoid probing a specific model ([#1417](https://github.com/can1357/oh-my-pi/issues/1417)).
87
+
37
88
  ## [15.5.0] - 2026-05-26
38
89
  ### Added
39
90
 
package/README.md CHANGED
@@ -62,6 +62,8 @@ Unified LLM API with automatic model discovery, provider configuration, token an
62
62
  - **Hugging Face Inference**
63
63
  - **xAI**
64
64
  - **Venice** (requires `VENICE_API_KEY`)
65
+ - **Wafer Pass** (requires `WAFER_PASS_API_KEY`; flat-rate subscription, includes GLM-5.1 and Qwen3.5-397B-A17B)
66
+ - **Wafer Serverless** (requires `WAFER_SERVERLESS_API_KEY`; pay-as-you-go)
65
67
  - **OpenRouter**
66
68
  - **Kilo Gateway** (supports OAuth `/login kilo` or `KILO_API_KEY`)
67
69
  - **LiteLLM** (requires `LITELLM_API_KEY`)
@@ -14,4 +14,23 @@ export interface AuthGatewayBootOptions extends AuthGatewayServerOptions {
14
14
  /** Optional supplier for `/v1/models` listing. Returns the full model array. */
15
15
  listModels?: () => Iterable<Model<Api>>;
16
16
  }
17
+ /**
18
+ * Classify an upstream / gateway-internal error into a status code and a
19
+ * format-neutral type. The order is intentional:
20
+ *
21
+ * 1. Honour an explicit numeric `status` property on the thrown error.
22
+ * 2. Parse a status code embedded in the message string. Provider errors
23
+ * virtually always carry one (`Google API error (400): …`, `HTTP 429`,
24
+ * `status=503`) and the embedded value is authoritative.
25
+ * 3. Fall through to **word-boundaried** substring heuristics. The old
26
+ * `lower.includes("rate")` test famously matched
27
+ * `GenerateContentRequest`, surfacing every Google 400 as a 429
28
+ * `rate_limit_error`. The patterns here all require boundaries so they
29
+ * don't collide with provider field names.
30
+ */
31
+ export declare function classifyGatewayError(err: unknown): {
32
+ status: number;
33
+ type: string;
34
+ message: string;
35
+ };
17
36
  export declare function startAuthGateway(opts: AuthGatewayBootOptions): AuthGatewayServerHandle;
@@ -75,13 +75,88 @@ export interface CredentialHealthResult {
75
75
  reason?: string;
76
76
  /** Probe usage report (raw payload stripped) when `ok === true`. */
77
77
  report?: Omit<UsageReport, "raw">;
78
+ /**
79
+ * Result of the optional end-to-end completion probe (see
80
+ * {@link CheckCredentialsOptions.completionProbe}). Absent when no probe was
81
+ * supplied. The completion probe exercises the provider's chat-completion
82
+ * endpoint with the credential's bearer bytes, which is a stricter signal
83
+ * than the usage endpoint (some providers happily 200 a `/usage` call while
84
+ * the chat endpoint 401s the same bearer).
85
+ */
86
+ completion?: CredentialCompletionResult;
78
87
  }
88
+ /**
89
+ * Outcome of the end-to-end completion probe. `ok: null` means the probe was
90
+ * skipped (no bearer bytes were available — e.g. OAuth refresh failed
91
+ * upstream of the probe).
92
+ */
93
+ export interface CredentialCompletionResult {
94
+ ok: boolean | null;
95
+ /** Failure / unverifiable reason; absent when `ok === true`. */
96
+ reason?: string;
97
+ /** Probe model id used (carried back from the caller for display). */
98
+ modelId?: string;
99
+ /** Round-trip latency in milliseconds. */
100
+ latencyMs?: number;
101
+ }
102
+ /**
103
+ * Credential payload handed to {@link CompletionProbe}. For API-key
104
+ * credentials only the bytes are exposed; for OAuth, every identity field
105
+ * carried by the refreshed credential is included so the probe can compose
106
+ * provider-specific apiKey shapes (e.g. GitHub Copilot / Google Gemini CLI
107
+ * expect a JSON blob with `token` + `projectId`, not the raw access token).
108
+ *
109
+ * `refreshToken` may be {@link REMOTE_REFRESH_SENTINEL} when the credential
110
+ * lives behind a broker; the chat endpoint never reads it, so the probe can
111
+ * forward it verbatim into the structured shape without harm.
112
+ */
113
+ export type CompletionProbeCredential = {
114
+ type: "api_key";
115
+ apiKey: string;
116
+ } | {
117
+ type: "oauth";
118
+ accessToken: string;
119
+ refreshToken?: string;
120
+ expiresAt?: number;
121
+ accountId?: string;
122
+ projectId?: string;
123
+ email?: string;
124
+ enterpriseUrl?: string;
125
+ };
126
+ /**
127
+ * Caller-supplied bearer probe. Receives the post-refresh credential for a
128
+ * single row and reports whether a real chat-completion round-trip succeeds.
129
+ * The check-credentials pipeline calls this AFTER any OAuth refresh so the
130
+ * bytes match what a live request would send.
131
+ */
132
+ export interface CompletionProbeInput {
133
+ provider: Provider;
134
+ credentialId: number;
135
+ credential: CompletionProbeCredential;
136
+ signal: AbortSignal;
137
+ }
138
+ export type CompletionProbe = (input: CompletionProbeInput) => Promise<CredentialCompletionResult>;
79
139
  export interface CheckCredentialsOptions {
80
140
  signal?: AbortSignal;
81
141
  /** Per-credential probe timeout (ms). Defaults to the configured usage request timeout. */
82
142
  timeoutMs?: number;
83
143
  /** Provider → base URL override, same shape as {@link AuthStorage.fetchUsageReports}. */
84
144
  baseUrlResolver?: (provider: Provider) => string | undefined;
145
+ /**
146
+ * Optional end-to-end probe. When provided, `checkCredentials` invokes it
147
+ * for every credential where a usable bearer is available (API key, or
148
+ * OAuth access token after refresh-on-expiry succeeded). The result lands
149
+ * on {@link CredentialHealthResult.completion}.
150
+ *
151
+ * The probe runs INDEPENDENTLY of whether a {@link UsageProvider} is
152
+ * configured: providers without a usage endpoint still benefit from the
153
+ * extra signal. The probe is NOT invoked when OAuth refresh fails — the
154
+ * bytes would be stale anyway and the upstream failure is already captured
155
+ * on `reason`.
156
+ */
157
+ completionProbe?: CompletionProbe;
158
+ /** Per-credential completion probe timeout (ms). Defaults to `timeoutMs`. */
159
+ completionTimeoutMs?: number;
85
160
  }
86
161
  /**
87
162
  * Sentinel value placed in OAuth `refresh` fields when a credential is shared
@@ -416,6 +491,18 @@ export declare class AuthStorage {
416
491
  * Unlike getApiKey(), this doesn't refresh OAuth tokens.
417
492
  */
418
493
  hasAuth(provider: string): boolean;
494
+ /**
495
+ * True iff a dedicated, non-env credential source is configured for this
496
+ * provider — i.e. anything in the cascade EXCEPT `getEnvApiKey(provider)`.
497
+ *
498
+ * Mirrors `hasAuth` minus the env-fallback leg. Useful for callers that
499
+ * need to distinguish "the user explicitly configured this provider"
500
+ * from "an env var happens to alias this provider via the cross-provider
501
+ * fallback map" (see e.g. `xai-oauth → XAI_OAUTH_TOKEN || XAI_API_KEY` in
502
+ * `stream.ts`). Without that distinction, an `XAI_API_KEY`-only setup
503
+ * silently satisfies xai-oauth and routes around `providers.xai.baseUrl`.
504
+ */
505
+ hasNonEnvCredential(provider: string): boolean;
419
506
  /**
420
507
  * Check if OAuth credentials are configured for a provider.
421
508
  */
@@ -479,6 +566,12 @@ export declare class AuthStorage {
479
566
  * soft-disabled rows are already known-bad and don't need a network probe.
480
567
  * Environment-variable API keys are not enumerated — the caller's intent
481
568
  * here is "which of my stored credentials is broken".
569
+ *
570
+ * Pass {@link CheckCredentialsOptions.completionProbe} to additionally
571
+ * exercise each credential against the provider's chat-completion endpoint
572
+ * (strict mode). The result lands on
573
+ * {@link CredentialHealthResult.completion}; the usage `ok` field is
574
+ * unchanged so callers can tell the two signals apart.
482
575
  */
483
576
  checkCredentials(options?: CheckCredentialsOptions): Promise<CredentialHealthResult[]>;
484
577
  /**
@@ -24,6 +24,8 @@ export interface ModelManagerOptions<TApi extends Api = Api, TModelsDevPayload =
24
24
  cacheDbPath?: string;
25
25
  /** Maximum cache age in milliseconds before considered stale. Default: 24h. */
26
26
  cacheTtlMs?: number;
27
+ /** When true, a successful dynamic fetch is treated as the complete provider catalog, and static-only models are pruned. */
28
+ dynamicModelsAuthoritative?: boolean;
27
29
  /** Optional dynamic endpoint fetcher. */
28
30
  fetchDynamicModels?: () => Promise<readonly Model<TApi>[] | null>;
29
31
  /** Optional models.dev fallback hook. */
@@ -42,6 +42,21 @@ export declare function applyGeneratedModelPolicies(models: ApiModel<Api>[]): vo
42
42
  * - `gpt-5.5` (270K input) promotes to `gpt-5.4` (1M input).
43
43
  */
44
44
  export declare function linkOpenAIPromotionTargets(models: ApiModel<Api>[]): void;
45
+ /**
46
+ * True when the model reasons natively but rejects the wire `reasoning.effort`
47
+ * param (compat.supportsReasoningEffort: false on openai-responses*). Callers
48
+ * are expected to omit the effort field; the wire-side omitReasoningEffort
49
+ * gate (providers/xai-responses.ts:78) is the actual strip, and this
50
+ * predicate is the upstream check that prevents a redundant
51
+ * requireSupportedEffort throw from defeating that gate.
52
+ *
53
+ * Scoped to openai-responses* because that's the only API surface where
54
+ * `compat.supportsReasoningEffort: false` is meaningful today. The
55
+ * `in`-narrowed access is necessary because Model.compat is
56
+ * `AnthropicCompat | OpenAICompat` and the api gate doesn't narrow the
57
+ * union for TS.
58
+ */
59
+ export declare function modelOmitsReasoningEffort<TApi extends Api>(model: ApiModel<TApi>): boolean;
45
60
  /**
46
61
  * Returns the supported thinking efforts declared on the model metadata.
47
62
  *
@@ -28,6 +28,8 @@ export interface ProviderDescriptor {
28
28
  defaultModel: string;
29
29
  /** When true, the runtime creates a model manager even without a valid API key (e.g. ollama). */
30
30
  allowUnauthenticated?: boolean;
31
+ /** When true, successful runtime discovery replaces bundled provider models instead of merging fallback-only IDs. */
32
+ dynamicModelsAuthoritative?: boolean;
31
33
  /** Catalog discovery configuration. Only providers with this field participate in generate-models.ts. */
32
34
  catalogDiscovery?: CatalogDiscoveryConfig;
33
35
  }
@@ -19,6 +19,6 @@ export interface GoogleGeminiCliModelManagerConfig {
19
19
  endpoint?: string;
20
20
  }
21
21
  export declare function googleModelManagerOptions(config?: GoogleModelManagerConfig): ModelManagerOptions<"google-generative-ai">;
22
- export declare function googleVertexModelManagerOptions(config?: GoogleVertexModelManagerConfig): ModelManagerOptions;
22
+ export declare function googleVertexModelManagerOptions(_config?: GoogleVertexModelManagerConfig): ModelManagerOptions;
23
23
  export declare function googleAntigravityModelManagerOptions(config?: GoogleAntigravityModelManagerConfig): ModelManagerOptions<"google-gemini-cli">;
24
24
  export declare function googleGeminiCliModelManagerOptions(config?: GoogleGeminiCliModelManagerConfig): ModelManagerOptions<"google-gemini-cli">;
@@ -53,6 +53,53 @@ export interface XaiModelManagerConfig {
53
53
  baseUrl?: string;
54
54
  }
55
55
  export declare function xaiModelManagerOptions(config?: XaiModelManagerConfig): ModelManagerOptions<"openai-completions">;
56
+ export interface XaiOAuthModelManagerConfig {
57
+ apiKey?: string;
58
+ baseUrl?: string;
59
+ }
60
+ interface XAICuratedModel {
61
+ id: string;
62
+ contextWindow: number;
63
+ name?: string;
64
+ /** Whether the model reasons natively. Defaults to true for Grok-4.x family. */
65
+ reasoning?: boolean;
66
+ /**
67
+ * Whether xAI accepts the `reasoning.effort` wire param for this model.
68
+ * Default true. When false: picker hides the effort dial (via
69
+ * getSupportedEfforts in model-thinking.ts) AND wire-side already omits
70
+ * the param via GROK_EFFORT_CAPABLE_PREFIXES in providers/xai-responses.ts.
71
+ * Must agree with that allowlist; two truths kept in sync by curated-catalog
72
+ * author convention until a follow-up Op: compress unifies them.
73
+ */
74
+ supportsReasoningEffort?: boolean;
75
+ /**
76
+ * Input modalities this model accepts. Defaults to `["text"]` when absent.
77
+ * Vision-capable Grok models MUST list `"image"` here so the curated layer
78
+ * overrides `fetchOpenAICompatibleModels`' default of `["text"]` (which
79
+ * otherwise strips image capability on every online refresh).
80
+ */
81
+ input?: ("text" | "image")[];
82
+ }
83
+ export declare const XAI_OAUTH_CURATED_MODELS: readonly XAICuratedModel[];
84
+ /**
85
+ * Render `XAI_OAUTH_CURATED_MODELS` as full `Model<"openai-responses">` entries.
86
+ *
87
+ * Single source of truth for the curated-to-`Model` fan-in, consumed by both
88
+ * - {@link xaiOAuthModelManagerOptions} (runtime static seed handed to the model
89
+ * manager so the picker is populated on a fresh login), and
90
+ * - `packages/ai/scripts/generate-models.ts` (bundles the same entries into
91
+ * `models.json`, so the synchronous `ModelRegistry.#loadModels()` boot path
92
+ * sees `xai-oauth` without waiting for a refresh — fixes the boot-time
93
+ * default-model reset when `modelRoles.default = "xai-oauth/<id>"`).
94
+ *
95
+ * `reasoning` defaults to `true` for the Grok-4.x family; the explicit
96
+ * `grok-4.20-0309-non-reasoning` entry opts out via `XAICuratedModel.reasoning`.
97
+ * `maxTokens` uses `UNK_MAX_TOKENS` so id-keyed overlays from a successful
98
+ * dynamic fetch merge cleanly. Mirrors
99
+ * `hermes-agent/hermes_cli/models.py:_XAI_STATIC_FALLBACK`.
100
+ */
101
+ export declare function buildXaiOAuthStaticSeed(baseUrl?: string): Model<"openai-responses">[];
102
+ export declare function xaiOAuthModelManagerOptions(config?: XaiOAuthModelManagerConfig): ModelManagerOptions<"openai-responses">;
56
103
  export interface DeepSeekModelManagerConfig {
57
104
  apiKey?: string;
58
105
  baseUrl?: string;
@@ -80,6 +127,12 @@ export interface FirepassModelManagerConfig {
80
127
  * See https://docs.fireworks.ai/firepass.
81
128
  */
82
129
  export declare function firepassModelManagerOptions(_config?: FirepassModelManagerConfig): ModelManagerOptions<"openai-completions">;
130
+ export interface WaferModelManagerConfig {
131
+ apiKey?: string;
132
+ baseUrl?: string;
133
+ }
134
+ export declare function waferPassModelManagerOptions(config?: WaferModelManagerConfig): ModelManagerOptions<"openai-completions">;
135
+ export declare function waferServerlessModelManagerOptions(config?: WaferModelManagerConfig): ModelManagerOptions<"openai-completions">;
83
136
  export interface MistralModelManagerConfig {
84
137
  apiKey?: string;
85
138
  baseUrl?: string;
@@ -240,3 +293,4 @@ export interface ModelsDevProviderDescriptor {
240
293
  export declare function mapModelsDevToModels(data: Record<string, unknown>, descriptors: readonly ModelsDevProviderDescriptor[]): Model<Api>[];
241
294
  /** All provider descriptors for models.dev data mapping in generate-models.ts. */
242
295
  export declare const MODELS_DEV_PROVIDER_DESCRIPTORS: readonly ModelsDevProviderDescriptor[];
296
+ export {};
@@ -322,6 +322,7 @@ export declare const toolMessageSchema: z.ZodObject<{
322
322
  type: z.ZodString;
323
323
  }, z.core.$loose>]>>]>>;
324
324
  tool_call_id: z.ZodOptional<z.ZodString>;
325
+ name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
325
326
  }, z.core.$strip>;
326
327
  /**
327
328
  * Legacy `function` role (pre-tools API). Translated to a `tool` role
@@ -526,6 +527,7 @@ export declare const messageSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
526
527
  type: z.ZodString;
527
528
  }, z.core.$loose>]>>]>>;
528
529
  tool_call_id: z.ZodOptional<z.ZodString>;
530
+ name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
529
531
  }, z.core.$strip>, z.ZodObject<{
530
532
  role: z.ZodLiteral<"function">;
531
533
  name: z.ZodString;
@@ -736,6 +738,7 @@ export declare const openaiChatRequestSchema: z.ZodObject<{
736
738
  type: z.ZodString;
737
739
  }, z.core.$loose>]>>]>>;
738
740
  tool_call_id: z.ZodOptional<z.ZodString>;
741
+ name: z.ZodPipe<z.ZodOptional<z.ZodString>, z.ZodTransform<string | undefined, string | undefined>>;
739
742
  }, z.core.$strip>, z.ZodObject<{
740
743
  role: z.ZodLiteral<"function">;
741
744
  name: z.ZodString;
@@ -21,7 +21,26 @@ export interface OpenAICompletionsOptions extends StreamOptions {
21
21
  /** Force-disable reasoning where supported, or request the lowest effort on generic effort endpoints. */
22
22
  disableReasoning?: boolean;
23
23
  serviceTier?: ServiceTier;
24
+ /**
25
+ * Routing-variant suffix appended to OpenRouter model IDs when none is
26
+ * already present (`anthropic/claude-haiku-latest` → `…:nitro`). Common
27
+ * values: `"nitro"`, `"floor"`, `"online"`, `"exacto"`. Ignored when the
28
+ * resolved `model.id` already contains a colon-suffix after the last
29
+ * provider segment (explicit `:nitro` in the selector or a catalog entry
30
+ * with the variant baked in).
31
+ */
32
+ openrouterVariant?: string;
24
33
  }
34
+ /**
35
+ * Append an OpenRouter routing-variant suffix (e.g. `:nitro`, `:floor`, `:online`, `:exacto`)
36
+ * to a model id when no explicit variant is already present. A variant is considered
37
+ * "already present" when `modelId` contains a colon after the last `/` separator —
38
+ * which covers both user-typed selectors (`anthropic/claude-haiku:nitro`) and catalog
39
+ * entries that bake the variant in (`deepseek/deepseek-v3.1-terminus:exacto`).
40
+ *
41
+ * Exported for unit testing.
42
+ */
43
+ export declare function applyOpenRouterRoutingVariant(modelId: string, variant: string | undefined): string;
25
44
  export declare const streamOpenAICompletions: StreamFunction<"openai-completions">;
26
45
  export declare function parseChunkUsage(rawUsage: object, model: Model<"openai-completions">, premiumRequests: number | undefined): AssistantMessage["usage"];
27
46
  export declare function convertMessages(model: Model<"openai-completions">, context: Context, compat: ResolvedOpenAICompat): ChatCompletionMessageParam[];
@@ -74,8 +74,17 @@ type ReasoningOptions = {
74
74
  * Apply reasoning-related Responses parameters: enable encrypted reasoning content for replay,
75
75
  * set effort/summary when requested, and otherwise inject the GPT-5 "Juice: 0" no-reasoning hack.
76
76
  * Mutates `params` and may push a developer message into `messages`.
77
+ *
78
+ * @param omitReasoningEffort - When `true`, suppresses `params.reasoning.effort` from the wire
79
+ * body. Set by `xai-responses.ts` via {@link OpenAIResponsesOptions.omitReasoningEffort} for
80
+ * xAI Grok models that return HTTP 400 on any `reasoning.effort` value (e.g. grok-build,
81
+ * grok-4.20-0309-reasoning). When `true` and `options.reasoning` is set but
82
+ * `options.reasoningSummary` is absent, `params.reasoning` is intentionally omitted from the
83
+ * wire body entirely — these models reason natively at their own internal default effort level
84
+ * without needing explicit activation. Callers that pass `options.reasoning` for such models
85
+ * should expect this documented downgrade: the model will reason, but at its default effort.
77
86
  */
78
- export declare function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(params: P, model: Model<Api>, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string): void;
87
+ export declare function applyResponsesReasoningParams<P extends OpenAI.Responses.ResponseCreateParamsStreaming>(params: P, model: Model<Api>, options: ReasoningOptions | undefined, messages: ResponseInput, mapEffort?: (effort: string) => string, includeEncryptedReasoning?: boolean, omitReasoningEffort?: boolean): void;
79
88
  /** Populate `output.usage` from a Responses-API `response.usage` payload. Does not invoke `calculateCost`. */
80
89
  export declare function populateResponsesUsageFromResponse(output: AssistantMessage, usage: {
81
90
  input_tokens?: number | null;
@@ -12,11 +12,45 @@ export interface OpenAIResponsesOptions extends StreamOptions {
12
12
  * Azure OpenAI and GitHub Copilot Responses paths require tool results to match prior tool calls.
13
13
  */
14
14
  strictResponsesPairing?: boolean;
15
+ /**
16
+ * Pass `include: ["reasoning.encrypted_content"]` on requests when the
17
+ * model supports reasoning. Default: true (preserves current behavior).
18
+ * Set to false when the upstream Responses endpoint rejects replayed
19
+ * encrypted reasoning (e.g., xAI Grok under SuperGrok OAuth).
20
+ */
21
+ includeEncryptedReasoning?: boolean;
22
+ /**
23
+ * Strip `type: "reasoning"` items from replayed conversation history
24
+ * before they hit the wire. Default: false (preserves current behavior).
25
+ * Set to true when the upstream rejects replayed reasoning wrappers.
26
+ */
27
+ filterReasoningHistory?: boolean;
28
+ /**
29
+ * When true, suppress the `reasoning.effort` wire param, even if
30
+ * `options.reasoning` is requested. Default: false. xAI Grok models
31
+ * outside the effort-capable allowlist return HTTP 400 with "Model X does not
32
+ * support parameter reasoningEffort" — the xAI Responses adapter sets
33
+ * this when the target model is not in GROK_EFFORT_CAPABLE_PREFIXES.
34
+ */
35
+ omitReasoningEffort?: boolean;
36
+ /**
37
+ * Extra request headers merged onto the underlying client's
38
+ * defaultHeaders. Used by adapter wrappers to inject provider-specific
39
+ * routing or cache hints.
40
+ */
41
+ headers?: Record<string, string>;
42
+ /**
43
+ * Extra body fields merged into the Responses request payload. Used by
44
+ * adapter wrappers to inject provider-specific body keys (e.g.,
45
+ * prompt_cache_key for prompt-cache routing).
46
+ */
47
+ extraBody?: Record<string, unknown>;
15
48
  }
16
49
  /**
17
50
  * Generate function for OpenAI Responses API
18
51
  */
19
52
  export declare const streamOpenAIResponses: StreamFunction<"openai-responses">;
53
+ export declare function getOpenAIResponsesCacheSessionId(options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId" | "promptCacheKey"> | undefined): string | undefined;
20
54
  export declare function supportsDeveloperRole(modelOrBaseUrl: Pick<Model, "provider" | "baseUrl"> | string): boolean;
21
55
  /**
22
56
  * Whether this model should get the OpenAI custom-tool grammar variant
@@ -0,0 +1,23 @@
1
+ import type { StreamFunction } from "../types";
2
+ /**
3
+ * xAI Grok Responses adapter (SuperGrok OAuth path).
4
+ *
5
+ * Three xAI-specific behaviors vs the generic OpenAI Responses adapter:
6
+ *
7
+ * 1. `x-grok-conv-id` header + body `prompt_cache_key` route prompt-cache
8
+ * hits on xAI's edge. Hermes uses both (agent/transports/codex.py:182-193).
9
+ * The header is undocumented by xAI; `previous_response_id` is the
10
+ * documented alternative — switch if xAI deprecates the header.
11
+ * 2. includeEncryptedReasoning=false — xAI's /v1/responses rejects replayed
12
+ * `encrypted_content` blobs minted under SuperGrok OAuth.
13
+ * 3. filterReasoningHistory=true — strip `type: "reasoning"` items from
14
+ * replayed conversation history; the blob inside is non-replayable under
15
+ * OAuth and the wrapper item 404s without it (store=false; server cannot
16
+ * resolve by id).
17
+ *
18
+ * Everything else is the generic OpenAI Responses transport. The xAI bearer
19
+ * token arrives in `options.apiKey` via AuthStorage.getApiKey() upstream, and
20
+ * the xAI base URL (`https://api.x.ai/v1`) arrives via `model.baseUrl` from
21
+ * the provider registry — not routed through this wrapper.
22
+ */
23
+ export declare const streamXAIResponses: StreamFunction<"openai-responses">;
@@ -48,7 +48,7 @@ export interface ThinkingConfig {
48
48
  /** Provider-specific transport used to encode the selected effort. */
49
49
  mode: ThinkingControlMode;
50
50
  }
51
- export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "zenmux" | "lm-studio";
51
+ export type KnownProvider = "alibaba-coding-plan" | "amazon-bedrock" | "anthropic" | "google" | "google-gemini-cli" | "google-antigravity" | "google-vertex" | "openai" | "openai-codex" | "kimi-code" | "minimax-code" | "minimax-code-cn" | "github-copilot" | "fireworks" | "firepass" | "gitlab-duo" | "cursor" | "deepseek" | "xai" | "xai-oauth" | "groq" | "cerebras" | "openrouter" | "kilo" | "vercel-ai-gateway" | "zai" | "zhipu-coding-plan" | "mistral" | "minimax" | "opencode-go" | "opencode-zen" | "synthetic" | "cloudflare-ai-gateway" | "huggingface" | "litellm" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "qianfan" | "qwen-portal" | "together" | "venice" | "vllm" | "xiaomi" | "wafer-pass" | "wafer-serverless" | "zenmux" | "lm-studio";
52
52
  export type Provider = KnownProvider | string;
53
53
  import type { Effort } from "./model-thinking";
54
54
  /** Token budgets for each thinking level (token-based providers only) */
@@ -294,6 +294,16 @@ export interface SimpleStreamOptions extends StreamOptions {
294
294
  syntheticApiFormat?: "openai" | "anthropic";
295
295
  /** Hint that websocket transport should be preferred when supported by the provider implementation. */
296
296
  preferWebsockets?: boolean;
297
+ /**
298
+ * OpenRouter routing-variant suffix automatically appended to model IDs when
299
+ * the request targets OpenRouter (`model.provider === "openrouter"`). Common
300
+ * values: `"nitro"` (throughput), `"floor"` (cheapest), `"online"` (web
301
+ * search plugin), `"exacto"` (cherry-picked high-quality providers, only
302
+ * defined for some models). Ignored when the resolved model id already
303
+ * contains a `:<variant>` suffix (e.g. the user typed `:nitro` explicitly
304
+ * or the catalog entry already names the variant).
305
+ */
306
+ openrouterVariant?: string;
297
307
  }
298
308
  export type StreamFunction<TApi extends Api> = (model: Model<TApi>, context: Context, options: OptionsForApi<TApi>) => AssistantMessageEventStream;
299
309
  export interface TextSignatureV1 {
@@ -2,4 +2,3 @@ export * from "./antigravity";
2
2
  export * from "./codex";
3
3
  export * from "./gemini";
4
4
  export * from "./openai-compatible";
5
- export * from "./vertex";
@@ -0,0 +1 @@
1
+ export declare const loginOpenRouter: (options: import("./types").OAuthController) => Promise<string>;
@@ -7,7 +7,7 @@ export type OAuthCredentials = {
7
7
  email?: string;
8
8
  accountId?: string;
9
9
  };
10
- export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "vllm" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
10
+ export type OAuthProvider = "alibaba-coding-plan" | "anthropic" | "cerebras" | "cloudflare-ai-gateway" | "cursor" | "deepseek" | "fireworks" | "firepass" | "github-copilot" | "google-gemini-cli" | "google-antigravity" | "gitlab-duo" | "huggingface" | "kimi-code" | "kilo" | "kagi" | "litellm" | "lm-studio" | "minimax-code" | "minimax-code-cn" | "moonshot" | "nvidia" | "nanogpt" | "ollama" | "ollama-cloud" | "openai-codex" | "openai-codex-device" | "opencode-go" | "openrouter" | "opencode-zen" | "parallel" | "perplexity" | "qianfan" | "qwen-portal" | "synthetic" | "tavily" | "together" | "venice" | "vercel-ai-gateway" | "wafer-pass" | "wafer-serverless" | "vllm" | "xai-oauth" | "xiaomi" | "zenmux" | "zai" | "zhipu-coding-plan";
11
11
  export type OAuthProviderId = OAuthProvider | (string & {});
12
12
  export type OAuthPrompt = {
13
13
  message: string;
@@ -0,0 +1,2 @@
1
+ export declare const loginWaferPass: (options: import("./types").OAuthController) => Promise<string>;
2
+ export declare const loginWaferServerless: (options: import("./types").OAuthController) => Promise<string>;
@@ -0,0 +1,60 @@
1
+ /**
2
+ * xAI Grok (SuperGrok Subscription) OAuth flow.
3
+ *
4
+ * Loopback PKCE flow on `127.0.0.1:56121/callback`. One token unlocks Grok-4.x
5
+ * chat, Grok Imagine image generation, and Grok Voice TTS via subsequent
6
+ * commits. Endpoint discovery is hardened against MITM via
7
+ * {@link validateXAIEndpoint}: any non-HTTPS or non-`x.ai`/`*.x.ai` host is
8
+ * rejected at every call site, not just the first.
9
+ */
10
+ import { OAuthCallbackFlow } from "./callback-server";
11
+ import type { OAuthController, OAuthCredentials } from "./types";
12
+ /**
13
+ * Validate an xAI OIDC discovery endpoint against scheme + host.
14
+ *
15
+ * Hermes `_xai_validate_oauth_endpoint` L2997-3035. The discovery response is
16
+ * long-lived and cached in {@link OAuthCredentials}; a single MITM during
17
+ * initial login could substitute a malicious `token_endpoint` that would then
18
+ * receive every future refresh_token. Rejecting non-HTTPS or non-`x.ai` /
19
+ * `*.x.ai` hosts pins the cached endpoint to the xAI auth origin.
20
+ *
21
+ * @throws Error with message `Invalid xAI <field>: <url>` when the URL fails
22
+ * either scheme or host validation.
23
+ */
24
+ export declare function validateXAIEndpoint(url: string, field: string): string;
25
+ /**
26
+ * Check whether a JWT access token is at or past its `exp` claim (with an
27
+ * optional refresh-skew margin).
28
+ *
29
+ * Hermes `_xai_access_token_is_expiring` L2979-2994. Returns `false` for any
30
+ * malformed input — this is a refresh-trigger check, not a validation, so
31
+ * non-JWTs ("no token in cache") must NOT trigger a spurious refresh.
32
+ */
33
+ export declare function isXAIAccessTokenExpiring(jwt: string, skewSeconds?: number): boolean;
34
+ /**
35
+ * xAI Grok OAuth loopback flow (Hermes `_xai_oauth_loopback_login` L5315-5469).
36
+ *
37
+ * Uses a fixed redirect URI so the callback server fails fast instead of
38
+ * falling back to a random port that xAI's redirect_uri allowlist rejects.
39
+ */
40
+ export declare class XAIOAuthFlow extends OAuthCallbackFlow {
41
+ #private;
42
+ constructor(ctrl: OAuthController);
43
+ generateAuthUrl(state: string, redirectUri: string): Promise<{
44
+ url: string;
45
+ instructions?: string;
46
+ }>;
47
+ exchangeToken(code: string, _state: string, redirectUri: string): Promise<OAuthCredentials>;
48
+ }
49
+ /**
50
+ * Login with xAI Grok OAuth (SuperGrok Subscription).
51
+ */
52
+ export declare function loginXAIOAuth(ctrl: OAuthController): Promise<OAuthCredentials>;
53
+ /**
54
+ * Refresh an xAI OAuth access token using a stored refresh_token.
55
+ *
56
+ * Hermes `refresh_xai_oauth_pure` L3087-3160. Re-runs OIDC discovery and
57
+ * re-validates the cached `token_endpoint` on the refresh hot path so a
58
+ * cached-but-poisoned endpoint cannot silently leak a refresh_token.
59
+ */
60
+ export declare function refreshXAIOAuthToken(refreshToken: string): Promise<OAuthCredentials>;