@oh-my-pi/pi-ai 15.1.5 → 15.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,23 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.1.7] - 2026-05-19
6
+ ### Added
7
+
8
+ - Added Anthropic realization of `serviceTier: "priority"`. The anthropic-messages provider now sets `speed: "fast"` on the request and appends the `fast-mode-2026-02-01` beta to `Anthropic-Beta` whenever the caller passes `serviceTier: "priority"`. When the server rejects an unsupported model with `invalid_request_error`, the provider transparently retries the same turn without the fast-mode signal (mirroring the strict-tools fallback pattern), persists the disable via a new `providerSessionState.fastModeDisabled` flag so subsequent requests in the session skip the field, and surfaces the action via the new `AssistantMessage.disabledFeatures` array (id `"priority"`) so callers can sync user-facing toggles. A new `clearAnthropicFastModeFallback(providerSessionState)` helper lets callers re-arm priority after the auto-fallback fired.
9
+ - Added scoped `ServiceTier` values: `"openai-only"` (priority on `openai`/`openai-codex`, ignored elsewhere) and `"claude-only"` (priority on direct `anthropic`, ignored on Bedrock/Vertex Claude and elsewhere). A new `resolveServiceTier(serviceTier, provider)` helper computes the effective tier for the provider; existing OpenAI/Anthropic provider code routes through it, so `service_tier` and Anthropic fast-mode emission both respect scope. `getPriorityPremiumRequests` now counts Anthropic+priority as one premium request (previously zero) and continues to ignore providers that drop the field on the wire.
10
+
11
+ ### Fixed
12
+
13
+ - Fixed Anthropic fast mode (`serviceTier: "priority"`) looping on 429 `rate_limit_error: "Extra usage is required for fast mode."` for accounts without the extra-usage entitlement. `isAnthropicFastModeUnsupportedError` now matches the 429 phrasing in addition to the 400 `invalid_request_error` "does not support the `speed` parameter" case, so the provider drops `speed: "fast"` on the in-turn retry, sets `providerSessionState.fastModeDisabled` for the remainder of the session, and surfaces `disabledFeatures: ["priority"]` to the caller instead of retrying with the same payload until `PROVIDER_MAX_RETRIES` is exhausted.
14
+
15
+ ## [15.1.6] - 2026-05-19
16
+
17
+ ### Fixed
18
+
19
+ - Fixed `{}` (empty JSON Schema, the wire representation of `z.unknown()`) being passed verbatim to grammar-constrained samplers (llama.cpp, etc.) in `additionalProperties`, `items`, and other schema-valued positions across **every provider** (OpenAI, Anthropic, Google, Ollama, Bedrock, Cursor). Grammar builders treat `{}` as "generate an empty object" rather than "any JSON value", causing open-typed fields (e.g. `extra.title` from `z.record(z.string(), z.unknown())`) to always emit `{}` instead of the intended string/number/etc. `toolWireSchema` now applies a new `normalizeEmptySchemas` pass (exported) to both the Zod and TypeBox/raw-JSON-Schema branches, converting `{}` → `true` (semantically identical per JSON Schema draft 2020-12 §4.3.2, "Boolean JSON Schemas") in all schema-valued positions. Strict-mode opt-out is preserved across all providers: OpenAI's `hasUnrepresentableStrictObjectMap` hits the `=== true` branch instead of the `isJsonObject({})` branch (same result); Anthropic's `normalizeAnthropicStrictSchemaNode` opts out via `additionalProperties !== false` (still true for `true`); Google's `normalizeSchemaForGoogle` strips `additionalProperties` regardless (pre-existing). ([#1179](https://github.com/can1357/oh-my-pi/issues/1179))
20
+ - Fixed `pi-ai login <provider>` crashing with `Unknown provider` for providers that only the `auth-storage` `login()` switch knew about (perplexity, alibaba-coding-plan, gitlab-duo, huggingface, opencode-zen/go, lm-studio, ollama, cerebras, fireworks, qianfan, synthetic, venice, litellm, moonshot, together, cloudflare/vercel ai gateways, vllm, qwen-portal, nvidia, xiaomi, and any custom OAuth provider). The CLI now wraps the `SqliteAuthCredentialStore` in an `AuthStorage` and delegates to `AuthStorage.login()` instead of duplicating a smaller switch, so the auth-broker `omp auth-broker login <provider>` flow works for every registered OAuth provider.
21
+
5
22
  ## [15.1.4] - 2026-05-19
6
23
  ### Changed
7
24
 
@@ -1,6 +1,6 @@
1
1
  import Anthropic, { type ClientOptions as AnthropicSdkClientOptions } from "@anthropic-ai/sdk";
2
2
  import type { MessageParam } from "@anthropic-ai/sdk/resources/messages";
3
- import type { FetchImpl, Message, Model, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
3
+ import type { FetchImpl, Message, Model, ProviderSessionState, ServiceTier, SimpleStreamOptions, StreamFunction, StreamOptions, Usage } from "../types";
4
4
  export type AnthropicHeaderOptions = {
5
5
  apiKey: string;
6
6
  baseUrl?: string;
@@ -17,6 +17,15 @@ type AnthropicCacheControl = {
17
17
  type: "ephemeral";
18
18
  ttl?: "1h" | "5m";
19
19
  };
20
+ /**
21
+ * Clears the in-session "server rejected fast mode" sticky flag. Call when the
22
+ * caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
23
+ * `/fast on` after a previous turn auto-disabled it) so the next request
24
+ * actually carries `speed: "fast"` again. No-op when the map or state entry
25
+ * hasn't been materialized yet.
26
+ */
27
+ export declare function clearAnthropicFastModeFallback(providerSessionState: Map<string, ProviderSessionState> | undefined): void;
28
+ export declare function isAnthropicFastModeUnsupportedError(error: unknown): boolean;
20
29
  export declare const claudeCodeVersion = "2.1.63";
21
30
  export declare const claudeToolPrefix: string;
22
31
  export declare const claudeCodeSystemInstruction = "You are a Claude agent, built on Anthropic's Claude Agent SDK.";
@@ -77,6 +86,16 @@ export interface AnthropicOptions extends StreamOptions {
77
86
  name: string;
78
87
  };
79
88
  betas?: string[] | string;
89
+ /**
90
+ * Realization of `serviceTier: "priority"` on Anthropic models. When
91
+ * `"priority"`, sets `speed: "fast"` on the request and appends the
92
+ * `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
93
+ * with `invalid_request_error` (and accounts lacking the extra-usage entitlement with a 429 `rate_limit_error`); either rejection triggers an in-provider one-shot
94
+ * fallback (see `fastModeDisabled` provider state).
95
+ *
96
+ * The scoped `"claude-only"` value resolves to `"priority"` on direct `anthropic`; all other `ServiceTier` values are currently ignored on this provider.
97
+ */
98
+ serviceTier?: ServiceTier;
80
99
  /** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
81
100
  isOAuth?: boolean;
82
101
  /**
@@ -69,18 +69,47 @@ export type ToolChoice = "auto" | "none" | "any" | "required" | {
69
69
  name: string;
70
70
  };
71
71
  export type CacheRetention = "none" | "short" | "long";
72
- /** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
73
- export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
74
- export declare function shouldSendServiceTier(serviceTier?: ServiceTier | null, provider?: Provider): serviceTier is "flex" | "scale" | "priority";
75
72
  /**
76
- * Premium-request weight contributed by sending a `priority` service tier to
77
- * a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
78
- * accounting so the "premium requests" stat aggregates priority traffic too.
73
+ * Service tier hint for processing priority / cost control.
79
74
  *
80
- * Returns 1 per priority request, 0 otherwise. Non-priority tiers (`flex`,
81
- * `scale`) and providers that ignore `service_tier` always return 0.
75
+ * The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
76
+ * `"priority"`) are passed through to providers that understand them
77
+ * (OpenAI's `service_tier` field directly; Anthropic translates
78
+ * `"priority"` into `speed: "fast"` on supported Opus models).
79
+ *
80
+ * The scoped values target a specific provider family and behave as the
81
+ * unscoped value on the matching provider, or `undefined` everywhere else.
82
+ * They let users opt into priority on one family without paying premium
83
+ * costs on the other when switching models mid-session.
84
+ *
85
+ * - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
86
+ * - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
87
+ */
88
+ export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
89
+ /** Resolved tier — one of the values that providers actually consume on the wire. */
90
+ export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
91
+ /**
92
+ * Resolves a possibly scoped `ServiceTier` to the effective tier for the
93
+ * given provider. Scoped values match their target family and otherwise
94
+ * collapse to `undefined`; unscoped values pass through unchanged.
95
+ */
96
+ export declare function resolveServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): ResolvedServiceTier | undefined;
97
+ /**
98
+ * True when the (possibly scoped) tier should be sent as OpenAI's
99
+ * `service_tier` request field for the given provider. Non-OpenAI
100
+ * providers, unsupported tiers (`"auto"`, `"default"`), and scope
101
+ * mismatches all return false.
82
102
  */
83
- export declare function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number;
103
+ export declare function shouldSendServiceTier(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): boolean;
104
+ /**
105
+ * Premium-request weight contributed by sending priority to a provider
106
+ * that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
107
+ * so the "premium requests" stat aggregates priority traffic across the
108
+ * OpenAI family and Anthropic fast-mode realizations.
109
+ *
110
+ * Returns 1 per resolved priority request, 0 otherwise.
111
+ */
112
+ export declare function getPriorityPremiumRequests(serviceTier: ServiceTier | null | undefined, provider: Provider | undefined): number;
84
113
  export interface ProviderSessionState {
85
114
  close(): void;
86
115
  }
@@ -371,6 +400,14 @@ export interface AssistantMessage {
371
400
  errorMessage?: string;
372
401
  /** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
373
402
  errorStatus?: number;
403
+ /**
404
+ * Stable identifiers for request features the provider silently dropped
405
+ * during this turn (e.g. `"priority"`). Set when a server-side rejection
406
+ * triggered an in-provider fallback retry that succeeded without the
407
+ * feature. Callers can use this to sync user-facing toggles back to the
408
+ * server's actual state.
409
+ */
410
+ disabledFeatures?: string[];
374
411
  /** Provider-specific opaque payload used to reconstruct transport-native history. */
375
412
  providerPayload?: ProviderPayload;
376
413
  timestamp: number;
@@ -1,2 +1,4 @@
1
1
  export type JsonObject = Record<string, unknown>;
2
2
  export declare function isJsonObject(value: unknown): value is JsonObject;
3
+ /** True when `value` is a plain JSON object with no own enumerable keys. */
4
+ export declare function isJsonObjectEmpty(value: JsonObject): boolean;
@@ -26,11 +26,29 @@ import type { Tool } from "../../types";
26
26
  * impostors do not.
27
27
  */
28
28
  export declare function isZodSchema(value: unknown): value is ZodType;
29
+ /**
30
+ * Normalize `{}` (empty JSON Schema = `z.unknown()` / unconstrained value) to
31
+ * boolean `true` in every schema-valued position. JSON Schema draft 2020-12
32
+ * §4.3.2: `{}` and `true` are semantically equivalent ("any JSON value").
33
+ * Grammar-constrained samplers (llama.cpp, etc.) treat the object form as
34
+ * "generate an empty object" rather than "any JSON value", causing open-typed
35
+ * fields like `extra.title` (from `z.record(z.string(), z.unknown())`) to
36
+ * always emit `{}` instead of the intended string/number/etc. (issue #1179).
37
+ *
38
+ * Mutates in place. Provider-agnostic — applied to every tool wire schema so
39
+ * Anthropic, Google, OpenAI, Ollama, Bedrock, and Cursor all see the
40
+ * normalized form, regardless of whether the source was Zod or TypeBox.
41
+ */
42
+ export declare function normalizeEmptySchemas(node: unknown): void;
29
43
  /** Convert a Zod schema into the JSON Schema shape providers consume. */
30
44
  export declare function zodToWireSchema(schema: ZodType): Record<string, unknown>;
31
45
  /**
32
46
  * Resolve a tool's parameters to a JSON Schema object suitable for sending
33
47
  * over the wire. Zod schemas are converted (and cached); legacy TypeBox / raw
34
48
  * JSON Schema parameters are upgraded to draft 2020-12 (and cached).
49
+ *
50
+ * Both branches finish with `normalizeEmptySchemas` so every provider —
51
+ * OpenAI, Anthropic, Google, Ollama, Bedrock, Cursor — sees `{}` normalized
52
+ * to `true` in schema-valued positions (issue #1179).
35
53
  */
36
54
  export declare function toolWireSchema(tool: Tool): Record<string, unknown>;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.1.5",
4
+ "version": "15.1.7",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -43,7 +43,7 @@
43
43
  "dependencies": {
44
44
  "@anthropic-ai/sdk": "^0.94.0",
45
45
  "@bufbuild/protobuf": "^2.12.0",
46
- "@oh-my-pi/pi-utils": "15.1.5",
46
+ "@oh-my-pi/pi-utils": "15.1.7",
47
47
  "openai": "^6.36.0",
48
48
  "partial-json": "^0.1.7",
49
49
  "zod": "4.4.3"
package/src/cli.ts CHANGED
@@ -1,8 +1,8 @@
1
1
  #!/usr/bin/env bun
2
2
  import * as readline from "node:readline";
3
- import { SqliteAuthCredentialStore } from "./auth-storage";
3
+ import { AuthStorage, SqliteAuthCredentialStore } from "./auth-storage";
4
4
  import { getOAuthProviders } from "./utils/oauth";
5
- import type { OAuthCredentials, OAuthProvider } from "./utils/oauth/types";
5
+ import type { OAuthProvider } from "./utils/oauth/types";
6
6
 
7
7
  const PROVIDERS = getOAuthProviders();
8
8
 
@@ -58,289 +58,29 @@ function prompt(rl: readline.Interface, question: string): Promise<string> {
58
58
 
59
59
  async function login(provider: OAuthProvider): Promise<void> {
60
60
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
61
-
62
61
  const promptFn = (msg: string) => prompt(rl, `${msg} `);
63
- const storage = await SqliteAuthCredentialStore.open();
62
+ const store = await SqliteAuthCredentialStore.open();
63
+ const storage = new AuthStorage(store);
64
+ await storage.reload();
64
65
 
65
66
  try {
66
- let credentials: OAuthCredentials;
67
-
68
- switch (provider) {
69
- case "anthropic": {
70
- const { loginAnthropic } = await import("./utils/oauth/anthropic");
71
- credentials = await loginAnthropic({
72
- onAuth(info) {
73
- const { url } = info;
74
- console.log(`\nOpen this URL in your browser:\n${url}\n`);
75
- },
76
- onProgress(message) {
77
- console.log(message);
78
- },
79
- });
80
- break;
81
- }
82
-
83
- case "github-copilot": {
84
- const { loginGitHubCopilot } = await import("./utils/oauth/github-copilot");
85
- credentials = await loginGitHubCopilot({
86
- onAuth(url, instructions) {
87
- console.log(`\nOpen this URL in your browser:\n${url}`);
88
- if (instructions) console.log(instructions);
89
- console.log();
90
- },
91
- async onPrompt(p) {
92
- return await promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
93
- },
94
- });
95
- break;
96
- }
97
-
98
- case "google-gemini-cli": {
99
- const { loginGeminiCli } = await import("./utils/oauth/google-gemini-cli");
100
- credentials = await loginGeminiCli({
101
- onAuth(info) {
102
- const { url, instructions } = info;
103
- console.log(`\nOpen this URL in your browser:\n${url}`);
104
- if (instructions) console.log(instructions);
105
- console.log();
106
- },
107
- });
108
- break;
109
- }
110
-
111
- case "google-antigravity": {
112
- const { loginAntigravity } = await import("./utils/oauth/google-antigravity");
113
- credentials = await loginAntigravity({
114
- onAuth(info) {
115
- const { url, instructions } = info;
116
- console.log(`\nOpen this URL in your browser:\n${url}`);
117
- if (instructions) console.log(instructions);
118
- console.log();
119
- },
120
- });
121
- break;
122
- }
123
- case "openai-codex": {
124
- const { loginOpenAICodex } = await import("./utils/oauth/openai-codex");
125
- credentials = await loginOpenAICodex({
126
- onAuth(info) {
127
- const { url, instructions } = info;
128
- console.log(`\nOpen this URL in your browser:\n${url}`);
129
- if (instructions) console.log(instructions);
130
- console.log();
131
- },
132
- async onPrompt(p) {
133
- return await promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
134
- },
135
- });
136
- break;
137
- }
138
-
139
- case "kimi-code": {
140
- const { loginKimi } = await import("./utils/oauth/kimi");
141
- credentials = await loginKimi({
142
- onAuth(info) {
143
- const { url, instructions } = info;
144
- console.log(`\nOpen this URL in your browser:\n${url}`);
145
- if (instructions) console.log(instructions);
146
- console.log();
147
- },
148
- });
149
- break;
150
- }
151
- case "kilo": {
152
- const { loginKilo } = await import("./utils/oauth/kilo");
153
- credentials = await loginKilo({
154
- onAuth(info) {
155
- const { url, instructions } = info;
156
- console.log(`\nOpen this URL in your browser:\n${url}`);
157
- if (instructions) console.log(instructions);
158
- console.log();
159
- },
160
- });
161
- break;
162
- }
163
- case "kagi": {
164
- const { loginKagi } = await import("./utils/oauth/kagi");
165
- const apiKey = await loginKagi({
166
- onAuth(info) {
167
- const { url, instructions } = info;
168
- console.log(`\nOpen this URL in your browser:\n${url}`);
169
- if (instructions) console.log(instructions);
170
- console.log();
171
- },
172
- onPrompt(p) {
173
- return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
174
- },
175
- });
176
- storage.saveApiKey(provider, apiKey);
177
- console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
178
- return;
179
- }
180
- case "tavily": {
181
- const { loginTavily } = await import("./utils/oauth/tavily");
182
- const apiKey = await loginTavily({
183
- onAuth(info) {
184
- const { url, instructions } = info;
185
- console.log(`\nOpen this URL in your browser:\n${url}`);
186
- if (instructions) console.log(instructions);
187
- console.log();
188
- },
189
- onPrompt(p) {
190
- return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
191
- },
192
- });
193
- storage.saveApiKey(provider, apiKey);
194
- console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
195
- return;
196
- }
197
- case "parallel": {
198
- const { loginParallel } = await import("./utils/oauth/parallel");
199
- const apiKey = await loginParallel({
200
- onAuth(info) {
201
- const { url, instructions } = info;
202
- console.log(`\nOpen this URL in your browser:\n${url}`);
203
- if (instructions) console.log(instructions);
204
- console.log();
205
- },
206
- onPrompt(p) {
207
- return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
208
- },
209
- });
210
- storage.saveApiKey(provider, apiKey);
211
- console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
212
- return;
213
- }
214
-
215
- case "cursor": {
216
- const { loginCursor } = await import("./utils/oauth/cursor");
217
- credentials = await loginCursor(
218
- url => {
219
- console.log(`\nOpen this URL in your browser:\n${url}\n`);
220
- },
221
- () => {
222
- console.log("Waiting for browser authentication...");
223
- },
224
- );
225
- break;
226
- }
227
-
228
- case "zai": {
229
- const { loginZai } = await import("./utils/oauth/zai");
230
- const apiKey = await loginZai({
231
- onAuth(info) {
232
- const { url, instructions } = info;
233
- console.log(`\nOpen this URL in your browser:\n${url}`);
234
- if (instructions) console.log(instructions);
235
- console.log();
236
- },
237
- onPrompt(p) {
238
- return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
239
- },
240
- });
241
- storage.saveApiKey(provider, apiKey);
242
- console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
243
- return;
244
- }
245
-
246
- case "nanogpt": {
247
- const { loginNanoGPT } = await import("./utils/oauth/nanogpt");
248
- const apiKey = await loginNanoGPT({
249
- onAuth(info) {
250
- const { url, instructions } = info;
251
- console.log(`\nOpen this URL in your browser:\n${url}`);
252
- if (instructions) console.log(instructions);
253
- console.log();
254
- },
255
- onPrompt(p) {
256
- return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
257
- },
258
- });
259
- storage.saveApiKey(provider, apiKey);
260
- console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
261
- return;
262
- }
263
-
264
- case "zenmux": {
265
- const { loginZenMux } = await import("./utils/oauth/zenmux");
266
- const apiKey = await loginZenMux({
267
- onAuth(info) {
268
- const { url, instructions } = info;
269
- console.log(`\nOpen this URL in your browser:\n${url}`);
270
- if (instructions) console.log(instructions);
271
- console.log();
272
- },
273
- onPrompt(p) {
274
- return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
275
- },
276
- });
277
- storage.saveApiKey(provider, apiKey);
278
- console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
279
- return;
280
- }
281
- case "ollama-cloud": {
282
- const { loginOllamaCloud } = await import("./utils/oauth/ollama-cloud");
283
- const apiKey = await loginOllamaCloud({
284
- onAuth(info) {
285
- const { url, instructions } = info;
286
- console.log(`\nOpen this URL in your browser:\n${url}`);
287
- if (instructions) console.log(instructions);
288
- console.log();
289
- },
290
- onPrompt(p) {
291
- return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
292
- },
293
- });
294
- storage.saveApiKey(provider, apiKey);
295
- console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
296
- return;
297
- }
298
-
299
- case "minimax-code": {
300
- const { loginMiniMaxCode } = await import("./utils/oauth/minimax-code");
301
- const apiKey = await loginMiniMaxCode({
302
- onAuth(info) {
303
- const { url, instructions } = info;
304
- console.log(`\nOpen this URL in your browser:\n${url}`);
305
- if (instructions) console.log(instructions);
306
- console.log();
307
- },
308
- onPrompt(p) {
309
- return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
310
- },
311
- });
312
- storage.saveApiKey(provider, apiKey);
313
- console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
314
- return;
315
- }
316
-
317
- case "minimax-code-cn": {
318
- const { loginMiniMaxCodeCn } = await import("./utils/oauth/minimax-code");
319
- const apiKey = await loginMiniMaxCodeCn({
320
- onAuth(info) {
321
- const { url, instructions } = info;
322
- console.log(`\nOpen this URL in your browser:\n${url}`);
323
- if (instructions) console.log(instructions);
324
- console.log();
325
- },
326
- onPrompt(p) {
327
- return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
328
- },
329
- });
330
- storage.saveApiKey(provider, apiKey);
331
- console.log(`\nAPI key saved to ~/.omp/agent/agent.db`);
332
- return;
333
- }
334
-
335
- default:
336
- throw new Error(`Unknown provider: ${provider}`);
337
- }
338
-
339
- storage.saveOAuth(provider, credentials);
340
-
67
+ await storage.login(provider, {
68
+ onAuth(info) {
69
+ const { url, instructions } = info;
70
+ console.log(`\nOpen this URL in your browser:\n${url}`);
71
+ if (instructions) console.log(instructions);
72
+ console.log();
73
+ },
74
+ onProgress(message) {
75
+ console.log(message);
76
+ },
77
+ onPrompt(p) {
78
+ return promptFn(`${p.message}${p.placeholder ? ` (${p.placeholder})` : ""}:`);
79
+ },
80
+ });
341
81
  console.log(`\nCredentials saved to ~/.omp/agent/agent.db`);
342
82
  } finally {
343
- storage.close();
83
+ store.close();
344
84
  rl.close();
345
85
  }
346
86
  }
@@ -32,6 +32,7 @@ import type {
32
32
  Model,
33
33
  ProviderSessionState,
34
34
  RedactedThinkingContent,
35
+ ServiceTier,
35
36
  SimpleStreamOptions,
36
37
  StopReason,
37
38
  StreamFunction,
@@ -43,6 +44,7 @@ import type {
43
44
  ToolResultMessage,
44
45
  Usage,
45
46
  } from "../types";
47
+ import { resolveServiceTier } from "../types";
46
48
  import {
47
49
  isAnthropicOAuthToken,
48
50
  isRecord,
@@ -111,6 +113,7 @@ const claudeCodeBetaDefaults = [
111
113
  ];
112
114
  const fineGrainedToolStreamingBeta = "fine-grained-tool-streaming-2025-05-14";
113
115
  const interleavedThinkingBeta = "interleaved-thinking-2025-05-14";
116
+ const fastModeBeta = "fast-mode-2026-02-01";
114
117
 
115
118
  function getHeaderCaseInsensitive(headers: Record<string, string> | undefined, headerName: string): string | undefined {
116
119
  if (!headers) return undefined;
@@ -224,13 +227,16 @@ const ANTHROPIC_PROVIDER_SESSION_STATE_KEY = "anthropic-messages";
224
227
 
225
228
  type AnthropicProviderSessionState = ProviderSessionState & {
226
229
  strictToolsDisabled: boolean;
230
+ fastModeDisabled: boolean;
227
231
  };
228
232
 
229
233
  function createAnthropicProviderSessionState(): AnthropicProviderSessionState {
230
234
  const state: AnthropicProviderSessionState = {
231
235
  strictToolsDisabled: false,
236
+ fastModeDisabled: false,
232
237
  close: () => {
233
238
  state.strictToolsDisabled = false;
239
+ state.fastModeDisabled = false;
234
240
  },
235
241
  };
236
242
  return state;
@@ -249,6 +255,23 @@ function getAnthropicProviderSessionState(
249
255
  return created;
250
256
  }
251
257
 
258
+ /**
259
+ * Clears the in-session "server rejected fast mode" sticky flag. Call when the
260
+ * caller is explicitly re-arming `serviceTier: "priority"` (e.g. user toggled
261
+ * `/fast on` after a previous turn auto-disabled it) so the next request
262
+ * actually carries `speed: "fast"` again. No-op when the map or state entry
263
+ * hasn't been materialized yet.
264
+ */
265
+ export function clearAnthropicFastModeFallback(
266
+ providerSessionState: Map<string, ProviderSessionState> | undefined,
267
+ ): void {
268
+ if (!providerSessionState) return;
269
+ const state = providerSessionState.get(ANTHROPIC_PROVIDER_SESSION_STATE_KEY) as
270
+ | AnthropicProviderSessionState
271
+ | undefined;
272
+ if (state) state.fastModeDisabled = false;
273
+ }
274
+
252
275
  function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
253
276
  if (extractHttpStatusFromError(error) !== 400) return false;
254
277
  const message = error instanceof Error ? error.message : String(error);
@@ -258,11 +281,45 @@ function isAnthropicStrictGrammarTooLargeError(error: unknown): boolean {
258
281
  return /invalid_request_error/i.test(message) && (isStrictGrammarTooLarge || isSchemaCompilationTooComplex);
259
282
  }
260
283
 
284
+ export function isAnthropicFastModeUnsupportedError(error: unknown): boolean {
285
+ const status = extractHttpStatusFromError(error);
286
+ if (status !== 400 && status !== 429) return false;
287
+ const message = error instanceof Error ? error.message : String(error);
288
+ // 400 invalid_request_error — model doesn't accept `speed` at all.
289
+ // Observed: "'claude-opus-4-5-20251101' does not support the `speed` parameter."
290
+ // Stay tolerant of phrasing drift ("is not supported", quoted vs backticked field).
291
+ if (
292
+ status === 400 &&
293
+ /invalid_request_error/i.test(message) &&
294
+ /\bspeed\b/i.test(message) &&
295
+ /not support/i.test(message)
296
+ ) {
297
+ return true;
298
+ }
299
+ // 429 rate_limit_error — account lacks the extra-usage entitlement fast mode requires.
300
+ // Observed: "Extra usage is required for fast mode."
301
+ if (status === 429 && /rate_limit_error/i.test(message) && /fast mode/i.test(message)) {
302
+ return true;
303
+ }
304
+ return false;
305
+ }
306
+
261
307
  function hasStrictAnthropicTools(params: MessageCreateParamsStreaming): boolean {
262
308
  const tools = params.tools as Array<{ strict?: unknown }> | undefined;
263
309
  return tools?.some(tool => tool.strict === true) ?? false;
264
310
  }
265
311
 
312
+ /**
313
+ * `speed` lives on `BetaMessageCreateParams` (client.beta.messages) but this
314
+ * provider posts via `client.messages.create`, whose param type doesn't
315
+ * include it. This alias narrows the cast to one place.
316
+ */
317
+ type ParamsWithSpeed = MessageCreateParamsStreaming & { speed?: "fast" };
318
+
319
+ function dropAnthropicFastMode(params: MessageCreateParamsStreaming): void {
320
+ delete (params as ParamsWithSpeed).speed;
321
+ }
322
+
266
323
  function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
267
324
  const tools = params.tools as Array<{ strict?: unknown }> | undefined;
268
325
  if (!tools) return;
@@ -526,6 +583,16 @@ export interface AnthropicOptions extends StreamOptions {
526
583
  interleavedThinking?: boolean;
527
584
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
528
585
  betas?: string[] | string;
586
+ /**
587
+ * Realization of `serviceTier: "priority"` on Anthropic models. When
588
+ * `"priority"`, sets `speed: "fast"` on the request and appends the
589
+ * `fast-mode-2026-02-01` beta header. Anthropic rejects unsupported models
590
+ * with `invalid_request_error` (and accounts lacking the extra-usage entitlement with a 429 `rate_limit_error`); either rejection triggers an in-provider one-shot
591
+ * fallback (see `fastModeDisabled` provider state).
592
+ *
593
+ * The scoped `"claude-only"` value resolves to `"priority"` on direct `anthropic`; all other `ServiceTier` values are currently ignored on this provider.
594
+ */
595
+ serviceTier?: ServiceTier;
529
596
  /** Force OAuth bearer auth mode for proxy tokens that don't match Anthropic token prefixes. */
530
597
  isOAuth?: boolean;
531
598
  /**
@@ -961,10 +1028,16 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
961
1028
  } else {
962
1029
  const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
963
1030
 
1031
+ const extraBetas = normalizeExtraBetas(options?.betas);
1032
+ const wantsAnthropicPriority = resolveServiceTier(options?.serviceTier, model.provider) === "priority";
1033
+ if (wantsAnthropicPriority && !extraBetas.includes(fastModeBeta)) {
1034
+ extraBetas.push(fastModeBeta);
1035
+ }
1036
+
964
1037
  const created = createClient(model, {
965
1038
  model,
966
1039
  apiKey,
967
- extraBetas: normalizeExtraBetas(options?.betas),
1040
+ extraBetas,
968
1041
  stream: true,
969
1042
  interleavedThinking: options?.interleavedThinking ?? true,
970
1043
  headers: options?.headers,
@@ -984,6 +1057,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
984
1057
  let disableStrictTools =
985
1058
  (providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
986
1059
  let strictFallbackErrorMessage: string | undefined;
1060
+ let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
987
1061
  const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
988
1062
  let nextParams = buildParams(model, baseUrl, context, isOAuthToken, options, disableStrictTools);
989
1063
  const replacementPayload = await options?.onPayload?.(nextParams, model);
@@ -993,6 +1067,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
993
1067
  if (disableStrictTools) {
994
1068
  dropAnthropicStrictTools(nextParams);
995
1069
  }
1070
+ if (dropFastMode) {
1071
+ dropAnthropicFastMode(nextParams);
1072
+ }
996
1073
  rawRequestDump = {
997
1074
  provider: model.provider,
998
1075
  api: output.api,
@@ -1284,6 +1361,30 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1284
1361
  firstTokenTime = undefined;
1285
1362
  continue;
1286
1363
  }
1364
+ if (
1365
+ !dropFastMode &&
1366
+ resolveServiceTier(options?.serviceTier, model.provider) === "priority" &&
1367
+ firstTokenTime === undefined &&
1368
+ isAnthropicFastModeUnsupportedError(streamFailure)
1369
+ ) {
1370
+ logger.debug("anthropic: fast mode unsupported, retrying without speed", {
1371
+ model: model.id,
1372
+ error: streamFailure instanceof Error ? streamFailure.message : String(streamFailure),
1373
+ });
1374
+ if (providerSessionState) {
1375
+ providerSessionState.fastModeDisabled = true;
1376
+ }
1377
+ dropFastMode = true;
1378
+ params = await prepareParams();
1379
+ providerRetryAttempt = 0;
1380
+ output.content.length = 0;
1381
+ output.responseId = undefined;
1382
+ output.providerPayload = undefined;
1383
+ output.usage = createEmptyUsage(copilotDynamicHeaders?.premiumRequests);
1384
+ output.stopReason = "stop";
1385
+ firstTokenTime = undefined;
1386
+ continue;
1387
+ }
1287
1388
  const isTransientEnvelopeFailure =
1288
1389
  isTransientStreamParseError(streamFailure) || isTransientStreamEnvelopeError(streamFailure);
1289
1390
  const canRetryTransientEnvelopeFailure = isTransientEnvelopeFailure && !streamedReplayUnsafeContent;
@@ -1315,6 +1416,9 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1315
1416
 
1316
1417
  output.duration = Date.now() - startTime;
1317
1418
  if (firstTokenTime) output.ttft = firstTokenTime - startTime;
1419
+ if (dropFastMode && resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
1420
+ output.disabledFeatures = [...(output.disabledFeatures ?? []), "priority"];
1421
+ }
1318
1422
  stream.push({ type: "done", reason: output.stopReason, message: output });
1319
1423
  stream.end();
1320
1424
  } catch (error) {
@@ -1862,6 +1966,10 @@ function buildParams(
1862
1966
  params.metadata = { user_id: metadataUserId };
1863
1967
  }
1864
1968
 
1969
+ if (resolveServiceTier(options?.serviceTier, model.provider) === "priority") {
1970
+ (params as ParamsWithSpeed).speed = "fast";
1971
+ }
1972
+
1865
1973
  if (options?.toolChoice) {
1866
1974
  if (typeof options.toolChoice === "string") {
1867
1975
  params.tool_choice = { type: options.toolChoice };
@@ -29,10 +29,10 @@ import {
29
29
  type FetchImpl,
30
30
  type Model,
31
31
  type ProviderSessionState,
32
+ resolveServiceTier,
32
33
  type ServiceTier,
33
34
  type StreamFunction,
34
35
  type StreamOptions,
35
- shouldSendServiceTier,
36
36
  type TextContent,
37
37
  type ThinkingContent,
38
38
  type Tool,
@@ -590,8 +590,9 @@ async function buildTransformedCodexRequestBody(
590
590
  if (options?.repetitionPenalty !== undefined) {
591
591
  params.repetition_penalty = options.repetitionPenalty;
592
592
  }
593
- if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
594
- params.service_tier = options.serviceTier;
593
+ const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
594
+ if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
595
+ params.service_tier = resolvedServiceTier;
595
596
  }
596
597
  if (context.tools && context.tools.length > 0) {
597
598
  params.tools = convertOpenAICodexResponsesTools(context.tools, model);
@@ -22,11 +22,11 @@ import {
22
22
  type Model,
23
23
  type OpenAICompat,
24
24
  type ProviderSessionState,
25
+ resolveServiceTier,
25
26
  type ServiceTier,
26
27
  type StopReason,
27
28
  type StreamFunction,
28
29
  type StreamOptions,
29
- shouldSendServiceTier,
30
30
  type TextContent,
31
31
  type ThinkingContent,
32
32
  type Tool,
@@ -1092,8 +1092,9 @@ function buildParams(
1092
1092
  if (options?.frequencyPenalty !== undefined) {
1093
1093
  params.frequency_penalty = options.frequencyPenalty;
1094
1094
  }
1095
- if (shouldSendServiceTier(options?.serviceTier, model.provider)) {
1096
- params.service_tier = options.serviceTier;
1095
+ const resolvedServiceTier = resolveServiceTier(options?.serviceTier, model.provider);
1096
+ if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
1097
+ params.service_tier = resolvedServiceTier;
1097
1098
  }
1098
1099
 
1099
1100
  if (context.tools) {
@@ -17,10 +17,10 @@ import {
17
17
  type AssistantMessage,
18
18
  type ImageContent,
19
19
  type Model,
20
+ resolveServiceTier,
20
21
  type ServiceTier,
21
22
  type StopReason,
22
23
  type StreamOptions,
23
- shouldSendServiceTier,
24
24
  type TextContent,
25
25
  type TextSignatureV1,
26
26
  type ThinkingContent,
@@ -650,8 +650,9 @@ export function applyCommonResponsesSamplingParams<P extends CommonResponsesPara
650
650
  if (options?.minP !== undefined) params.min_p = options.minP;
651
651
  if (options?.presencePenalty !== undefined) params.presence_penalty = options.presencePenalty;
652
652
  if (options?.repetitionPenalty !== undefined) params.repetition_penalty = options.repetitionPenalty;
653
- if (shouldSendServiceTier(options?.serviceTier, provider)) {
654
- params.service_tier = options.serviceTier;
653
+ const resolvedServiceTier = resolveServiceTier(options?.serviceTier, provider);
654
+ if (resolvedServiceTier === "flex" || resolvedServiceTier === "scale" || resolvedServiceTier === "priority") {
655
+ params.service_tier = resolvedServiceTier;
655
656
  }
656
657
  }
657
658
 
package/src/stream.ts CHANGED
@@ -580,6 +580,7 @@ function mapOptionsForApi<TApi extends Api>(
580
580
  thinkingEnabled: false,
581
581
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
582
582
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
583
+ serviceTier: options?.serviceTier,
583
584
  });
584
585
  }
585
586
 
@@ -590,6 +591,7 @@ function mapOptionsForApi<TApi extends Api>(
590
591
  thinkingEnabled: false,
591
592
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
592
593
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
594
+ serviceTier: options?.serviceTier,
593
595
  });
594
596
  }
595
597
 
@@ -603,6 +605,7 @@ function mapOptionsForApi<TApi extends Api>(
603
605
  effort,
604
606
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
605
607
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
608
+ serviceTier: options?.serviceTier,
606
609
  });
607
610
  }
608
611
 
@@ -613,6 +616,7 @@ function mapOptionsForApi<TApi extends Api>(
613
616
  thinkingBudgetTokens: thinkingBudget,
614
617
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
615
618
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
619
+ serviceTier: options?.serviceTier,
616
620
  });
617
621
  }
618
622
 
@@ -631,6 +635,7 @@ function mapOptionsForApi<TApi extends Api>(
631
635
  thinkingEnabled: false,
632
636
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
633
637
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
638
+ serviceTier: options?.serviceTier,
634
639
  });
635
640
  } else {
636
641
  return castApi<"anthropic-messages">({
@@ -640,6 +645,7 @@ function mapOptionsForApi<TApi extends Api>(
640
645
  thinkingBudgetTokens: thinkingBudget,
641
646
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
642
647
  thinkingDisplay: options?.hideThinkingSummary ? "omitted" : undefined,
648
+ serviceTier: options?.serviceTier,
643
649
  });
644
650
  }
645
651
  }
package/src/types.ts CHANGED
@@ -162,29 +162,78 @@ export type ToolChoice =
162
162
  // Base options all providers share
163
163
  export type CacheRetention = "none" | "short" | "long";
164
164
 
165
- /** OpenAI service tier for processing priority. Only applies to OpenAI-compatible APIs. */
166
- export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority";
165
+ /**
166
+ * Service tier hint for processing priority / cost control.
167
+ *
168
+ * The unscoped values (`"auto"`, `"default"`, `"flex"`, `"scale"`,
169
+ * `"priority"`) are passed through to providers that understand them
170
+ * (OpenAI's `service_tier` field directly; Anthropic translates
171
+ * `"priority"` into `speed: "fast"` on supported Opus models).
172
+ *
173
+ * The scoped values target a specific provider family and behave as the
174
+ * unscoped value on the matching provider, or `undefined` everywhere else.
175
+ * They let users opt into priority on one family without paying premium
176
+ * costs on the other when switching models mid-session.
177
+ *
178
+ * - `"openai-only"` → `"priority"` on `openai` and `openai-codex`; ignored elsewhere.
179
+ * - `"claude-only"` → `"priority"` on direct `anthropic` (not Bedrock/Vertex Claude).
180
+ */
181
+ export type ServiceTier = "auto" | "default" | "flex" | "scale" | "priority" | "openai-only" | "claude-only";
167
182
 
168
- export function shouldSendServiceTier(
169
- serviceTier?: ServiceTier | null,
170
- provider?: Provider,
171
- ): serviceTier is "flex" | "scale" | "priority" {
172
- if (provider !== "openai" && provider !== "openai-codex") {
173
- return false;
183
+ /** Resolved tier — one of the values that providers actually consume on the wire. */
184
+ export type ResolvedServiceTier = Exclude<ServiceTier, "openai-only" | "claude-only">;
185
+
186
+ /**
187
+ * Resolves a possibly scoped `ServiceTier` to the effective tier for the
188
+ * given provider. Scoped values match their target family and otherwise
189
+ * collapse to `undefined`; unscoped values pass through unchanged.
190
+ */
191
+ export function resolveServiceTier(
192
+ serviceTier: ServiceTier | null | undefined,
193
+ provider: Provider | undefined,
194
+ ): ResolvedServiceTier | undefined {
195
+ if (!serviceTier) return undefined;
196
+ switch (serviceTier) {
197
+ case "openai-only":
198
+ return provider === "openai" || provider === "openai-codex" ? "priority" : undefined;
199
+ case "claude-only":
200
+ return provider === "anthropic" ? "priority" : undefined;
201
+ default:
202
+ return serviceTier;
174
203
  }
175
- return serviceTier === "flex" || serviceTier === "scale" || serviceTier === "priority";
176
204
  }
177
205
 
178
206
  /**
179
- * Premium-request weight contributed by sending a `priority` service tier to
180
- * a provider that supports it. Mirrors GitHub Copilot's `premiumRequests`
181
- * accounting so the "premium requests" stat aggregates priority traffic too.
207
+ * True when the (possibly scoped) tier should be sent as OpenAI's
208
+ * `service_tier` request field for the given provider. Non-OpenAI
209
+ * providers, unsupported tiers (`"auto"`, `"default"`), and scope
210
+ * mismatches all return false.
211
+ */
212
+ export function shouldSendServiceTier(
213
+ serviceTier: ServiceTier | null | undefined,
214
+ provider: Provider | undefined,
215
+ ): boolean {
216
+ if (provider !== "openai" && provider !== "openai-codex") return false;
217
+ const resolved = resolveServiceTier(serviceTier, provider);
218
+ return resolved === "flex" || resolved === "scale" || resolved === "priority";
219
+ }
220
+
221
+ /**
222
+ * Premium-request weight contributed by sending priority to a provider
223
+ * that supports it. Mirrors GitHub Copilot's `premiumRequests` accounting
224
+ * so the "premium requests" stat aggregates priority traffic across the
225
+ * OpenAI family and Anthropic fast-mode realizations.
182
226
  *
183
- * Returns 1 per priority request, 0 otherwise. Non-priority tiers (`flex`,
184
- * `scale`) and providers that ignore `service_tier` always return 0.
227
+ * Returns 1 per resolved priority request, 0 otherwise.
185
228
  */
186
- export function getPriorityPremiumRequests(serviceTier?: ServiceTier | null, provider?: Provider): number {
187
- return shouldSendServiceTier(serviceTier, provider) && serviceTier === "priority" ? 1 : 0;
229
+ export function getPriorityPremiumRequests(
230
+ serviceTier: ServiceTier | null | undefined,
231
+ provider: Provider | undefined,
232
+ ): number {
233
+ if (resolveServiceTier(serviceTier, provider) !== "priority") return 0;
234
+ // Only providers that realize `priority` on the wire bill the user.
235
+ // Everywhere else, the field is silently dropped and nothing is charged.
236
+ return provider === "openai" || provider === "openai-codex" || provider === "anthropic" ? 1 : 0;
188
237
  }
189
238
 
190
239
  export interface ProviderSessionState {
@@ -502,6 +551,14 @@ export interface AssistantMessage {
502
551
  errorMessage?: string;
503
552
  /** HTTP status surfaced by the provider when the request failed. Populated by every provider's catch block alongside `errorMessage` so consumers (auth retry, telemetry, UI) can branch without regex-scraping the message. */
504
553
  errorStatus?: number;
554
+ /**
555
+ * Stable identifiers for request features the provider silently dropped
556
+ * during this turn (e.g. `"priority"`). Set when a server-side rejection
557
+ * triggered an in-provider fallback retry that succeeded without the
558
+ * feature. Callers can use this to sync user-facing toggles back to the
559
+ * server's actual state.
560
+ */
561
+ disabledFeatures?: string[];
505
562
  /** Provider-specific opaque payload used to reconstruct transport-native history. */
506
563
  providerPayload?: ProviderPayload;
507
564
  timestamp: number; // Unix timestamp in milliseconds
@@ -21,7 +21,7 @@ import {
21
21
  import { isValidJsonSchema } from "./meta-validator";
22
22
  import { type DescriptionSpillFormat, spillToDescription } from "./spill";
23
23
  import { enter, epochNext, exit, once, stamp } from "./stamps";
24
- import { isJsonObject, type JsonObject } from "./types";
24
+ import { isJsonObject, isJsonObjectEmpty, type JsonObject } from "./types";
25
25
  import { decontaminateZodInstance } from "./zod-decontaminate";
26
26
 
27
27
  export type ResidualSchemaIncompatibility = "type-array" | "type-null" | "nullable" | "combiners";
@@ -907,6 +907,15 @@ export const normalizeSchemaForOpenAIResponses: (schema: JsonObject) => JsonObje
907
907
  function normalizeOpenAIResponsesSchemaNode(value: unknown, cache: WeakMap<JsonObject, JsonObject>): unknown {
908
908
  if (!isJsonObject(value)) return value;
909
909
 
910
+ // `{}` (empty JSON Schema) ≡ `true` (JSON Schema draft 2020-12 §4.3.1).
911
+ // Grammar-constrained samplers (llama.cpp, etc.) treat the object form as
912
+ // "generate an empty object" rather than "any JSON value" (issue #1179).
913
+ // `toolWireSchema` already runs `normalizeEmptySchemas` upstream, but this
914
+ // guard remains as a safety net for callers that invoke
915
+ // `sanitizeSchemaForOpenAIResponses` directly on a schema that bypassed
916
+ // the wire-schema pipeline (e.g. provider-specific fixtures, debug paths).
917
+ if (isJsonObjectEmpty(value)) return true;
918
+
910
919
  const cached = cache.get(value);
911
920
  if (cached) return cached;
912
921
 
@@ -3,3 +3,9 @@ export type JsonObject = Record<string, unknown>;
3
3
  export function isJsonObject(value: unknown): value is JsonObject {
4
4
  return !!value && typeof value === "object" && !Array.isArray(value);
5
5
  }
6
+
7
+ /** True when `value` is a plain JSON object with no own enumerable keys. */
8
+ export function isJsonObjectEmpty(value: JsonObject): boolean {
9
+ for (const _ in value) return false;
10
+ return true;
11
+ }
@@ -62,16 +62,47 @@ const kJsonWireSchema = Symbol("pi.schema.json.wire");
62
62
  * treat defaulted fields as optional; Zod inverts this and keeps them
63
63
  * required at the input boundary, then materializes the default).
64
64
  * - Strip the noisy safe-integer bounds Zod injects for `z.number().int()`.
65
+ *
66
+ * The empty-schema normalization (`{}` → `true`, see `normalizeEmptySchemas`)
67
+ * runs separately from `toolWireSchema` so both Zod and TypeBox tools get it.
65
68
  */
66
69
  function postProcess(schema: Record<string, unknown>): Record<string, unknown> {
67
70
  delete schema.$schema;
68
71
  walk(schema);
72
+ normalizeEmptySchemas(schema);
69
73
  return schema;
70
74
  }
71
75
 
72
76
  const SAFE_INTEGER_MAX = Number.MAX_SAFE_INTEGER;
73
77
  const SAFE_INTEGER_MIN = Number.MIN_SAFE_INTEGER;
74
78
 
79
+ /** Keys whose values are a single JSON Schema (not an array or map). */
80
+ const SCHEMA_VALUE_KEYS = [
81
+ "additionalProperties",
82
+ "unevaluatedProperties",
83
+ "unevaluatedItems",
84
+ "items",
85
+ "contains",
86
+ "propertyNames",
87
+ "if",
88
+ "then",
89
+ "else",
90
+ "not",
91
+ ] as const;
92
+
93
+ /** Keys whose values are a map of `{ key: Schema }` entries. */
94
+ const SCHEMA_MAP_KEYS = ["properties", "patternProperties", "$defs", "definitions"] as const;
95
+
96
+ /** Keys whose values are an array of schemas. */
97
+ const SCHEMA_ARRAY_KEYS = ["anyOf", "oneOf", "allOf", "prefixItems"] as const;
98
+
99
+ /** True when `val` is a plain empty object `{}`. */
100
+ function isEmptyObject(val: unknown): val is Record<string, never> {
101
+ if (val === null || typeof val !== "object" || Array.isArray(val)) return false;
102
+ for (const _ in val as object) return false;
103
+ return true;
104
+ }
105
+
75
106
  function walk(node: unknown): void {
76
107
  if (Array.isArray(node)) {
77
108
  for (const child of node) walk(child);
@@ -107,6 +138,50 @@ function walk(node: unknown): void {
107
138
  for (const k in obj) walk(obj[k]);
108
139
  }
109
140
 
141
+ /**
142
+ * Normalize `{}` (empty JSON Schema = `z.unknown()` / unconstrained value) to
143
+ * boolean `true` in every schema-valued position. JSON Schema draft 2020-12
144
+ * §4.3.1: `{}` and `true` are semantically equivalent ("any JSON value").
145
+ * Grammar-constrained samplers (llama.cpp, etc.) treat the object form as
146
+ * "generate an empty object" rather than "any JSON value", causing open-typed
147
+ * fields like `extra.title` (from `z.record(z.string(), z.unknown())`) to
148
+ * always emit `{}` instead of the intended string/number/etc. (issue #1179).
149
+ *
150
+ * Mutates in place. Provider-agnostic — applied to every tool wire schema so
151
+ * Anthropic, Google, OpenAI, Ollama, Bedrock, and Cursor all see the
152
+ * normalized form, regardless of whether the source was Zod or TypeBox.
153
+ */
154
+ export function normalizeEmptySchemas(node: unknown): void {
155
+ if (Array.isArray(node)) {
156
+ for (const child of node) normalizeEmptySchemas(child);
157
+ return;
158
+ }
159
+ if (!node || typeof node !== "object") return;
160
+ const obj = node as Record<string, unknown>;
161
+
162
+ for (const key of SCHEMA_VALUE_KEYS) {
163
+ if (Object.hasOwn(obj, key) && isEmptyObject(obj[key])) obj[key] = true;
164
+ }
165
+ for (const mapKey of SCHEMA_MAP_KEYS) {
166
+ const map = obj[mapKey];
167
+ if (map !== null && typeof map === "object" && !Array.isArray(map)) {
168
+ for (const k in map as Record<string, unknown>) {
169
+ if (isEmptyObject((map as Record<string, unknown>)[k])) (map as Record<string, unknown>)[k] = true;
170
+ }
171
+ }
172
+ }
173
+ for (const arrKey of SCHEMA_ARRAY_KEYS) {
174
+ const arr = obj[arrKey];
175
+ if (Array.isArray(arr)) {
176
+ for (let i = 0; i < arr.length; i++) {
177
+ if (isEmptyObject(arr[i])) arr[i] = true;
178
+ }
179
+ }
180
+ }
181
+
182
+ for (const k in obj) normalizeEmptySchemas(obj[k]);
183
+ }
184
+
110
185
  /** Convert a Zod schema into the JSON Schema shape providers consume. */
111
186
  export function zodToWireSchema(schema: ZodType): Record<string, unknown> {
112
187
  return stamp(schema, kZodWireSchema, s => {
@@ -122,13 +197,17 @@ export function zodToWireSchema(schema: ZodType): Record<string, unknown> {
122
197
  * Resolve a tool's parameters to a JSON Schema object suitable for sending
123
198
  * over the wire. Zod schemas are converted (and cached); legacy TypeBox / raw
124
199
  * JSON Schema parameters are upgraded to draft 2020-12 (and cached).
200
+ *
201
+ * Both branches finish with `normalizeEmptySchemas` so every provider —
202
+ * OpenAI, Anthropic, Google, Ollama, Bedrock, Cursor — sees `{}` normalized
203
+ * to `true` in schema-valued positions (issue #1179).
125
204
  */
126
205
  export function toolWireSchema(tool: Tool): Record<string, unknown> {
127
206
  const params: TSchema = tool.parameters;
128
207
  if (isZodSchema(params)) return zodToWireSchema(params);
129
- return stamp(
130
- params as Record<string, unknown>,
131
- kJsonWireSchema,
132
- p => upgradeJsonSchemaTo202012(p) as Record<string, unknown>,
133
- );
208
+ return stamp(params as Record<string, unknown>, kJsonWireSchema, p => {
209
+ const upgraded = upgradeJsonSchemaTo202012(p) as Record<string, unknown>;
210
+ normalizeEmptySchemas(upgraded);
211
+ return upgraded;
212
+ });
134
213
  }