@oh-my-pi/pi-ai 16.0.1 → 16.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,33 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.0.3] - 2026-06-16
6
+
7
+ ### Added
8
+
9
+ - Exported `renderDelimitedThinking` from the `@oh-my-pi/pi-ai/dialect` barrel so consumers can reuse the dialect's `<thinking>` envelope unwrap-and-rewrap logic (the only `./dialect/rendering` primitive re-exported; the rest stay dialect-internal).
10
+
11
+ ### Fixed
12
+
13
+ - Fixed OpenAI Responses/Codex tool schema normalization stripping provider-rejected regex lookaround patterns from MCP tool parameter schemas. ([#2784](https://github.com/can1357/oh-my-pi/issues/2784))
14
+ - Fixed OpenAI Responses parallel tool-call routing so late keyed argument deltas for a closed call are dropped instead of being appended to another open call.
15
+
16
+ ## [16.0.2] - 2026-06-16
17
+
18
+ ### Added
19
+
20
+ - Added `UMANS_WEBSEARCH_PROVIDER=native|exa` support for routing Umans gateway-owned web search requests.
21
+
22
+ ### Fixed
23
+
24
+ - A single MCP tool whose input schema can't be emitted as a valid strict tool schema for the active provider no longer fails the whole turn with HTTP 400. `convertTools` (openai-responses) now validates each tool's emitted parameter schema for `enum`/`const`-vs-`type` contradictions that pass structural JSON-Schema validation but the provider rejects — e.g. a non-null `enum` on a `type: "null"` node, or an `enum` on an `array` node — and quarantines just the offending tool with a `logger.warn` naming the tool and schema path, keeping every other tool usable. Adds `findStrictToolSchemaViolation` to `@oh-my-pi/pi-ai/utils/schema` ([#2652](https://github.com/can1357/oh-my-pi/issues/2652))
25
+ - Fixed OpenAI Responses-compatible streams from Ollama/local hosts dropping arguments for parallel tool calls whose deltas use `fc_<call_id>` item ids, which left earlier `ast_grep` calls with `{}` and failed validation. ([#2715](https://github.com/can1357/oh-my-pi/issues/2715))
26
+ - Fixed dialect transcript rendering so literal thinking envelopes are unwrapped before adding the dialect's own thinking tags, preventing nested `<thinking>` output in advisor raw dumps ([#2700](https://github.com/can1357/oh-my-pi/issues/2700)).
27
+ - Fixed Anthropic-compatible Umans requests so they now escape client tool names and forward the gateway web search header, letting Kimi answer normally instead of returning raw gateway search results.
28
+ - Fixed Google Gemini requests with `toolChoice: "auto"` so they no longer serialize an explicit `toolConfig` AUTO mode, which can cause Gemini-3 models to leak raw planning JSON instead of executing tools. ([#2776](https://github.com/can1357/oh-my-pi/issues/2776))
29
+ - Fixed OpenAI-compatible Ollama completions that come back empty with `finish_reason:length` after the prompt fills `num_ctx`; they now surface an actionable context-window error instead of an empty length stop. ([#2774](https://github.com/can1357/oh-my-pi/issues/2774))
30
+ - Fixed Codex browser login so it no longer issues credentials for the `opencode` OAuth originator while OMP requests identify as `pi`; the mismatch could make the first authenticated Codex request return 401 ([#2696](https://github.com/can1357/oh-my-pi/issues/2696)).
31
+
5
32
  ## [16.0.1] - 2026-06-15
6
33
 
7
34
  ### Added
@@ -5,4 +5,5 @@ export * from "./factory";
5
5
  export * from "./history";
6
6
  export * from "./inventory";
7
7
  export * from "./owned-stream";
8
+ export { renderDelimitedThinking } from "./rendering";
8
9
  export * from "./types";
@@ -8,6 +8,8 @@ export interface AnthropicRequestOptions {
8
8
  timeout?: number;
9
9
  /** Per-request retry budget override. */
10
10
  maxRetries?: number;
11
+ /** Per-request headers merged after client defaults. */
12
+ headers?: Record<string, string>;
11
13
  }
12
14
  /**
13
15
  * Extra `RequestInit` fields merged into every fetch call. Bun extends
@@ -1,6 +1,6 @@
1
- import type { Model, ServiceTier, StreamFunction, StreamOptions, Tool, ToolChoice } from "../types";
1
+ import type { Context, Model, ProviderSessionState, ServiceTier, StreamFunction, StreamOptions, Tool, ToolChoice } from "../types";
2
2
  import { type OpenAIResponsesToolChoice } from "../utils/tool-choice";
3
- import type { Tool as OpenAITool } from "./openai-responses-wire";
3
+ import type { Tool as OpenAITool, ResponseCreateParamsStreaming, ResponseInput } from "./openai-responses-wire";
4
4
  export declare function normalizeOpenAIResponsesPromptCacheKey(sessionId: string | undefined): string | undefined;
5
5
  export interface OpenAIResponsesOptions extends StreamOptions {
6
6
  reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -56,11 +56,46 @@ export interface OpenAIResponsesOptions extends StreamOptions {
56
56
  */
57
57
  extraBody?: Record<string, unknown>;
58
58
  }
59
+ interface OpenAIResponsesProviderSessionState extends ProviderSessionState {
60
+ nativeHistoryReplayWarmed: boolean;
61
+ /** Stateful `previous_response_id` chain baselines, keyed by baseUrl/model/session. */
62
+ chains: Map<string, OpenAIResponsesChainState>;
63
+ }
64
+ interface OpenAIResponsesChainState {
65
+ /**
66
+ * Wire params of the last successful turn, with per-turn trailing
67
+ * scaffolding stripped from `input` (never carries previous_response_id).
68
+ */
69
+ lastParams?: OpenAIResponsesSamplingParams;
70
+ lastResponseId?: string;
71
+ /** Output items of the last response, in replay-sanitized form (matches next-turn input). */
72
+ lastResponseItems?: ResponseInput;
73
+ canAppend: boolean;
74
+ /** Consecutive stale-previous-response failures; reset on a successful chained completion. */
75
+ staleFailures: number;
76
+ /** Set once chaining is judged unsupported for this session (circuit breaker). */
77
+ disabled: boolean;
78
+ }
79
+ type OpenAIResponsesSamplingParams = ResponseCreateParamsStreaming & {
80
+ top_p?: number;
81
+ top_k?: number;
82
+ min_p?: number;
83
+ presence_penalty?: number;
84
+ repetition_penalty?: number;
85
+ stream_options?: {
86
+ include_obfuscation?: boolean;
87
+ };
88
+ };
59
89
  /**
60
90
  * Generate function for OpenAI Responses API
61
91
  */
62
92
  export declare const streamOpenAIResponses: StreamFunction<"openai-responses">;
63
93
  export declare function getOpenAIResponsesCacheSessionId(options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId" | "promptCacheKey"> | undefined): string | undefined;
94
+ /** @internal Exported for tests. */
95
+ export declare function buildParams(model: Model<"openai-responses">, context: Context, options: OpenAIResponsesOptions | undefined, providerSessionState: OpenAIResponsesProviderSessionState | undefined): {
96
+ params: OpenAIResponsesSamplingParams;
97
+ trailingScaffoldingItems: number;
98
+ };
64
99
  /**
65
100
  * Whether this model should get the OpenAI custom-tool grammar variant
66
101
  * for `apply_patch`. The generated model catalog sets
@@ -72,4 +107,5 @@ export declare function supportsFreeformApplyPatch(model: Model<"openai-response
72
107
  /** @internal Exported for tests. */
73
108
  export declare function mapOpenAIResponsesToolChoiceForTools(choice: ToolChoice | undefined, tools: Tool[], model: Model<"openai-responses">): OpenAIResponsesToolChoice;
74
109
  /** @internal Exported for tests. */
75
- export declare function convertTools(tools: Tool[], strictMode: boolean, model: Model<"openai-responses">): OpenAITool[];
110
+ export declare function convertTools(tools: Tool[], strictMode: boolean, model: Model<"openai-responses">, onQuarantine?: (toolName: string, schemaPath: string) => void): OpenAITool[];
111
+ export {};
@@ -1,10 +1,20 @@
1
+ /**
2
+ * OpenAI Codex (ChatGPT OAuth) flow — browser and device-code flows.
3
+ */
1
4
  import type { OAuthController, OAuthCredentials } from "./types";
2
5
  export declare function decodeJwt<T = Record<string, unknown>>(token: string): T | null;
6
+ /** Builds the Codex browser OAuth URL used by browser login; exported for auth regression tests. */
7
+ export declare function createOpenAICodexAuthorizationUrl(args: {
8
+ state: string;
9
+ redirectUri: string;
10
+ challenge: string;
11
+ originator?: string;
12
+ }): string;
3
13
  /**
4
14
  * Login with OpenAI Codex OAuth
5
15
  */
6
16
  export type OpenAICodexLoginOptions = OAuthController & {
7
- /** Optional originator value for OpenAI Codex OAuth. Default: "opencode". */
17
+ /** Optional originator value for OpenAI Codex OAuth. Default matches OMP Codex request headers. */
8
18
  originator?: string;
9
19
  };
10
20
  export declare function loginOpenAICodex(options: OpenAICodexLoginOptions): Promise<OAuthCredentials>;
@@ -25,11 +25,12 @@ import type { AssistantMessage } from "../types";
25
25
  * - Kimi For Coding: "exceeded model token limit: X (requested: Y)"
26
26
  * - Anthropic 413: "request_too_large" (request body exceeds size limit)
27
27
  * - HTTP 413: "Payload Too Large" / "Request Entity Too Large"
28
+ * - Ollama OpenAI-compatible: "prompt filled the context window"
28
29
  *
29
30
  * **Unreliable detection:**
30
31
  * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
31
32
  * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.
32
- * - Ollama: Silently truncates input without error. Cannot be detected via this function.
33
+ * - Ollama native: Silently truncates input without error. Cannot be detected via this function.
33
34
  * The response will have usage.input < expected, but we don't know the expected value.
34
35
  *
35
36
  * ## Custom Providers
@@ -8,6 +8,7 @@ export * from "./json-schema-validator";
8
8
  export * from "./meta-validator";
9
9
  export * from "./normalize";
10
10
  export * from "./spill";
11
+ export * from "./strict-tool-validation";
11
12
  export * from "./types";
12
13
  export * from "./typescript";
13
14
  export * from "./wire";
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Detects tool-parameter schemas that pass structural JSON-Schema validation
3
+ * (so {@link isValidJsonSchema} accepts them) yet make OpenAI-style providers
4
+ * reject the whole request with HTTP 400 — namely an `enum`/`const` whose
5
+ * value(s) cannot satisfy the node's declared `type`. MCP servers emit these
6
+ * when a nullable/array branch is built incorrectly (e.g. a non-null `enum`
7
+ * copied onto a `type: "null"` branch, or an `enum` placed on an `array`
8
+ * schema instead of its `items`). One such tool 400s the entire turn, so
9
+ * callers quarantine just the offending tool. See issue #2652.
10
+ */
11
+ /**
12
+ * Walk a tool parameter schema for OpenAI-strict `enum`/`const`-vs-`type`
13
+ * contradictions. Returns a JSON-pointer-ish path to the first offending node,
14
+ * or `null` when the schema is safe to emit.
15
+ */
16
+ export declare function findStrictToolSchemaViolation(schema: unknown, path?: string): string | null;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "16.0.1",
4
+ "version": "16.0.3",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -38,8 +38,8 @@
38
38
  },
39
39
  "dependencies": {
40
40
  "@bufbuild/protobuf": "^2.12.0",
41
- "@oh-my-pi/pi-catalog": "16.0.1",
42
- "@oh-my-pi/pi-utils": "16.0.1",
41
+ "@oh-my-pi/pi-catalog": "16.0.3",
42
+ "@oh-my-pi/pi-utils": "16.0.3",
43
43
  "partial-json": "^0.1.7",
44
44
  "zod": "^4"
45
45
  },
@@ -5,4 +5,9 @@ export * from "./factory";
5
5
  export * from "./history";
6
6
  export * from "./inventory";
7
7
  export * from "./owned-stream";
8
+ // `./rendering` is a dialect-internal primitives module deliberately excluded
9
+ // from the barrel. `renderDelimitedThinking` is the one helper an external
10
+ // consumer needs (the legacy markdown `/dump` reuses its `<thinking>` envelope
11
+ // unwrap), so re-export only that symbol rather than `export *`-ing the rest.
12
+ export { renderDelimitedThinking } from "./rendering";
8
13
  export * from "./types";
@@ -157,9 +157,64 @@ export function messageContentText(
157
157
  return text;
158
158
  }
159
159
 
160
+ function isAsciiWhitespace(code: number): boolean {
161
+ return code === 9 || code === 10 || code === 11 || code === 12 || code === 13 || code === 32;
162
+ }
163
+
164
+ function trimAsciiStart(text: string, start: number, end: number): number {
165
+ let cursor = start;
166
+ while (cursor < end && isAsciiWhitespace(text.charCodeAt(cursor))) cursor++;
167
+ return cursor;
168
+ }
169
+
170
+ function trimAsciiEnd(text: string, start: number, end: number): number {
171
+ let cursor = end;
172
+ while (cursor > start && isAsciiWhitespace(text.charCodeAt(cursor - 1))) cursor--;
173
+ return cursor;
174
+ }
175
+
176
+ function findDelimitedThinkingClose(open: string, close: string, text: string, start: number, end: number): number {
177
+ let depth = 1;
178
+ let cursor = start;
179
+ while (cursor < end) {
180
+ const nextClose = text.indexOf(close, cursor);
181
+ if (nextClose < 0 || nextClose >= end) return -1;
182
+ const nextOpen = text.indexOf(open, cursor);
183
+ if (nextOpen >= 0 && nextOpen < nextClose) {
184
+ depth++;
185
+ cursor = nextOpen + open.length;
186
+ continue;
187
+ }
188
+ depth--;
189
+ if (depth === 0) return nextClose;
190
+ cursor = nextClose + close.length;
191
+ }
192
+ return -1;
193
+ }
194
+
195
+ function unwrapDelimitedThinking(open: string, close: string, text: string): string {
196
+ const end = trimAsciiEnd(text, 0, text.length);
197
+ let cursor = trimAsciiStart(text, 0, end);
198
+ if (cursor >= end || !text.startsWith(open, cursor)) return text;
199
+
200
+ const segments: string[] = [];
201
+ while (cursor < end) {
202
+ if (!text.startsWith(open, cursor)) return text;
203
+ const innerStart = cursor + open.length;
204
+ const innerEnd = findDelimitedThinkingClose(open, close, text, innerStart, end);
205
+ if (innerEnd < 0) return text;
206
+
207
+ const trimmedInnerEnd = trimAsciiEnd(text, innerStart, innerEnd);
208
+ const trimmedInnerStart = trimAsciiStart(text, innerStart, trimmedInnerEnd);
209
+ segments.push(unwrapDelimitedThinking(open, close, text.slice(trimmedInnerStart, trimmedInnerEnd)));
210
+ cursor = trimAsciiStart(text, innerEnd + close.length, end);
211
+ }
212
+ return segments.join("\n");
213
+ }
214
+
160
215
  export function renderDelimitedThinking(open: string, close: string, text: string): string {
161
216
  if (!text) return "";
162
- return `${open}\n${text}\n${close}`;
217
+ return `${open}\n${unwrapDelimitedThinking(open, close, text)}\n${close}`;
163
218
  }
164
219
 
165
220
  export function chatMlTurn(role: "assistant" | "system" | "tool" | "user", body: string): string {
@@ -39,6 +39,8 @@ export interface AnthropicRequestOptions {
39
39
  timeout?: number;
40
40
  /** Per-request retry budget override. */
41
41
  maxRetries?: number;
42
+ /** Per-request headers merged after client defaults. */
43
+ headers?: Record<string, string>;
42
44
  }
43
45
 
44
46
  /**
@@ -217,7 +219,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
217
219
  return new AnthropicApiRequest(() => this.#send(path, params, options));
218
220
  }
219
221
 
220
- #buildHeaders(): Record<string, string> {
222
+ #buildHeaders(requestHeaders?: Record<string, string>): Record<string, string> {
221
223
  const opts = this.#options;
222
224
  const defaults = opts.defaultHeaders ?? {};
223
225
  const headers: Record<string, string> = {};
@@ -228,6 +230,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
228
230
  headers.Authorization = `Bearer ${opts.authToken}`;
229
231
  }
230
232
  Object.assign(headers, defaults);
233
+ Object.assign(headers, requestHeaders);
231
234
  return headers;
232
235
  }
233
236
 
@@ -242,7 +245,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
242
245
  const timeoutMs = options?.timeout ?? opts.timeout ?? DEFAULT_TIMEOUT_MS;
243
246
  const maxRetries = Math.max(0, options?.maxRetries ?? opts.maxRetries ?? DEFAULT_MAX_RETRIES);
244
247
  const url = `${opts.baseURL ?? "https://api.anthropic.com"}${path}`;
245
- const headers = this.#buildHeaders();
248
+ const headers = this.#buildHeaders(options?.headers);
246
249
  const body = JSON.stringify(params);
247
250
 
248
251
  for (let attempt = 0; ; attempt++) {
@@ -704,6 +704,8 @@ export function resolveAnthropicMetadataUserId(
704
704
  return generateClaudeJsonUserId(sessionId, accountId);
705
705
  }
706
706
  const ANTHROPIC_BUILTIN_TOOL_NAMES = new Set(["web_search", "code_execution", "text_editor", "computer"]);
707
+ const UMANS_WEBSEARCH_PROVIDER_HEADER = "X-Umans-Websearch-Provider";
708
+ const UMANS_WEBSEARCH_TOOL_NAME = "web_search";
707
709
  export const applyClaudeToolPrefix = (name: string): string => {
708
710
  if (!claudeToolPrefix) return name;
709
711
  if (ANTHROPIC_BUILTIN_TOOL_NAMES.has(name.toLowerCase())) return name;
@@ -721,6 +723,50 @@ export const stripClaudeToolPrefix = (name: string): string => {
721
723
  return name.slice(claudeToolPrefix.length);
722
724
  };
723
725
 
726
+ function normalizeUmansWebSearchProvider(value: string | undefined): "native" | "exa" | undefined {
727
+ const normalized = value?.trim().toLowerCase();
728
+ return normalized === "native" || normalized === "exa" ? normalized : undefined;
729
+ }
730
+
731
+ function getUmansWebSearchProvider(headers: Record<string, string> | undefined): "native" | "exa" | undefined {
732
+ const explicit = getHeaderCaseInsensitive(headers, UMANS_WEBSEARCH_PROVIDER_HEADER);
733
+ if (explicit !== undefined) return normalizeUmansWebSearchProvider(explicit);
734
+ return normalizeUmansWebSearchProvider($env.UMANS_WEBSEARCH_PROVIDER);
735
+ }
736
+
737
+ function isUmansAnthropicModel(model: Model<"anthropic-messages">): boolean {
738
+ return model.provider === "umans" || model.baseUrl.toLowerCase().includes("api.code.umans.ai");
739
+ }
740
+
741
+ function getUmansWebSearchHeader(
742
+ model: Model<"anthropic-messages">,
743
+ headers: Record<string, string> | undefined,
744
+ ): Record<string, string> | undefined {
745
+ if (!isUmansAnthropicModel(model)) return undefined;
746
+ const provider = getUmansWebSearchProvider(headers);
747
+ return provider ? { [UMANS_WEBSEARCH_PROVIDER_HEADER]: provider } : undefined;
748
+ }
749
+
750
+ function shouldUseUmansGatewayWebSearch(name: string, enabled: boolean): boolean {
751
+ return enabled && name.toLowerCase() === UMANS_WEBSEARCH_TOOL_NAME;
752
+ }
753
+
754
+ function encodeAnthropicToolName(
755
+ name: string,
756
+ isOAuthToken: boolean,
757
+ escapeBuiltinToolNames: boolean,
758
+ useUmansGatewayWebSearch = false,
759
+ ): string {
760
+ if (shouldUseUmansGatewayWebSearch(name, useUmansGatewayWebSearch)) return name;
761
+ if (escapeBuiltinToolNames) return `${claudeToolPrefix}${name}`;
762
+ return isOAuthToken ? applyClaudeToolPrefix(name) : name;
763
+ }
764
+
765
+ function decodeAnthropicToolName(name: string, isOAuthToken: boolean, escapeBuiltinToolNames: boolean): string {
766
+ if (isOAuthToken || escapeBuiltinToolNames) return stripClaudeToolPrefix(name);
767
+ return name;
768
+ }
769
+
724
770
  const ANTHROPIC_MANY_IMAGE_THRESHOLD = 20;
725
771
  const ANTHROPIC_MANY_IMAGE_MAX_DIMENSION = 2000;
726
772
 
@@ -1467,10 +1513,12 @@ const THINKING_ENVELOPE_CLOSE = "</thinking>";
1467
1513
 
1468
1514
  function unwrapAnthropicThinkingEnvelope(text: string): string | undefined {
1469
1515
  let current = text.trim();
1516
+ let stripped = false;
1470
1517
  while (current.startsWith(THINKING_ENVELOPE_OPEN) && current.endsWith(THINKING_ENVELOPE_CLOSE)) {
1471
1518
  current = current.slice(THINKING_ENVELOPE_OPEN.length, current.length - THINKING_ENVELOPE_CLOSE.length).trim();
1519
+ stripped = true;
1472
1520
  }
1473
- return current === text ? undefined : current;
1521
+ return stripped ? current : undefined;
1474
1522
  }
1475
1523
 
1476
1524
  function createEmptyUsage(premiumRequests?: number): Usage {
@@ -1578,6 +1626,8 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1578
1626
  let disableStrictTools =
1579
1627
  (providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
1580
1628
  let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
1629
+ const mergedCallerHeaders = mergeHeaders(model.headers, options?.headers);
1630
+ const umansGatewayWebSearchHeader = getUmansWebSearchHeader(model, mergedCallerHeaders);
1581
1631
 
1582
1632
  let client: AnthropicMessagesClientLike;
1583
1633
  let isOAuthToken: boolean;
@@ -1639,7 +1689,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1639
1689
  }
1640
1690
  const preparedContext = await prepareAnthropicManyImageContext(context, model.input.includes("image"));
1641
1691
  const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
1642
- let nextParams = buildParams(model, preparedContext, isOAuthToken, options, disableStrictTools);
1692
+ let nextParams = buildParams(
1693
+ model,
1694
+ preparedContext,
1695
+ isOAuthToken,
1696
+ options,
1697
+ disableStrictTools,
1698
+ umansGatewayWebSearchHeader !== undefined,
1699
+ );
1643
1700
  if (disableStrictTools) {
1644
1701
  dropAnthropicStrictTools(nextParams);
1645
1702
  }
@@ -1717,7 +1774,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1717
1774
  // to zero even when no watchdog timeout is configured (the helper only
1718
1775
  // pins it alongside a timeout; a client retry budget of 5 would otherwise
1719
1776
  // multiply with PROVIDER_MAX_RETRIES into up to 66 wire attempts).
1720
- const requestOptions = { ...createSdkStreamRequestOptions(requestSignal, requestTimeoutMs), maxRetries: 0 };
1777
+ const requestOptions = {
1778
+ ...createSdkStreamRequestOptions(requestSignal, requestTimeoutMs),
1779
+ maxRetries: 0,
1780
+ ...(umansGatewayWebSearchHeader ? { headers: umansGatewayWebSearchHeader } : {}),
1781
+ };
1721
1782
  const anthropicRequest: unknown =
1722
1783
  isOAuthToken && client.beta
1723
1784
  ? client.beta.messages.create({ ...params, stream: true }, requestOptions)
@@ -1900,9 +1961,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1900
1961
  const block: Block = {
1901
1962
  type: "toolCall",
1902
1963
  id: event.content_block.id,
1903
- name: isOAuthToken
1904
- ? stripClaudeToolPrefix(event.content_block.name)
1905
- : event.content_block.name,
1964
+ name: decodeAnthropicToolName(
1965
+ event.content_block.name,
1966
+ isOAuthToken,
1967
+ model.compat.escapeBuiltinToolNames,
1968
+ ),
1906
1969
  arguments: event.content_block.input ?? {},
1907
1970
  partialJson: "",
1908
1971
  index: event.index,
@@ -2377,7 +2440,13 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
2377
2440
  isOAuth: oauthToken,
2378
2441
  extraBetas: betaFeatures,
2379
2442
  stream,
2380
- modelHeaders: mergeHeaders(model.headers, foundryCustomHeaders, headers, dynamicHeaders),
2443
+ modelHeaders: mergeHeaders(
2444
+ model.headers,
2445
+ foundryCustomHeaders,
2446
+ getUmansWebSearchHeader(model, mergeHeaders(model.headers, headers)),
2447
+ headers,
2448
+ dynamicHeaders,
2449
+ ),
2381
2450
  isCloudflareAiGateway: model.provider === "cloudflare-ai-gateway",
2382
2451
  claudeCodeSessionId,
2383
2452
  claudeCodeBetas: oauthToken
@@ -2744,6 +2813,7 @@ function buildParams(
2744
2813
  isOAuthToken: boolean,
2745
2814
  options?: AnthropicOptions,
2746
2815
  disableStrictTools = false,
2816
+ useUmansGatewayWebSearch = false,
2747
2817
  ): MessageCreateParamsStreaming {
2748
2818
  const { cacheControl } = getCacheControl(model, options?.cacheRetention, isOAuthToken);
2749
2819
 
@@ -2765,6 +2835,8 @@ function buildParams(
2765
2835
  isOAuthToken,
2766
2836
  disableStrictTools || model.provider === "github-copilot",
2767
2837
  model.compat.supportsEagerToolInputStreaming,
2838
+ model.compat.escapeBuiltinToolNames,
2839
+ useUmansGatewayWebSearch,
2768
2840
  );
2769
2841
  } else if (isOAuthToken) {
2770
2842
  tools = [];
@@ -2890,10 +2962,16 @@ function buildParams(
2890
2962
  if (options?.toolChoice) {
2891
2963
  if (typeof options.toolChoice === "string") {
2892
2964
  params.tool_choice = { type: options.toolChoice };
2893
- } else if (isOAuthToken && options.toolChoice.name) {
2894
- params.tool_choice = { ...options.toolChoice, name: applyClaudeToolPrefix(options.toolChoice.name) };
2895
- } else {
2896
- params.tool_choice = options.toolChoice;
2965
+ } else if (options.toolChoice.name) {
2966
+ params.tool_choice = {
2967
+ ...options.toolChoice,
2968
+ name: encodeAnthropicToolName(
2969
+ options.toolChoice.name,
2970
+ isOAuthToken,
2971
+ model.compat.escapeBuiltinToolNames,
2972
+ useUmansGatewayWebSearch,
2973
+ ),
2974
+ };
2897
2975
  }
2898
2976
  // Claude Fable/Mythos 5 reject forced tool use outright ("tool_choice forces
2899
2977
  // tool use is not compatible with this model"). Downgrade any/tool → auto so the
@@ -3098,7 +3176,7 @@ export function convertAnthropicMessages(
3098
3176
  blocks.push({
3099
3177
  type: "tool_use",
3100
3178
  id: block.id,
3101
- name: isOAuthToken ? applyClaudeToolPrefix(block.name) : block.name,
3179
+ name: encodeAnthropicToolName(block.name, isOAuthToken, model.compat.escapeBuiltinToolNames),
3102
3180
  // Always sanitize: the model itself can emit lone-surrogate escapes
3103
3181
  // in tool-argument JSON (streamed out fine, rejected with a 400 on
3104
3182
  // replay by Anthropic's strict UTF-8 validation). toWellFormedDeep
@@ -3684,6 +3762,8 @@ function convertTools(
3684
3762
  isOAuthToken: boolean,
3685
3763
  disableStrictTools = false,
3686
3764
  supportsEagerToolInputStreaming = true,
3765
+ escapeBuiltinToolNames = false,
3766
+ useUmansGatewayWebSearch = false,
3687
3767
  ): AnthropicWireTool[] {
3688
3768
  if (!tools) return [];
3689
3769
  const schemaPlans = buildAnthropicToolSchemaPlans(tools, disableStrictTools);
@@ -3691,7 +3771,7 @@ function convertTools(
3691
3771
  return tools.map((tool, index) => {
3692
3772
  const plan = schemaPlans[index];
3693
3773
  const baseTool = {
3694
- name: isOAuthToken ? applyClaudeToolPrefix(tool.name) : tool.name,
3774
+ name: encodeAnthropicToolName(tool.name, isOAuthToken, escapeBuiltinToolNames, useUmansGatewayWebSearch),
3695
3775
  description: tool.description || "",
3696
3776
  input_schema: plan.inputSchema,
3697
3777
  };
@@ -793,9 +793,12 @@ export function buildGoogleGenerateContentParams<T extends "google-generative-ai
793
793
  if (context.tools && context.tools.length > 0 && options.toolChoice) {
794
794
  const choice = options.toolChoice;
795
795
  if (typeof choice === "string") {
796
- config.toolConfig = {
797
- functionCallingConfig: { mode: mapToolChoice(choice) },
798
- };
796
+ const mode = mapToolChoice(choice);
797
+ if (mode !== "AUTO") {
798
+ config.toolConfig = {
799
+ functionCallingConfig: { mode },
800
+ };
801
+ }
799
802
  } else {
800
803
  // Named-tool routing — `mode: "ANY"` plus an explicit allow-list. The
801
804
  // caller is responsible for ensuring the names exist in `context.tools`.
@@ -1162,6 +1162,10 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
1162
1162
  output.stopReason = "toolUse";
1163
1163
  }
1164
1164
 
1165
+ if (model.provider === "ollama" && output.stopReason === "length" && !hasVisibleCompletionContent(output)) {
1166
+ output.stopReason = "error";
1167
+ output.errorMessage = EMPTY_OLLAMA_LENGTH_COMPLETION_MESSAGE;
1168
+ }
1165
1169
  const firstEventTimeoutError = abortTracker.getLocalAbortReason();
1166
1170
  if (firstEventTimeoutError) {
1167
1171
  throw firstEventTimeoutError;
@@ -2196,6 +2200,19 @@ function shouldRetryWithoutStrictTools(
2196
2200
  );
2197
2201
  }
2198
2202
 
2203
+ const NON_WHITESPACE_RE = /\S/;
2204
+
2205
+ function hasVisibleCompletionContent(message: AssistantMessage): boolean {
2206
+ for (const block of message.content) {
2207
+ if (block.type === "toolCall") return true;
2208
+ if (block.type === "text" && NON_WHITESPACE_RE.test(block.text)) return true;
2209
+ }
2210
+ return false;
2211
+ }
2212
+
2213
+ const EMPTY_OLLAMA_LENGTH_COMPLETION_MESSAGE =
2214
+ "Model returned no content: prompt filled the context window; raise Ollama num_ctx or shorten the prompt.";
2215
+
2199
2216
  function mapStopReason(reason: ChatCompletionChunk.Choice["finish_reason"] | string): {
2200
2217
  stopReason: StopReason;
2201
2218
  errorMessage?: string;
@@ -490,30 +490,42 @@ export async function processResponsesStream<TApi extends Api>(
490
490
  // function_call deltas interleaved, and a singleton `current` reference would
491
491
  // fold them into the wrong block and drop arguments on every call but the last.
492
492
  //
493
- // llama.cpp's `to_json_oaicompat_resp` (issue #2015) compounds this: `output_item.added`
494
- // for function_call/custom_tool_call carries `item.call_id` but no `item.id` and no
495
- // `output_index`, while the matching `function_call_arguments.delta` carries
496
- // `item_id = "fc_<call_id>"`. Registering function-call items by `call_id` as a
497
- // secondary key lets the delta lookup find the right block on hosts that emit one
498
- // identifier but not the other.
493
+ // OpenAI-compatible hosts can compound this by omitting `item.id` and
494
+ // `output_index` on `output_item.added` while routing later argument deltas to
495
+ // either the bare `call_id` or a synthesized `fc_<call_id>` item id. Register
496
+ // both keys so each delta reaches its own block instead of falling back to the
497
+ // most recently added parallel call.
499
498
  const openItemsByOutputIndex = new Map<number, StreamingItem>();
500
499
  const openItemsByItemId = new Map<string, StreamingItem>();
500
+ const openItemsByPrefixedCallId = new Map<string, StreamingItem>();
501
501
  let lastOpenItem: StreamingItem | null = null;
502
502
  const openItemsInOrder: StreamingItem[] = [];
503
503
 
504
+ const prefixedFunctionCallItemKey = (callId: string | undefined): string | undefined =>
505
+ callId ? `fc_${callId}` : undefined;
506
+
504
507
  const registerOpenItem = (
505
508
  outputIndex: number | undefined,
506
509
  itemId: string | undefined,
507
510
  entry: StreamingItem,
508
511
  alternateItemKey?: string,
512
+ prefixedAlternateItemKey?: string,
509
513
  ): void => {
510
514
  if (typeof outputIndex === "number") openItemsByOutputIndex.set(outputIndex, entry);
511
515
  if (itemId) openItemsByItemId.set(itemId, entry);
512
516
  if (alternateItemKey && alternateItemKey !== itemId) openItemsByItemId.set(alternateItemKey, entry);
517
+ if (
518
+ prefixedAlternateItemKey &&
519
+ prefixedAlternateItemKey !== itemId &&
520
+ prefixedAlternateItemKey !== alternateItemKey
521
+ ) {
522
+ openItemsByPrefixedCallId.set(prefixedAlternateItemKey, entry);
523
+ }
513
524
  openItemsInOrder.push(entry);
514
525
  lastOpenItem = entry;
515
526
  };
516
527
  const lookupOpenItem = (event: { output_index?: number; item_id?: string }): StreamingItem | undefined => {
528
+ const hasKey = typeof event.output_index === "number" || event.item_id !== undefined;
517
529
  if (typeof event.output_index === "number") {
518
530
  const found = openItemsByOutputIndex.get(event.output_index);
519
531
  if (found) return found;
@@ -522,16 +534,43 @@ export async function processResponsesStream<TApi extends Api>(
522
534
  const found = openItemsByItemId.get(event.item_id);
523
535
  if (found) return found;
524
536
  }
525
- // Fallback for tests / mock providers that omit identifiers on stream events.
526
- return lastOpenItem ?? undefined;
537
+ // Keyed events whose item already closed are stale; drop them instead of
538
+ // routing to a sibling. Only fully identifierless mock/proxy events use the
539
+ // legacy singleton fallback.
540
+ return hasKey ? undefined : (lastOpenItem ?? undefined);
527
541
  };
528
542
// True when the event carries at least one routing identifier: a numeric
// `output_index`, or an `item_id` that is defined (an empty string counts).
const hasOpenItemKey = (event: { output_index?: number; item_id?: string }): boolean => {
	if (typeof event.output_index === "number") return true;
	return event.item_id !== undefined;
};
544
// Resolves the open tool-call item addressed by an argument/input event.
//
// Resolution order:
//   1. a registered `output_index`;
//   2. a prefixed call-id alias for `item_id`, accepted only when the aliased
//      item has the requested `type`;
//   3. an exact item-id match;
//   4. whatever the generic `lookupOpenItem` decides.
const lookupOpenToolCallAlias = (
	event: { output_index?: number; item_id?: string },
	type: "function_call" | "custom_tool_call",
): StreamingItem | undefined => {
	const { output_index: outputIndex, item_id: itemId } = event;

	// A lossy host (llama.cpp/Ollama, issue #2015) can omit `output_index` on
	// `output_item.added` yet still stamp it on the delta. Such an index was
	// never registered, so a miss here must continue to the id-based maps
	// rather than give up.
	const indexed = typeof outputIndex === "number" ? openItemsByOutputIndex.get(outputIndex) : undefined;
	if (indexed) return indexed;

	if (itemId) {
		// Prefixed aliases share the wire namespace of real call ids, so they
		// live in their own map: a real `call_id: "fc_x"` then cannot overwrite
		// the alias registered for `call_id: "x"`.
		const aliased = openItemsByPrefixedCallId.get(itemId);
		if (aliased !== undefined && aliased.item.type === type) return aliased;

		const direct = openItemsByItemId.get(itemId);
		if (direct) return direct;
	}

	return lookupOpenItem(event);
};
530
569
  const lookupOpenFunctionCallItem = (event: {
531
570
  output_index?: number;
532
571
  item_id?: string;
533
572
  }): StreamingItem | undefined => {
534
- if (hasOpenItemKey(event)) return lookupOpenItem(event);
573
+ if (hasOpenItemKey(event)) return lookupOpenToolCallAlias(event, "function_call");
535
574
  for (const candidate of openItemsInOrder) {
536
575
  if (
537
576
  candidate.item.type === "function_call" &&
@@ -548,10 +587,19 @@ export async function processResponsesStream<TApi extends Api>(
548
587
  itemId: string | undefined,
549
588
  entry: StreamingItem | undefined,
550
589
  alternateItemKey?: string,
590
+ prefixedAlternateItemKey?: string,
551
591
  ): void => {
552
592
  if (typeof outputIndex === "number") openItemsByOutputIndex.delete(outputIndex);
553
593
  if (itemId) openItemsByItemId.delete(itemId);
554
594
  if (alternateItemKey && alternateItemKey !== itemId) openItemsByItemId.delete(alternateItemKey);
595
+ if (
596
+ prefixedAlternateItemKey &&
597
+ prefixedAlternateItemKey !== itemId &&
598
+ prefixedAlternateItemKey !== alternateItemKey &&
599
+ openItemsByPrefixedCallId.get(prefixedAlternateItemKey) === entry
600
+ ) {
601
+ openItemsByPrefixedCallId.delete(prefixedAlternateItemKey);
602
+ }
555
603
  if (entry) {
556
604
  const index = openItemsInOrder.indexOf(entry);
557
605
  if (index >= 0) openItemsInOrder.splice(index, 1);
@@ -591,7 +639,13 @@ export async function processResponsesStream<TApi extends Api>(
591
639
  partialJson: item.arguments || "",
592
640
  };
593
641
  output.content.push(block);
594
- registerOpenItem(event.output_index, item.id, { item, block }, item.call_id);
642
+ registerOpenItem(
643
+ event.output_index,
644
+ item.id,
645
+ { item, block },
646
+ item.call_id,
647
+ prefixedFunctionCallItemKey(item.call_id),
648
+ );
595
649
  stream.push({ type: "toolcall_start", contentIndex: contentIndexOf(block), partial: output });
596
650
  } else if (item.type === "custom_tool_call") {
597
651
  const block: StreamingToolCallBlock = {
@@ -609,7 +663,13 @@ export async function processResponsesStream<TApi extends Api>(
609
663
  partialJson: item.input ?? "",
610
664
  };
611
665
  output.content.push(block);
612
- registerOpenItem(event.output_index, item.id, { item, block }, item.call_id);
666
+ registerOpenItem(
667
+ event.output_index,
668
+ item.id,
669
+ { item, block },
670
+ item.call_id,
671
+ prefixedFunctionCallItemKey(item.call_id),
672
+ );
613
673
  stream.push({ type: "toolcall_start", contentIndex: contentIndexOf(block), partial: output });
614
674
  }
615
675
  } else if (event.type === "response.reasoning_summary_part.added") {
@@ -739,7 +799,7 @@ export async function processResponsesStream<TApi extends Api>(
739
799
  delete (block as { lastParseLen?: number }).lastParseLen;
740
800
  }
741
801
  } else if (event.type === "response.custom_tool_call_input.delta") {
742
- const entry = lookupOpenItem(event);
802
+ const entry = lookupOpenToolCallAlias(event, "custom_tool_call");
743
803
  if (entry?.item.type === "custom_tool_call" && entry.block.type === "toolCall") {
744
804
  const block = entry.block;
745
805
  block.partialJson += event.delta;
@@ -752,7 +812,7 @@ export async function processResponsesStream<TApi extends Api>(
752
812
  });
753
813
  }
754
814
  } else if (event.type === "response.custom_tool_call_input.done") {
755
- const entry = lookupOpenItem(event);
815
+ const entry = lookupOpenToolCallAlias(event, "custom_tool_call");
756
816
  if (entry?.item.type === "custom_tool_call" && entry.block.type === "toolCall") {
757
817
  entry.block.partialJson = event.input;
758
818
  entry.block.arguments = { input: event.input };
@@ -842,7 +902,7 @@ export async function processResponsesStream<TApi extends Api>(
842
902
  output.content.push(toolCall);
843
903
  contentIndex = output.content.length - 1;
844
904
  }
845
- closeOpenItem(event.output_index, item.id, entry, item.call_id);
905
+ closeOpenItem(event.output_index, item.id, entry, item.call_id, prefixedFunctionCallItemKey(item.call_id));
846
906
  stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
847
907
  } else if (item.type === "custom_tool_call") {
848
908
  const block = entry?.block.type === "toolCall" ? entry.block : undefined;
@@ -866,7 +926,7 @@ export async function processResponsesStream<TApi extends Api>(
866
926
  output.content.push(toolCall);
867
927
  contentIndex = output.content.length - 1;
868
928
  }
869
- closeOpenItem(event.output_index, item.id, entry, item.call_id);
929
+ closeOpenItem(event.output_index, item.id, entry, item.call_id, prefixedFunctionCallItemKey(item.call_id));
870
930
  stream.push({ type: "toolcall_end", contentIndex, toolCall, partial: output });
871
931
  }
872
932
  } else if (event.type === "response.completed" || event.type === "response.incomplete") {
@@ -34,7 +34,13 @@ import {
34
34
  import { postOpenAIStream } from "../utils/openai-http";
35
35
  import { notifyProviderResponse } from "../utils/provider-response";
36
36
  import { callWithCopilotModelRetry } from "../utils/retry";
37
- import { adaptSchemaForStrict, NO_STRICT, sanitizeSchemaForOpenAIResponses, toolWireSchema } from "../utils/schema";
37
+ import {
38
+ adaptSchemaForStrict,
39
+ findStrictToolSchemaViolation,
40
+ NO_STRICT,
41
+ sanitizeSchemaForOpenAIResponses,
42
+ toolWireSchema,
43
+ } from "../utils/schema";
38
44
  import { mapToOpenAIResponsesToolChoice, type OpenAIResponsesToolChoice } from "../utils/tool-choice";
39
45
  import {
40
46
  buildCopilotDynamicHeaders,
@@ -661,7 +667,8 @@ function getOpenAIResponsesRoutingSessionId(
661
667
  return normalizeOpenAIResponsesPromptCacheKey(options?.sessionId);
662
668
  }
663
669
 
664
- function buildParams(
670
+ /** @internal Exported for tests. */
671
+ export function buildParams(
665
672
  model: Model<"openai-responses">,
666
673
  context: Context,
667
674
  options: OpenAIResponsesOptions | undefined,
@@ -714,7 +721,21 @@ function buildParams(
714
721
  if (context.tools) {
715
722
  params.tools = convertTools(context.tools, model.compat.supportsStrictMode, model);
716
723
  if (options?.toolChoice) {
717
- params.tool_choice = mapOpenAIResponsesToolChoiceForTools(options.toolChoice, context.tools, model);
724
+ // Map tool_choice against the tools that survived quarantine, not the
725
+ // original list: a forced choice for a dropped tool — or "required" when
726
+ // every tool was dropped — would otherwise send a tool_choice with no
727
+ // matching tool, which the provider rejects just like the bad schema did (#2652).
728
+ const emittedNames = new Set(
729
+ params.tools.map(t => (t as { name?: string }).name).filter((n): n is string => n !== undefined),
730
+ );
731
+ const survivingTools =
732
+ params.tools.length === context.tools.length
733
+ ? context.tools
734
+ : context.tools.filter(t => emittedNames.has(t.customWireName ?? t.name));
735
+ const toolChoice = mapOpenAIResponsesToolChoiceForTools(options.toolChoice, survivingTools, model);
736
+ if (toolChoice !== undefined && params.tools.length > 0) {
737
+ params.tool_choice = toolChoice;
738
+ }
718
739
  }
719
740
  // The apply_patch spec §1 marks only `apply_patch` itself as
720
741
  // `supports_parallel_tool_calls = false`. OpenAI's Responses API
@@ -861,11 +882,20 @@ export function mapOpenAIResponsesToolChoiceForTools(
861
882
  }
862
883
 
863
884
  /** @internal Exported for tests. */
864
- export function convertTools(tools: Tool[], strictMode: boolean, model: Model<"openai-responses">): OpenAITool[] {
885
+ export function convertTools(
886
+ tools: Tool[],
887
+ strictMode: boolean,
888
+ model: Model<"openai-responses">,
889
+ onQuarantine: (toolName: string, schemaPath: string) => void = (toolName, schemaPath) =>
890
+ logger.warn(
891
+ `Tool "${toolName}" omitted from the openai-responses request: its parameter schema is invalid for this provider at ${schemaPath} (an enum/const value cannot match its declared type). Other tools are unaffected.`,
892
+ ),
893
+ ): OpenAITool[] {
865
894
  const allowFreeform = supportsFreeformApplyPatch(model);
866
- return tools.map(tool => {
895
+ const out: OpenAITool[] = [];
896
+ for (const tool of tools) {
867
897
  if (allowFreeform && tool.customFormat) {
868
- return {
898
+ out.push({
869
899
  type: "custom",
870
900
  // Tool advertises its wire-level name (e.g. `apply_patch`) — the
871
901
  // agent-loop dispatcher will match incoming calls by either the
@@ -877,18 +907,29 @@ export function convertTools(tools: Tool[], strictMode: boolean, model: Model<"o
877
907
  syntax: tool.customFormat.syntax,
878
908
  definition: compactGrammarDefinition(tool.customFormat.syntax, tool.customFormat.definition),
879
909
  },
880
- } as unknown as OpenAITool;
910
+ } as unknown as OpenAITool);
911
+ continue;
881
912
  }
882
913
  const strict = !NO_STRICT && strictMode && tool.strict !== false;
883
914
  const baseParameters = toolWireSchema(tool);
884
915
  const responseParameters = sanitizeSchemaForOpenAIResponses(baseParameters);
885
916
  const { schema: parameters, strict: effectiveStrict } = adaptSchemaForStrict(responseParameters, strict);
886
- return {
917
+ // Quarantine a tool whose emitted schema carries a provider-rejecting
918
+ // enum/const-vs-type contradiction: dropping just that tool keeps the rest
919
+ // of the request valid instead of letting one bad MCP schema 400 the whole
920
+ // turn (#2652). Other tools and built-ins are unaffected.
921
+ const violation = findStrictToolSchemaViolation(parameters);
922
+ if (violation) {
923
+ onQuarantine(tool.name, violation);
924
+ continue;
925
+ }
926
+ out.push({
887
927
  type: "function",
888
928
  name: tool.name,
889
929
  description: tool.description || "",
890
930
  parameters,
891
931
  ...(effectiveStrict && { strict: true }),
892
- } as OpenAITool;
893
- });
932
+ } as OpenAITool);
933
+ }
934
+ return out;
894
935
  }
@@ -1,6 +1,8 @@
1
1
  /**
2
2
  * OpenAI Codex (ChatGPT OAuth) flow — browser and device-code flows.
3
3
  */
4
+
5
+ import { OPENAI_HEADER_VALUES } from "@oh-my-pi/pi-catalog/wire/codex";
4
6
  import { OAuthCallbackFlow, type OAuthCallbackFlowOptions } from "./callback-server";
5
7
  import { generatePKCE } from "./pkce";
6
8
  import type { OAuthController, OAuthCredentials } from "./types";
@@ -60,6 +62,29 @@ interface PKCE {
60
62
  verifier: string;
61
63
  challenge: string;
62
64
  }
65
/**
 * Assembles the Codex browser OAuth authorization URL used by browser login.
 * Exported for auth regression tests.
 *
 * @param args.state - Opaque `state` value sent in the query string.
 * @param args.redirectUri - Value sent as `redirect_uri`.
 * @param args.challenge - PKCE code challenge; the method is always `S256`.
 * @param args.originator - Optional originator. When missing or blank after
 *   trimming, `OPENAI_HEADER_VALUES.ORIGINATOR_CODEX` is used instead.
 * @returns `AUTHORIZE_URL` followed by `?` and the encoded query string.
 */
export function createOpenAICodexAuthorizationUrl(args: {
	state: string;
	redirectUri: string;
	challenge: string;
	originator?: string;
}): string {
	const requestedOriginator = args.originator?.trim();
	const originator = requestedOriginator ? requestedOriginator : OPENAI_HEADER_VALUES.ORIGINATOR_CODEX;

	// Parameters are appended in a fixed order so the resulting URL is stable.
	const query = new URLSearchParams();
	query.append("response_type", "code");
	query.append("client_id", CLIENT_ID);
	query.append("redirect_uri", args.redirectUri);
	query.append("scope", SCOPE);
	query.append("code_challenge", args.challenge);
	query.append("code_challenge_method", "S256");
	query.append("state", args.state);
	query.append("id_token_add_organizations", "true");
	query.append("codex_cli_simplified_flow", "true");
	query.append("originator", originator);

	return `${AUTHORIZE_URL}?${query.toString()}`;
}
63
88
 
64
89
  class OpenAICodexOAuthFlow extends OAuthCallbackFlow {
65
90
  constructor(
@@ -79,20 +104,12 @@ class OpenAICodexOAuthFlow extends OAuthCallbackFlow {
79
104
  }
80
105
 
81
106
  async generateAuthUrl(state: string, redirectUri: string): Promise<{ url: string; instructions?: string }> {
82
- const searchParams = new URLSearchParams({
83
- response_type: "code",
84
- client_id: CLIENT_ID,
85
- redirect_uri: redirectUri,
86
- scope: SCOPE,
87
- code_challenge: this.pkce.challenge,
88
- code_challenge_method: "S256",
107
+ const url = createOpenAICodexAuthorizationUrl({
89
108
  state,
90
- id_token_add_organizations: "true",
91
- codex_cli_simplified_flow: "true",
109
+ redirectUri,
110
+ challenge: this.pkce.challenge,
92
111
  originator: this.originator,
93
112
  });
94
-
95
- const url = `${AUTHORIZE_URL}?${searchParams.toString()}`;
96
113
  return { url, instructions: "A browser window should open. Complete login to finish." };
97
114
  }
98
115
 
@@ -153,13 +170,13 @@ async function exchangeCodeForToken(code: string, verifier: string, redirectUri:
153
170
  * Login with OpenAI Codex OAuth
154
171
  */
155
172
  export type OpenAICodexLoginOptions = OAuthController & {
156
- /** Optional originator value for OpenAI Codex OAuth. Default: "opencode". */
173
+ /** Optional originator value for OpenAI Codex OAuth. Default matches OMP Codex request headers. */
157
174
  originator?: string;
158
175
  };
159
176
 
160
177
  export async function loginOpenAICodex(options: OpenAICodexLoginOptions): Promise<OAuthCredentials> {
161
178
  const pkce = await generatePKCE();
162
- const originator = options.originator?.trim() || "opencode";
179
+ const originator = options.originator?.trim() || OPENAI_HEADER_VALUES.ORIGINATOR_CODEX;
163
180
  const flow = new OpenAICodexOAuthFlow(options, pkce, originator);
164
181
 
165
182
  return flow.login();
@@ -23,7 +23,8 @@ import type { AssistantMessage } from "../types";
23
23
  * - HTTP 413 variants: "Payload Too Large" / "Request Entity Too Large"
24
24
  * - z.ai / GLM: Returns finish_reason: "model_context_window_exceeded" mapped to error message
25
25
  * - z.ai: Does NOT error, accepts overflow silently - handled via usage.input > contextWindow
26
- * - Ollama: Silently truncates input - not detectable via error message
26
+ * - Ollama OpenAI-compatible: "prompt filled the context window" after empty finish_reason:length
27
+ * - Ollama native: Silently truncates input - not detectable via error message
27
28
  */
28
29
  const OVERFLOW_PATTERNS = [
29
30
  /prompt is too long/i, // Anthropic
@@ -51,6 +52,7 @@ const OVERFLOW_PATTERNS = [
51
52
  /entity too large/i, // Generic HTTP 413 variant
52
53
  /\b413\b.*\b(request|payload|entity)\b.*\btoo large\b/i, // "413 Request Entity Too Large" variants
53
54
  /model_context_window_exceeded/i, // z.ai non-standard finish_reason surfaced as error text
55
+ /prompt filled the context window/i, // Ollama OpenAI-compatible empty length completion
54
56
  ];
55
57
  /**
56
58
  * Check if an assistant message represents a context overflow error.
@@ -78,11 +80,12 @@ const OVERFLOW_PATTERNS = [
78
80
  * - Kimi For Coding: "exceeded model token limit: X (requested: Y)"
79
81
  * - Anthropic 413: "request_too_large" (request body exceeds size limit)
80
82
  * - HTTP 413: "Payload Too Large" / "Request Entity Too Large"
83
+ * - Ollama OpenAI-compatible: "prompt filled the context window"
81
84
  *
82
85
  * **Unreliable detection:**
83
86
  * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
84
87
  * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.
85
- * - Ollama: Silently truncates input without error. Cannot be detected via this function.
88
+ * - Ollama native: Silently truncates input without error. Cannot be detected via this function.
86
89
  * The response will have usage.input < expected, but we don't know the expected value.
87
90
  *
88
91
  * ## Custom Providers
@@ -8,6 +8,7 @@ export * from "./json-schema-validator";
8
8
  export * from "./meta-validator";
9
9
  export * from "./normalize";
10
10
  export * from "./spill";
11
+ export * from "./strict-tool-validation";
11
12
  export * from "./types";
12
13
  export * from "./typescript";
13
14
  export * from "./wire";
@@ -936,8 +936,25 @@ export function sanitizeSchemaForOpenAIResponses(schema: JsonObject): JsonObject
936
936
  * `normalizeSchemaFor*` dispatcher naming used elsewhere in this module.
937
937
  */
938
938
  export const normalizeSchemaForOpenAIResponses: (schema: JsonObject) => JsonObject = sanitizeSchemaForOpenAIResponses;
939
+ const OPENAI_UNSUPPORTED_REGEX_LOOKAROUNDS = new Set(["=", "!", "<=", "<!"]);
940
+ const OPENAI_RESPONSES_PATTERN_PROPERTIES_FALLBACK = ".*";
939
941
 
940
- function normalizeOpenAIResponsesSchemaNode(value: unknown, cache: WeakMap<JsonObject, JsonObject>): unknown {
942
/**
 * Reports whether `pattern` contains a regex lookaround group: `(?=`, `(?!`,
 * `(?<=` or `(?<!` (the operators listed in
 * `OPENAI_UNSUPPORTED_REGEX_LOOKAROUNDS`).
 *
 * The scan is left to right and honours two pieces of regex syntax so that
 * literal text is not mistaken for a group opener:
 * - a backslash escapes the following character, so `\(?=` is not a group;
 * - inside a terminated character class (`[...]`) parentheses are literals,
 *   so `[(?=]` is not a lookaround.
 *
 * A `[` with no closing `]` is treated as a literal character, which keeps the
 * check conservative for malformed patterns.
 *
 * @param pattern - Regex source text (a schema `pattern` value or a
 *   `patternProperties` key).
 * @returns `true` when at least one unescaped lookaround opener is present.
 */
function hasOpenAIUnsupportedRegexLookaround(pattern: string): boolean {
	for (let i = 0; i < pattern.length; i++) {
		const ch = pattern[i];
		if (ch === "\\") {
			// Skip the escaped character; it can never open a group or a class.
			i++;
			continue;
		}
		if (ch === "[") {
			// Find the first unescaped `]`. Closing at the earliest candidate keeps
			// the skipped region as small as possible.
			let close = i + 1;
			while (close < pattern.length && pattern[close] !== "]") {
				close += pattern[close] === "\\" ? 2 : 1;
			}
			if (close < pattern.length) i = close;
			continue;
		}
		if (ch !== "(" || pattern[i + 1] !== "?") continue;
		// `(?<` needs a second character to tell lookbehind from a named group.
		const operator = pattern[i + 2] === "<" ? pattern.slice(i + 2, i + 4) : pattern[i + 2];
		if (OPENAI_UNSUPPORTED_REGEX_LOOKAROUNDS.has(operator)) return true;
	}
	return false;
}
956
+
957
+ function normalizeOpenAIResponsesSchemaNode(value: unknown, cache: WeakMap<JsonObject, unknown>): unknown {
941
958
  if (!isJsonObject(value)) return value;
942
959
 
943
960
  // `{}` (empty JSON Schema) ≡ `true` (JSON Schema draft 2020-12 §4.3.1).
@@ -973,11 +990,21 @@ function normalizeOpenAIResponsesSchemaNode(value: unknown, cache: WeakMap<JsonO
973
990
  changed = true;
974
991
  continue;
975
992
  }
993
+ if (
994
+ key === "pattern" &&
995
+ typeof value.pattern === "string" &&
996
+ hasOpenAIUnsupportedRegexLookaround(value.pattern)
997
+ ) {
998
+ changed = true;
999
+ continue;
1000
+ }
976
1001
 
977
1002
  const child = value[key];
978
1003
  let next: unknown = child;
979
- if (OPENAI_RESPONSES_SCHEMA_MAP_KEYS.has(key) && isJsonObject(child)) {
980
- next = normalizeOpenAIResponsesSchemaMap(child, cache);
1004
+ if (key === "patternProperties" && isJsonObject(child)) {
1005
+ next = normalizeOpenAIResponsesSchemaMap(child, cache, true);
1006
+ } else if (OPENAI_RESPONSES_SCHEMA_MAP_KEYS.has(key) && isJsonObject(child)) {
1007
+ next = normalizeOpenAIResponsesSchemaMap(child, cache, false);
981
1008
  } else if (OPENAI_RESPONSES_SCHEMA_ARRAY_KEYS.has(key) && Array.isArray(child)) {
982
1009
  next = normalizeOpenAIResponsesSchemaArray(child, cache);
983
1010
  } else if (OPENAI_RESPONSES_SCHEMA_VALUE_KEYS.has(key) && isJsonObject(child)) {
@@ -1008,7 +1035,7 @@ function normalizeOpenAIResponsesSchemaNode(value: unknown, cache: WeakMap<JsonO
1008
1035
  // the seeded partial and set `changed = true` for that node, so a node
1009
1036
  // that finishes with `changed === false` is provably non-cyclic and
1010
1037
  // referentially equal to its input.
1011
- const result = changed ? output : value;
1038
+ const result = changed ? (isJsonObjectEmpty(output) ? true : output) : value;
1012
1039
  cache.set(value, result);
1013
1040
  return result;
1014
1041
  }
@@ -1022,7 +1049,7 @@ function declaresObjectType(type: unknown): boolean {
1022
1049
  return false;
1023
1050
  }
1024
1051
 
1025
- function normalizeOpenAIResponsesSchemaArray(value: unknown[], cache: WeakMap<JsonObject, JsonObject>): unknown[] {
1052
+ function normalizeOpenAIResponsesSchemaArray(value: unknown[], cache: WeakMap<JsonObject, unknown>): unknown[] {
1026
1053
  let changed = false;
1027
1054
  const output = value.map(item => {
1028
1055
  const next = normalizeOpenAIResponsesSchemaNode(item, cache);
@@ -1032,7 +1059,11 @@ function normalizeOpenAIResponsesSchemaArray(value: unknown[], cache: WeakMap<Js
1032
1059
  return changed ? output : value;
1033
1060
  }
1034
1061
 
1035
- function normalizeOpenAIResponsesSchemaMap(schemaMap: JsonObject, cache: WeakMap<JsonObject, JsonObject>): JsonObject {
1062
+ function normalizeOpenAIResponsesSchemaMap(
1063
+ schemaMap: JsonObject,
1064
+ cache: WeakMap<JsonObject, unknown>,
1065
+ stripUnsupportedRegexKeys: boolean,
1066
+ ): JsonObject {
1036
1067
  let changed = false;
1037
1068
  const output: JsonObject = {};
1038
1069
  for (const key in schemaMap) {
@@ -1040,11 +1071,29 @@ function normalizeOpenAIResponsesSchemaMap(schemaMap: JsonObject, cache: WeakMap
1040
1071
  const child = schemaMap[key];
1041
1072
  const next = normalizeOpenAIResponsesSchemaNode(child, cache);
1042
1073
  if (next !== child) changed = true;
1074
+ if (stripUnsupportedRegexKeys && hasOpenAIUnsupportedRegexLookaround(key)) {
1075
+ changed = true;
1076
+ appendOpenAIResponsesFallbackPatternProperty(output, next);
1077
+ continue;
1078
+ }
1043
1079
  output[key] = next;
1044
1080
  }
1045
1081
  return changed ? output : schemaMap;
1046
1082
  }
1047
1083
 
1084
/**
 * Folds `schema` into the catch-all entry of `output`, keyed by
 * `OPENAI_RESPONSES_PATTERN_PROPERTIES_FALLBACK`.
 *
 * - No entry yet: `schema` is stored as-is.
 * - The entry is a bare `{ anyOf: [...] }`: the entry is replaced by a new
 *   `anyOf` with `schema` appended.
 * - Anything else: the entry is replaced by `{ anyOf: [existing, schema] }`.
 *
 * The existing entry is never modified in place. It may be a schema object
 * that was stored by reference, so editing its `anyOf` would change an object
 * that other code still holds.
 *
 * @param output - Schema map being built; only its fallback key is written.
 * @param schema - Schema to merge into the fallback entry.
 */
function appendOpenAIResponsesFallbackPatternProperty(output: JsonObject, schema: unknown): void {
	const existing = output[OPENAI_RESPONSES_PATTERN_PROPERTIES_FALLBACK];
	if (existing === undefined) {
		output[OPENAI_RESPONSES_PATTERN_PROPERTIES_FALLBACK] = schema;
		return;
	}
	const isBareAnyOf =
		isJsonObject(existing) && Array.isArray(existing.anyOf) && Object.keys(existing).length === 1;
	const alternatives = isBareAnyOf ? [...(existing.anyOf as unknown[]), schema] : [existing, schema];
	output[OPENAI_RESPONSES_PATTERN_PROPERTIES_FALLBACK] = { anyOf: alternatives };
}
1096
+
1048
1097
  // ---------------------------------------------------------------------------
1049
1098
  // OpenAI strict mode — sanitize + enforce
1050
1099
  // ---------------------------------------------------------------------------
@@ -0,0 +1,117 @@
1
+ /**
2
+ * Detects tool-parameter schemas that pass structural JSON-Schema validation
3
+ * (so {@link isValidJsonSchema} accepts them) yet make OpenAI-style providers
4
+ * reject the whole request with HTTP 400 — namely an `enum`/`const` whose
5
+ * value(s) cannot satisfy the node's declared `type`. MCP servers emit these
6
+ * when a nullable/array branch is built incorrectly (e.g. a non-null `enum`
7
+ * copied onto a `type: "null"` branch, or an `enum` placed on an `array`
8
+ * schema instead of its `items`). One such tool 400s the entire turn, so
9
+ * callers quarantine just the offending tool. See issue #2652.
10
+ */
11
+
12
+ type JsonRecord = Record<string, unknown>;
13
+
14
+ const SCHEMA_TYPE_NAMES: Record<string, true> = {
15
+ string: true,
16
+ number: true,
17
+ integer: true,
18
+ boolean: true,
19
+ object: true,
20
+ array: true,
21
+ null: true,
22
+ };
23
+
24
/**
 * Tells whether a JSON value is an instance of the given JSON-Schema `type`
 * name.
 *
 * @param value - The enum/const value under test.
 * @param type - A JSON-Schema type keyword.
 * @returns `false` only when `type` is a recognised keyword that `value` does
 *   not satisfy. Unrecognised keywords return `true` so they are never
 *   flagged (forward compatibility).
 */
function jsonValueMatchesType(value: unknown, type: string): boolean {
	if (type === "string" || type === "number" || type === "boolean") {
		return typeof value === type;
	}
	if (type === "integer") return typeof value === "number" && Number.isInteger(value);
	if (type === "null") return value === null;
	if (type === "array") return Array.isArray(value);
	if (type === "object") {
		// Arrays and null are `typeof "object"` too, so rule both out.
		return value !== null && typeof value === "object" && !Array.isArray(value);
	}
	return true;
}
45
+
46
/**
 * Extracts the recognised JSON-Schema type names a node declares via `type`.
 *
 * `type` may be a single string or an array. Non-string entries and names not
 * listed in `SCHEMA_TYPE_NAMES` are dropped. Membership is tested with an
 * own-property check, so names inherited from `Object.prototype` (such as
 * `"constructor"` or `"toString"`) are not mistaken for schema types.
 *
 * @param node - A schema object.
 * @returns The recognised type names in declaration order; empty when the
 *   node declares no recognised type.
 */
function declaredTypes(node: JsonRecord): string[] {
	const isKnownTypeName = (candidate: unknown): candidate is string =>
		typeof candidate === "string" && Object.prototype.hasOwnProperty.call(SCHEMA_TYPE_NAMES, candidate);

	const declared = node.type;
	if (Array.isArray(declared)) return declared.filter(isKnownTypeName);
	return isKnownTypeName(declared) ? [declared] : [];
}
52
+
53
+ const CHILD_MAP_KEYS = ["properties", "patternProperties", "$defs", "definitions", "dependentSchemas"] as const;
54
+ const CHILD_SCHEMA_KEYS = [
55
+ "items",
56
+ "contains",
57
+ "not",
58
+ "if",
59
+ "then",
60
+ "else",
61
+ "propertyNames",
62
+ "additionalProperties",
63
+ "unevaluatedProperties",
64
+ "unevaluatedItems",
65
+ ] as const;
66
+ const CHILD_ARRAY_KEYS = ["anyOf", "oneOf", "allOf", "prefixItems"] as const;
67
+
68
/**
 * Walk a tool parameter schema for OpenAI-strict `enum`/`const`-vs-`type`
 * contradictions. Returns a JSON-pointer-ish path to the first offending node,
 * or `null` when the schema is safe to emit.
 *
 * Every object or array is inspected at most once, so a self-referential
 * schema object terminates instead of overflowing the stack, and a subtree
 * shared by several parents is not re-walked. A violation depends only on the
 * node itself, so skipping a repeat visit cannot hide one.
 *
 * @param schema - Schema (or array of schemas) to inspect; non-objects are
 *   ignored.
 * @param path - Path prefix for the reported location; defaults to `"#"`.
 * @returns Path ending in `/enum` or `/const` for the first contradiction
 *   found in traversal order, else `null`.
 */
export function findStrictToolSchemaViolation(schema: unknown, path = "#"): string | null {
	return findStrictToolSchemaViolationIn(schema, path, new WeakSet<object>());
}

/** Recursive worker for {@link findStrictToolSchemaViolation}; `visited` holds every node already entered. */
function findStrictToolSchemaViolationIn(schema: unknown, path: string, visited: WeakSet<object>): string | null {
	if (typeof schema !== "object" || schema === null) return null;
	if (visited.has(schema)) return null;
	visited.add(schema);

	if (Array.isArray(schema)) {
		for (let i = 0; i < schema.length; i++) {
			const hit = findStrictToolSchemaViolationIn(schema[i], `${path}/${i}`, visited);
			if (hit) return hit;
		}
		return null;
	}
	const node = schema as JsonRecord;

	// Only nodes that declare a recognised type can contradict it.
	const types = declaredTypes(node);
	if (types.length > 0) {
		if (Array.isArray(node.enum) && node.enum.some(v => !types.some(t => jsonValueMatchesType(v, t)))) {
			return `${path}/enum`;
		}
		if ("const" in node && !types.some(t => jsonValueMatchesType(node.const, t))) {
			return `${path}/const`;
		}
	}

	// Keywords whose value is a map of name -> subschema.
	for (const key of CHILD_MAP_KEYS) {
		const sub = node[key];
		if (sub && typeof sub === "object" && !Array.isArray(sub)) {
			for (const k of Object.keys(sub as JsonRecord)) {
				const hit = findStrictToolSchemaViolationIn((sub as JsonRecord)[k], `${path}/${key}/${k}`, visited);
				if (hit) return hit;
			}
		}
	}
	// Keywords whose value is a single subschema (or, for `items`, possibly an array).
	for (const key of CHILD_SCHEMA_KEYS) {
		if (key in node) {
			const hit = findStrictToolSchemaViolationIn(node[key], `${path}/${key}`, visited);
			if (hit) return hit;
		}
	}
	// Keywords whose value is an array of subschemas.
	for (const key of CHILD_ARRAY_KEYS) {
		const arr = node[key];
		if (Array.isArray(arr)) {
			const hit = findStrictToolSchemaViolationIn(arr, `${path}/${key}`, visited);
			if (hit) return hit;
		}
	}
	return null;
}