@oh-my-pi/pi-ai 16.0.0 → 16.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,37 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [16.0.2] - 2026-06-16
6
+
7
+ ### Added
8
+
9
+ - Added `UMANS_WEBSEARCH_PROVIDER=native|exa` support for routing Umans gateway-owned web search requests.
10
+
11
+ ### Fixed
12
+
13
+ - A single MCP tool whose input schema can't be emitted as a valid strict tool schema for the active provider no longer fails the whole turn with HTTP 400. `convertTools` (openai-responses) now validates each tool's emitted parameter schema for `enum`/`const`-vs-`type` contradictions that pass structural JSON-Schema validation but are rejected by the provider — e.g. a non-null `enum` on a `type: "null"` node, or an `enum` on an `array` node — and quarantines just the offending tool with a `logger.warn` naming the tool and schema path, keeping every other tool usable. Adds `findStrictToolSchemaViolation` to `@oh-my-pi/pi-ai/utils/schema` ([#2652](https://github.com/can1357/oh-my-pi/issues/2652)).
14
+ - Fixed OpenAI Responses-compatible streams from Ollama/local hosts dropping arguments for parallel tool calls whose deltas use `fc_<call_id>` item ids, which left earlier `ast_grep` calls with `{}` and failed validation. ([#2715](https://github.com/can1357/oh-my-pi/issues/2715))
15
+ - Fixed dialect transcript rendering so literal thinking envelopes are unwrapped before adding the dialect's own thinking tags, preventing nested `<thinking>` output in advisor raw dumps ([#2700](https://github.com/can1357/oh-my-pi/issues/2700)).
16
+ - Fixed Anthropic-compatible Umans requests to escape client tool names and forward gateway web search headers, so Kimi answers normally instead of returning raw gateway search results.
17
+ - Fixed Google Gemini tool calls with `toolChoice: "auto"` serializing an explicit `toolConfig` AUTO mode, which can cause Gemini-3 models to leak raw planning JSON instead of executing tools. ([#2776](https://github.com/can1357/oh-my-pi/issues/2776))
18
+ - Fixed OpenAI-compatible Ollama completions that return empty `finish_reason:length` after filling `num_ctx` so they surface an actionable context-window error instead of an empty length stop. ([#2774](https://github.com/can1357/oh-my-pi/issues/2774))
19
+ - Fixed Codex browser login issuing credentials for the `opencode` OAuth originator while OMP requests identify as `pi`, which could make the first authenticated Codex request return 401 ([#2696](https://github.com/can1357/oh-my-pi/issues/2696)).
20
+
21
+ ## [16.0.1] - 2026-06-15
22
+
23
+ ### Added
24
+
25
+ - Added Umans AI Coding Plan API-key login support and `UMANS_AI_CODING_PLAN_API_KEY` environment fallback ([#2636](https://github.com/can1357/oh-my-pi/pull/2636) by [@oldschoola](https://github.com/oldschoola)).
26
+
27
+ ### Fixed
28
+
29
+ - Fixed OpenAI Responses, Azure OpenAI Responses, and Codex Responses providers ignoring async `onPayload` replacement bodies. Provider payload hooks can now transform the actual request body sent upstream, matching the Anthropic/Gemini replacement contract.
30
+ - Fixed OpenAI-compatible chat-completions streams that send object-shaped tool arguments in fragments by deep-merging nested objects and task arrays instead of replacing earlier chunks. ([#2617](https://github.com/can1357/oh-my-pi/issues/2617))
31
+ - Fixed OpenAI Responses strict-mode tool schema normalization for nullable enum MCP parameters so enum constraints are distributed to matching `anyOf` branches instead of being copied onto the `null` branch. ([#1835](https://github.com/can1357/oh-my-pi/issues/1835))
32
+ - Fixed Cursor provider formatting tool errors with the same `[Tool Result]` prefix as successful results, causing Composer models to misinterpret error messages (e.g. "Pattern must not be empty") as directives over long conversations. Errors now use a `[Tool Error]` prefix so the model can distinguish failures from successes in the prompt history. ([#1853](https://github.com/can1357/oh-my-pi/pull/1853))
33
+ - Fixed `validateToolArguments` silently accepting JSON-encoded array strings (e.g. `'["a","b"]'`) against `union(string, array<string>)` schemas — providers that double-serialize tool-call arguments (Z.AI / GLM) caused tools like `search` to receive the literal `["a","b"]` as a single path, producing zero matches (single element) or glob parse errors (multi-element). A new pre-validation pass parses JSON-array-shaped strings when the schema explicitly accepts both shapes. ([#1788](https://github.com/can1357/oh-my-pi/issues/1788))
34
+ - Fixed Anthropic thinking summaries that arrive wrapped in literal `<thinking>` tags so advisor/raw transcript dumps do not render nested thinking tags ([#2695](https://github.com/can1357/oh-my-pi/issues/2695)).
35
+
5
36
  ## [16.0.0] - 2026-06-15
6
37
 
7
38
  ### Breaking Changes
package/README.md CHANGED
@@ -68,6 +68,7 @@ Unified LLM API with automatic model discovery, provider configuration, token an
68
68
  - **Kilo Gateway** (supports OAuth `/login kilo` or `KILO_API_KEY`)
69
69
  - **LiteLLM** (requires `LITELLM_API_KEY`)
70
70
  - **zAI** (requires `ZAI_API_KEY`)
71
+ - **Umans AI Coding Plan** (supports `/login umans` or `UMANS_AI_CODING_PLAN_API_KEY`)
71
72
  - **MiniMax Token Plan** (requires `MINIMAX_CODE_API_KEY` or `MINIMAX_CODE_CN_API_KEY`)
72
73
  - **Xiaomi MiMo** (requires `XIAOMI_API_KEY`)
73
74
  - **ZenMux** (requires `ZENMUX_API_KEY`)
@@ -952,6 +953,7 @@ In Node.js environments, you can set environment variables to avoid passing API
952
953
  | Ollama Cloud | `OLLAMA_CLOUD_API_KEY` |
953
954
  | Qwen Portal | `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY` |
954
955
  | zAI | `ZAI_API_KEY` |
956
+ | Umans AI Coding Plan | `UMANS_AI_CODING_PLAN_API_KEY` |
955
957
  | MiniMax Code | `MINIMAX_CODE_API_KEY` (international) or `MINIMAX_CODE_CN_API_KEY` (China) |
956
958
  | Xiaomi MiMo | `XIAOMI_API_KEY` |
957
959
  | ZenMux | `ZENMUX_API_KEY` |
@@ -978,6 +980,7 @@ Provider endpoint defaults for the current OpenAI-compatible integrations:
978
980
  - Xiaomi MiMo: `https://api.xiaomimimo.com/anthropic`
979
981
  - ZenMux (OpenAI): `https://zenmux.ai/api/v1`
980
982
  - ZenMux (Anthropic models): `https://zenmux.ai/api/anthropic`
983
+ - Umans AI Coding Plan: `https://api.code.umans.ai`
981
984
  - vLLM: `http://127.0.0.1:8000/v1`
982
985
  - Ollama: local OpenAI-compatible runtime (`http://127.0.0.1:11434/v1`)
983
986
  - Ollama Cloud: native Ollama API host (`https://ollama.com/api`, configured here as base URL `https://ollama.com`)
@@ -8,6 +8,8 @@ export interface AnthropicRequestOptions {
8
8
  timeout?: number;
9
9
  /** Per-request retry budget override. */
10
10
  maxRetries?: number;
11
+ /** Per-request headers merged after client defaults. */
12
+ headers?: Record<string, string>;
11
13
  }
12
14
  /**
13
15
  * Extra `RequestInit` fields merged into every fetch call. Bun extends
@@ -1,6 +1,6 @@
1
- import type { Model, ServiceTier, StreamFunction, StreamOptions, Tool, ToolChoice } from "../types";
1
+ import type { Context, Model, ProviderSessionState, ServiceTier, StreamFunction, StreamOptions, Tool, ToolChoice } from "../types";
2
2
  import { type OpenAIResponsesToolChoice } from "../utils/tool-choice";
3
- import type { Tool as OpenAITool } from "./openai-responses-wire";
3
+ import type { Tool as OpenAITool, ResponseCreateParamsStreaming, ResponseInput } from "./openai-responses-wire";
4
4
  export declare function normalizeOpenAIResponsesPromptCacheKey(sessionId: string | undefined): string | undefined;
5
5
  export interface OpenAIResponsesOptions extends StreamOptions {
6
6
  reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -56,11 +56,46 @@ export interface OpenAIResponsesOptions extends StreamOptions {
56
56
  */
57
57
  extraBody?: Record<string, unknown>;
58
58
  }
59
+ interface OpenAIResponsesProviderSessionState extends ProviderSessionState {
60
+ nativeHistoryReplayWarmed: boolean;
61
+ /** Stateful `previous_response_id` chain baselines, keyed by baseUrl/model/session. */
62
+ chains: Map<string, OpenAIResponsesChainState>;
63
+ }
64
+ interface OpenAIResponsesChainState {
65
+ /**
66
+ * Wire params of the last successful turn, with per-turn trailing
67
+ * scaffolding stripped from `input` (never carries previous_response_id).
68
+ */
69
+ lastParams?: OpenAIResponsesSamplingParams;
70
+ lastResponseId?: string;
71
+ /** Output items of the last response, in replay-sanitized form (matches next-turn input). */
72
+ lastResponseItems?: ResponseInput;
73
+ canAppend: boolean;
74
+ /** Consecutive stale-previous-response failures; reset on a successful chained completion. */
75
+ staleFailures: number;
76
+ /** Set once chaining is judged unsupported for this session (circuit breaker). */
77
+ disabled: boolean;
78
+ }
79
+ type OpenAIResponsesSamplingParams = ResponseCreateParamsStreaming & {
80
+ top_p?: number;
81
+ top_k?: number;
82
+ min_p?: number;
83
+ presence_penalty?: number;
84
+ repetition_penalty?: number;
85
+ stream_options?: {
86
+ include_obfuscation?: boolean;
87
+ };
88
+ };
59
89
  /**
60
90
  * Generate function for OpenAI Responses API
61
91
  */
62
92
  export declare const streamOpenAIResponses: StreamFunction<"openai-responses">;
63
93
  export declare function getOpenAIResponsesCacheSessionId(options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId" | "promptCacheKey"> | undefined): string | undefined;
94
+ /** @internal Exported for tests. */
95
+ export declare function buildParams(model: Model<"openai-responses">, context: Context, options: OpenAIResponsesOptions | undefined, providerSessionState: OpenAIResponsesProviderSessionState | undefined): {
96
+ params: OpenAIResponsesSamplingParams;
97
+ trailingScaffoldingItems: number;
98
+ };
64
99
  /**
65
100
  * Whether this model should get the OpenAI custom-tool grammar variant
66
101
  * for `apply_patch`. The generated model catalog sets
@@ -72,4 +107,5 @@ export declare function supportsFreeformApplyPatch(model: Model<"openai-response
72
107
  /** @internal Exported for tests. */
73
108
  export declare function mapOpenAIResponsesToolChoiceForTools(choice: ToolChoice | undefined, tools: Tool[], model: Model<"openai-responses">): OpenAIResponsesToolChoice;
74
109
  /** @internal Exported for tests. */
75
- export declare function convertTools(tools: Tool[], strictMode: boolean, model: Model<"openai-responses">): OpenAITool[];
110
+ export declare function convertTools(tools: Tool[], strictMode: boolean, model: Model<"openai-responses">, onQuarantine?: (toolName: string, schemaPath: string) => void): OpenAITool[];
111
+ export {};
@@ -1,10 +1,20 @@
1
+ /**
2
+ * OpenAI Codex (ChatGPT OAuth) flow — browser and device-code flows.
3
+ */
1
4
  import type { OAuthController, OAuthCredentials } from "./types";
2
5
  export declare function decodeJwt<T = Record<string, unknown>>(token: string): T | null;
6
+ /** Builds the Codex browser OAuth URL used by browser login; exported for auth regression tests. */
7
+ export declare function createOpenAICodexAuthorizationUrl(args: {
8
+ state: string;
9
+ redirectUri: string;
10
+ challenge: string;
11
+ originator?: string;
12
+ }): string;
3
13
  /**
4
14
  * Login with OpenAI Codex OAuth
5
15
  */
6
16
  export type OpenAICodexLoginOptions = OAuthController & {
7
- /** Optional originator value for OpenAI Codex OAuth. Default: "opencode". */
17
+ /** Optional originator value for OpenAI Codex OAuth. Default matches OMP Codex request headers. */
8
18
  originator?: string;
9
19
  };
10
20
  export declare function loginOpenAICodex(options: OpenAICodexLoginOptions): Promise<OAuthCredentials>;
@@ -208,6 +208,10 @@ declare const ALL: ({
208
208
  readonly id: "together";
209
209
  readonly name: "Together";
210
210
  readonly login: (cb: Parameters<typeof import("./together").loginTogether>[0]) => Promise<string>;
211
+ } | {
212
+ readonly id: "umans";
213
+ readonly name: "Umans AI Coding Plan";
214
+ readonly login: (cb: import("./oauth").OAuthLoginCallbacks) => Promise<string>;
211
215
  } | {
212
216
  readonly id: "venice";
213
217
  readonly name: "Venice";
@@ -0,0 +1,7 @@
1
+ import type { OAuthLoginCallbacks } from "./oauth/types";
2
+ export declare const loginUmans: (options: import("./oauth").OAuthController) => Promise<string>;
3
+ export declare const umansProvider: {
4
+ readonly id: "umans";
5
+ readonly name: "Umans AI Coding Plan";
6
+ readonly login: (cb: OAuthLoginCallbacks) => Promise<string>;
7
+ };
@@ -25,11 +25,12 @@ import type { AssistantMessage } from "../types";
25
25
  * - Kimi For Coding: "exceeded model token limit: X (requested: Y)"
26
26
  * - Anthropic 413: "request_too_large" (request body exceeds size limit)
27
27
  * - HTTP 413: "Payload Too Large" / "Request Entity Too Large"
28
+ * - Ollama OpenAI-compatible: "prompt filled the context window"
28
29
  *
29
30
  * **Unreliable detection:**
30
31
  * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
31
32
  * sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.
32
- * - Ollama: Silently truncates input without error. Cannot be detected via this function.
33
+ * - Ollama native: Silently truncates input without error. Cannot be detected via this function.
33
34
  * The response will have usage.input < expected, but we don't know the expected value.
34
35
  *
35
36
  * ## Custom Providers
@@ -8,6 +8,7 @@ export * from "./json-schema-validator";
8
8
  export * from "./meta-validator";
9
9
  export * from "./normalize";
10
10
  export * from "./spill";
11
+ export * from "./strict-tool-validation";
11
12
  export * from "./types";
12
13
  export * from "./typescript";
13
14
  export * from "./wire";
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Detects tool-parameter schemas that pass structural JSON-Schema validation
3
+ * (so {@link isValidJsonSchema} accepts them) yet make OpenAI-style providers
4
+ * reject the whole request with HTTP 400 — namely an `enum`/`const` whose
5
+ * value(s) cannot satisfy the node's declared `type`. MCP servers emit these
6
+ * when a nullable/array branch is built incorrectly (e.g. a non-null `enum`
7
+ * copied onto a `type: "null"` branch, or an `enum` placed on an `array`
8
+ * schema instead of its `items`). One such tool 400s the entire turn, so
9
+ * callers quarantine just the offending tool. See issue #2652.
10
+ */
11
+ /**
12
+ * Walk a tool parameter schema for OpenAI-strict `enum`/`const`-vs-`type`
13
+ * contradictions. Returns a JSON-pointer-ish path to the first offending node,
14
+ * or `null` when the schema is safe to emit.
15
+ */
16
+ export declare function findStrictToolSchemaViolation(schema: unknown, path?: string): string | null;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "16.0.0",
4
+ "version": "16.0.2",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -38,8 +38,8 @@
38
38
  },
39
39
  "dependencies": {
40
40
  "@bufbuild/protobuf": "^2.12.0",
41
- "@oh-my-pi/pi-catalog": "16.0.0",
42
- "@oh-my-pi/pi-utils": "16.0.0",
41
+ "@oh-my-pi/pi-catalog": "16.0.2",
42
+ "@oh-my-pi/pi-utils": "16.0.2",
43
43
  "partial-json": "^0.1.7",
44
44
  "zod": "^4"
45
45
  },
@@ -157,9 +157,64 @@ export function messageContentText(
157
157
  return text;
158
158
  }
159
159
 
160
+ function isAsciiWhitespace(code: number): boolean {
161
+ return code === 9 || code === 10 || code === 11 || code === 12 || code === 13 || code === 32;
162
+ }
163
+
164
+ function trimAsciiStart(text: string, start: number, end: number): number {
165
+ let cursor = start;
166
+ while (cursor < end && isAsciiWhitespace(text.charCodeAt(cursor))) cursor++;
167
+ return cursor;
168
+ }
169
+
170
+ function trimAsciiEnd(text: string, start: number, end: number): number {
171
+ let cursor = end;
172
+ while (cursor > start && isAsciiWhitespace(text.charCodeAt(cursor - 1))) cursor--;
173
+ return cursor;
174
+ }
175
+
176
+ function findDelimitedThinkingClose(open: string, close: string, text: string, start: number, end: number): number {
177
+ let depth = 1;
178
+ let cursor = start;
179
+ while (cursor < end) {
180
+ const nextClose = text.indexOf(close, cursor);
181
+ if (nextClose < 0 || nextClose >= end) return -1;
182
+ const nextOpen = text.indexOf(open, cursor);
183
+ if (nextOpen >= 0 && nextOpen < nextClose) {
184
+ depth++;
185
+ cursor = nextOpen + open.length;
186
+ continue;
187
+ }
188
+ depth--;
189
+ if (depth === 0) return nextClose;
190
+ cursor = nextClose + close.length;
191
+ }
192
+ return -1;
193
+ }
194
+
195
+ function unwrapDelimitedThinking(open: string, close: string, text: string): string {
196
+ const end = trimAsciiEnd(text, 0, text.length);
197
+ let cursor = trimAsciiStart(text, 0, end);
198
+ if (cursor >= end || !text.startsWith(open, cursor)) return text;
199
+
200
+ const segments: string[] = [];
201
+ while (cursor < end) {
202
+ if (!text.startsWith(open, cursor)) return text;
203
+ const innerStart = cursor + open.length;
204
+ const innerEnd = findDelimitedThinkingClose(open, close, text, innerStart, end);
205
+ if (innerEnd < 0) return text;
206
+
207
+ const trimmedInnerEnd = trimAsciiEnd(text, innerStart, innerEnd);
208
+ const trimmedInnerStart = trimAsciiStart(text, innerStart, trimmedInnerEnd);
209
+ segments.push(unwrapDelimitedThinking(open, close, text.slice(trimmedInnerStart, trimmedInnerEnd)));
210
+ cursor = trimAsciiStart(text, innerEnd + close.length, end);
211
+ }
212
+ return segments.join("\n");
213
+ }
214
+
160
215
  export function renderDelimitedThinking(open: string, close: string, text: string): string {
161
216
  if (!text) return "";
162
- return `${open}\n${text}\n${close}`;
217
+ return `${open}\n${unwrapDelimitedThinking(open, close, text)}\n${close}`;
163
218
  }
164
219
 
165
220
  export function chatMlTurn(role: "assistant" | "system" | "tool" | "user", body: string): string {
@@ -39,6 +39,8 @@ export interface AnthropicRequestOptions {
39
39
  timeout?: number;
40
40
  /** Per-request retry budget override. */
41
41
  maxRetries?: number;
42
+ /** Per-request headers merged after client defaults. */
43
+ headers?: Record<string, string>;
42
44
  }
43
45
 
44
46
  /**
@@ -217,7 +219,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
217
219
  return new AnthropicApiRequest(() => this.#send(path, params, options));
218
220
  }
219
221
 
220
- #buildHeaders(): Record<string, string> {
222
+ #buildHeaders(requestHeaders?: Record<string, string>): Record<string, string> {
221
223
  const opts = this.#options;
222
224
  const defaults = opts.defaultHeaders ?? {};
223
225
  const headers: Record<string, string> = {};
@@ -228,6 +230,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
228
230
  headers.Authorization = `Bearer ${opts.authToken}`;
229
231
  }
230
232
  Object.assign(headers, defaults);
233
+ Object.assign(headers, requestHeaders);
231
234
  return headers;
232
235
  }
233
236
 
@@ -242,7 +245,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
242
245
  const timeoutMs = options?.timeout ?? opts.timeout ?? DEFAULT_TIMEOUT_MS;
243
246
  const maxRetries = Math.max(0, options?.maxRetries ?? opts.maxRetries ?? DEFAULT_MAX_RETRIES);
244
247
  const url = `${opts.baseURL ?? "https://api.anthropic.com"}${path}`;
245
- const headers = this.#buildHeaders();
248
+ const headers = this.#buildHeaders(options?.headers);
246
249
  const body = JSON.stringify(params);
247
250
 
248
251
  for (let attempt = 0; ; attempt++) {
@@ -704,6 +704,8 @@ export function resolveAnthropicMetadataUserId(
704
704
  return generateClaudeJsonUserId(sessionId, accountId);
705
705
  }
706
706
  const ANTHROPIC_BUILTIN_TOOL_NAMES = new Set(["web_search", "code_execution", "text_editor", "computer"]);
707
+ const UMANS_WEBSEARCH_PROVIDER_HEADER = "X-Umans-Websearch-Provider";
708
+ const UMANS_WEBSEARCH_TOOL_NAME = "web_search";
707
709
  export const applyClaudeToolPrefix = (name: string): string => {
708
710
  if (!claudeToolPrefix) return name;
709
711
  if (ANTHROPIC_BUILTIN_TOOL_NAMES.has(name.toLowerCase())) return name;
@@ -721,6 +723,50 @@ export const stripClaudeToolPrefix = (name: string): string => {
721
723
  return name.slice(claudeToolPrefix.length);
722
724
  };
723
725
 
726
+ function normalizeUmansWebSearchProvider(value: string | undefined): "native" | "exa" | undefined {
727
+ const normalized = value?.trim().toLowerCase();
728
+ return normalized === "native" || normalized === "exa" ? normalized : undefined;
729
+ }
730
+
731
+ function getUmansWebSearchProvider(headers: Record<string, string> | undefined): "native" | "exa" | undefined {
732
+ const explicit = getHeaderCaseInsensitive(headers, UMANS_WEBSEARCH_PROVIDER_HEADER);
733
+ if (explicit !== undefined) return normalizeUmansWebSearchProvider(explicit);
734
+ return normalizeUmansWebSearchProvider($env.UMANS_WEBSEARCH_PROVIDER);
735
+ }
736
+
737
+ function isUmansAnthropicModel(model: Model<"anthropic-messages">): boolean {
738
+ return model.provider === "umans" || model.baseUrl.toLowerCase().includes("api.code.umans.ai");
739
+ }
740
+
741
+ function getUmansWebSearchHeader(
742
+ model: Model<"anthropic-messages">,
743
+ headers: Record<string, string> | undefined,
744
+ ): Record<string, string> | undefined {
745
+ if (!isUmansAnthropicModel(model)) return undefined;
746
+ const provider = getUmansWebSearchProvider(headers);
747
+ return provider ? { [UMANS_WEBSEARCH_PROVIDER_HEADER]: provider } : undefined;
748
+ }
749
+
750
+ function shouldUseUmansGatewayWebSearch(name: string, enabled: boolean): boolean {
751
+ return enabled && name.toLowerCase() === UMANS_WEBSEARCH_TOOL_NAME;
752
+ }
753
+
754
+ function encodeAnthropicToolName(
755
+ name: string,
756
+ isOAuthToken: boolean,
757
+ escapeBuiltinToolNames: boolean,
758
+ useUmansGatewayWebSearch = false,
759
+ ): string {
760
+ if (shouldUseUmansGatewayWebSearch(name, useUmansGatewayWebSearch)) return name;
761
+ if (escapeBuiltinToolNames) return `${claudeToolPrefix}${name}`;
762
+ return isOAuthToken ? applyClaudeToolPrefix(name) : name;
763
+ }
764
+
765
+ function decodeAnthropicToolName(name: string, isOAuthToken: boolean, escapeBuiltinToolNames: boolean): string {
766
+ if (isOAuthToken || escapeBuiltinToolNames) return stripClaudeToolPrefix(name);
767
+ return name;
768
+ }
769
+
724
770
  const ANTHROPIC_MANY_IMAGE_THRESHOLD = 20;
725
771
  const ANTHROPIC_MANY_IMAGE_MAX_DIMENSION = 2000;
726
772
 
@@ -1462,6 +1508,19 @@ export function isProviderRetryableError(error: unknown, provider?: string): boo
1462
1508
  return isRetryableError(error);
1463
1509
  }
1464
1510
 
1511
+ const THINKING_ENVELOPE_OPEN = "<thinking>";
1512
+ const THINKING_ENVELOPE_CLOSE = "</thinking>";
1513
+
1514
+ function unwrapAnthropicThinkingEnvelope(text: string): string | undefined {
1515
+ let current = text.trim();
1516
+ let stripped = false;
1517
+ while (current.startsWith(THINKING_ENVELOPE_OPEN) && current.endsWith(THINKING_ENVELOPE_CLOSE)) {
1518
+ current = current.slice(THINKING_ENVELOPE_OPEN.length, current.length - THINKING_ENVELOPE_CLOSE.length).trim();
1519
+ stripped = true;
1520
+ }
1521
+ return stripped ? current : undefined;
1522
+ }
1523
+
1465
1524
  function createEmptyUsage(premiumRequests?: number): Usage {
1466
1525
  return {
1467
1526
  input: 0,
@@ -1567,6 +1626,8 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1567
1626
  let disableStrictTools =
1568
1627
  (providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
1569
1628
  let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
1629
+ const mergedCallerHeaders = mergeHeaders(model.headers, options?.headers);
1630
+ const umansGatewayWebSearchHeader = getUmansWebSearchHeader(model, mergedCallerHeaders);
1570
1631
 
1571
1632
  let client: AnthropicMessagesClientLike;
1572
1633
  let isOAuthToken: boolean;
@@ -1628,7 +1689,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1628
1689
  }
1629
1690
  const preparedContext = await prepareAnthropicManyImageContext(context, model.input.includes("image"));
1630
1691
  const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
1631
- let nextParams = buildParams(model, preparedContext, isOAuthToken, options, disableStrictTools);
1692
+ let nextParams = buildParams(
1693
+ model,
1694
+ preparedContext,
1695
+ isOAuthToken,
1696
+ options,
1697
+ disableStrictTools,
1698
+ umansGatewayWebSearchHeader !== undefined,
1699
+ );
1632
1700
  if (disableStrictTools) {
1633
1701
  dropAnthropicStrictTools(nextParams);
1634
1702
  }
@@ -1668,6 +1736,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1668
1736
  if (block.type === "text") {
1669
1737
  stream.push({ type: "text_end", contentIndex, content: block.text, partial: output });
1670
1738
  } else if (block.type === "thinking") {
1739
+ const unwrappedThinking = unwrapAnthropicThinkingEnvelope(block.thinking);
1740
+ if (unwrappedThinking !== undefined) {
1741
+ block.thinking = unwrappedThinking;
1742
+ block.thinkingSignature = undefined;
1743
+ }
1671
1744
  stream.push({ type: "thinking_end", contentIndex, content: block.thinking, partial: output });
1672
1745
  } else if (block.type === "toolCall") {
1673
1746
  const finalJson =
@@ -1701,7 +1774,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1701
1774
  // to zero even when no watchdog timeout is configured (the helper only
1702
1775
  // pins it alongside a timeout; a client retry budget of 5 would otherwise
1703
1776
  // multiply with PROVIDER_MAX_RETRIES into up to 66 wire attempts).
1704
- const requestOptions = { ...createSdkStreamRequestOptions(requestSignal, requestTimeoutMs), maxRetries: 0 };
1777
+ const requestOptions = {
1778
+ ...createSdkStreamRequestOptions(requestSignal, requestTimeoutMs),
1779
+ maxRetries: 0,
1780
+ ...(umansGatewayWebSearchHeader ? { headers: umansGatewayWebSearchHeader } : {}),
1781
+ };
1705
1782
  const anthropicRequest: unknown =
1706
1783
  isOAuthToken && client.beta
1707
1784
  ? client.beta.messages.create({ ...params, stream: true }, requestOptions)
@@ -1884,9 +1961,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
1884
1961
  const block: Block = {
1885
1962
  type: "toolCall",
1886
1963
  id: event.content_block.id,
1887
- name: isOAuthToken
1888
- ? stripClaudeToolPrefix(event.content_block.name)
1889
- : event.content_block.name,
1964
+ name: decodeAnthropicToolName(
1965
+ event.content_block.name,
1966
+ isOAuthToken,
1967
+ model.compat.escapeBuiltinToolNames,
1968
+ ),
1890
1969
  arguments: event.content_block.input ?? {},
1891
1970
  partialJson: "",
1892
1971
  index: event.index,
@@ -2361,7 +2440,13 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
2361
2440
  isOAuth: oauthToken,
2362
2441
  extraBetas: betaFeatures,
2363
2442
  stream,
2364
- modelHeaders: mergeHeaders(model.headers, foundryCustomHeaders, headers, dynamicHeaders),
2443
+ modelHeaders: mergeHeaders(
2444
+ model.headers,
2445
+ foundryCustomHeaders,
2446
+ getUmansWebSearchHeader(model, mergeHeaders(model.headers, headers)),
2447
+ headers,
2448
+ dynamicHeaders,
2449
+ ),
2365
2450
  isCloudflareAiGateway: model.provider === "cloudflare-ai-gateway",
2366
2451
  claudeCodeSessionId,
2367
2452
  claudeCodeBetas: oauthToken
@@ -2382,10 +2467,9 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
2382
2467
  };
2383
2468
  }
2384
2469
 
2385
- // OpenCode Go's Anthropic-compatible gateway validates API-key auth through
2386
- // `X-Api-Key`; bearer-only requests reach the endpoint but return
2387
- // `Missing API key` before token validation.
2388
- if (model.provider === "opencode-go") {
2470
+ // OpenCode Go and Umans validate Anthropic-compatible API-key auth through
2471
+ // `X-Api-Key`; bearer-only requests reach the endpoint but fail auth.
2472
+ if (model.provider === "opencode-go" || model.provider === "umans") {
2389
2473
  delete defaultHeaders.Authorization;
2390
2474
  return {
2391
2475
  isOAuthToken: false,
@@ -2729,6 +2813,7 @@ function buildParams(
2729
2813
  isOAuthToken: boolean,
2730
2814
  options?: AnthropicOptions,
2731
2815
  disableStrictTools = false,
2816
+ useUmansGatewayWebSearch = false,
2732
2817
  ): MessageCreateParamsStreaming {
2733
2818
  const { cacheControl } = getCacheControl(model, options?.cacheRetention, isOAuthToken);
2734
2819
 
@@ -2750,6 +2835,8 @@ function buildParams(
2750
2835
  isOAuthToken,
2751
2836
  disableStrictTools || model.provider === "github-copilot",
2752
2837
  model.compat.supportsEagerToolInputStreaming,
2838
+ model.compat.escapeBuiltinToolNames,
2839
+ useUmansGatewayWebSearch,
2753
2840
  );
2754
2841
  } else if (isOAuthToken) {
2755
2842
  tools = [];
@@ -2875,10 +2962,16 @@ function buildParams(
2875
2962
  if (options?.toolChoice) {
2876
2963
  if (typeof options.toolChoice === "string") {
2877
2964
  params.tool_choice = { type: options.toolChoice };
2878
- } else if (isOAuthToken && options.toolChoice.name) {
2879
- params.tool_choice = { ...options.toolChoice, name: applyClaudeToolPrefix(options.toolChoice.name) };
2880
- } else {
2881
- params.tool_choice = options.toolChoice;
2965
+ } else if (options.toolChoice.name) {
2966
+ params.tool_choice = {
2967
+ ...options.toolChoice,
2968
+ name: encodeAnthropicToolName(
2969
+ options.toolChoice.name,
2970
+ isOAuthToken,
2971
+ model.compat.escapeBuiltinToolNames,
2972
+ useUmansGatewayWebSearch,
2973
+ ),
2974
+ };
2882
2975
  }
2883
2976
  // Claude Fable/Mythos 5 reject forced tool use outright ("tool_choice forces
2884
2977
  // tool use is not compatible with this model"). Downgrade any/tool → auto so the
@@ -3083,7 +3176,7 @@ export function convertAnthropicMessages(
3083
3176
  blocks.push({
3084
3177
  type: "tool_use",
3085
3178
  id: block.id,
3086
- name: isOAuthToken ? applyClaudeToolPrefix(block.name) : block.name,
3179
+ name: encodeAnthropicToolName(block.name, isOAuthToken, model.compat.escapeBuiltinToolNames),
3087
3180
  // Always sanitize: the model itself can emit lone-surrogate escapes
3088
3181
  // in tool-argument JSON (streamed out fine, rejected with a 400 on
3089
3182
  // replay by Anthropic's strict UTF-8 validation). toWellFormedDeep
@@ -3669,6 +3762,8 @@ function convertTools(
3669
3762
  isOAuthToken: boolean,
3670
3763
  disableStrictTools = false,
3671
3764
  supportsEagerToolInputStreaming = true,
3765
+ escapeBuiltinToolNames = false,
3766
+ useUmansGatewayWebSearch = false,
3672
3767
  ): AnthropicWireTool[] {
3673
3768
  if (!tools) return [];
3674
3769
  const schemaPlans = buildAnthropicToolSchemaPlans(tools, disableStrictTools);
@@ -3676,7 +3771,7 @@ function convertTools(
3676
3771
  return tools.map((tool, index) => {
3677
3772
  const plan = schemaPlans[index];
3678
3773
  const baseTool = {
3679
- name: isOAuthToken ? applyClaudeToolPrefix(tool.name) : tool.name,
3774
+ name: encodeAnthropicToolName(tool.name, isOAuthToken, escapeBuiltinToolNames, useUmansGatewayWebSearch),
3680
3775
  description: tool.description || "",
3681
3776
  input_schema: plan.inputSchema,
3682
3777
  };
@@ -139,8 +139,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
139
139
  try {
140
140
  const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
141
141
  const { url, headers } = buildAzureResponsesRequest(model, apiKey, options);
142
- const params = buildParams(model, context, options, deploymentName);
143
- options?.onPayload?.(params);
142
+ let params = buildParams(model, context, options, deploymentName);
143
+ const replacementPayload = await options?.onPayload?.(params, model);
144
+ if (replacementPayload !== undefined) {
145
+ params = replacementPayload as typeof params;
146
+ }
144
147
  const idleTimeoutMs = options?.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
145
148
  const firstEventTimeoutMs =
146
149
  options?.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
@@ -2335,9 +2335,10 @@ function buildRootPromptMessagesJson(
2335
2335
  } else if (msg.role === "toolResult") {
2336
2336
  const text = toolResultToText(msg);
2337
2337
  if (!text) continue;
2338
+ const prefix = msg.isError ? "[Tool Error]" : "[Tool Result]";
2338
2339
  pushJson({
2339
2340
  role: "user",
2340
- content: [{ type: "text", text: `[Tool Result]\n${text}` }],
2341
+ content: [{ type: "text", text: `${prefix}\n${text}` }],
2341
2342
  });
2342
2343
  }
2343
2344
  }
@@ -2415,10 +2416,11 @@ function buildConversationTurns(
2415
2416
  // Include tool results as assistant text for context
2416
2417
  const text = toolResultToText(stepMsg);
2417
2418
  if (text) {
2419
+ const prefix = stepMsg.isError ? "[Tool Error]" : "[Tool Result]";
2418
2420
  const step = create(ConversationStepSchema, {
2419
2421
  message: {
2420
2422
  case: "assistantMessage",
2421
- value: create(AssistantMessageSchema, { text: `[Tool Result]\n${text}` }),
2423
+ value: create(AssistantMessageSchema, { text: `${prefix}\n${text}` }),
2422
2424
  },
2423
2425
  });
2424
2426
  stepBlobIds.push(storeCursorBlob(blobStore, toBinary(ConversationStepSchema, step)));
@@ -793,9 +793,12 @@ export function buildGoogleGenerateContentParams<T extends "google-generative-ai
793
793
  if (context.tools && context.tools.length > 0 && options.toolChoice) {
794
794
  const choice = options.toolChoice;
795
795
  if (typeof choice === "string") {
796
- config.toolConfig = {
797
- functionCallingConfig: { mode: mapToolChoice(choice) },
798
- };
796
+ const mode = mapToolChoice(choice);
797
+ if (mode !== "AUTO") {
798
+ config.toolConfig = {
799
+ functionCallingConfig: { mode },
800
+ };
801
+ }
799
802
  } else {
800
803
  // Named-tool routing — `mode: "ANY"` plus an explicit allow-list. The
801
804
  // caller is responsible for ensuring the names exist in `context.tools`.