npm - @oh-my-pi/pi-ai - Versions diffs - 16.0.0 → 16.0.2 - Mend

@oh-my-pi/pi-ai 16.0.0 → 16.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/CHANGELOG.md +31 -0
package/README.md +3 -0
package/dist/types/providers/anthropic-client.d.ts +2 -0
package/dist/types/providers/openai-responses.d.ts +39 -3
package/dist/types/registry/oauth/openai-codex.d.ts +11 -1
package/dist/types/registry/registry.d.ts +4 -0
package/dist/types/registry/umans.d.ts +7 -0
package/dist/types/utils/overflow.d.ts +2 -1
package/dist/types/utils/schema/index.d.ts +1 -0
package/dist/types/utils/schema/strict-tool-validation.d.ts +16 -0
package/package.json +3 -3
package/src/dialect/rendering.ts +56 -1
package/src/providers/anthropic-client.ts +5 -2
package/src/providers/anthropic.ts +111 -16
package/src/providers/azure-openai-responses.ts +5 -2
package/src/providers/cursor.ts +4 -2
package/src/providers/google-shared.ts +6 -3
package/src/providers/openai-codex-responses.ts +20 -5
package/src/providers/openai-completions.ts +121 -19
package/src/providers/openai-responses-shared.ts +70 -13
package/src/providers/openai-responses.ts +65 -15
package/src/registry/oauth/openai-codex.ts +30 -13
package/src/registry/registry.ts +2 -0
package/src/registry/umans.ts +23 -0
package/src/utils/overflow.ts +5 -2
package/src/utils/schema/index.ts +1 -0
package/src/utils/schema/normalize.ts +40 -3
package/src/utils/schema/strict-tool-validation.ts +117 -0
package/src/utils/schema/wire.ts +18 -3
package/src/utils/validation.ts +159 -0

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,37 @@
 ## [Unreleased]
+## [16.0.2] - 2026-06-16
+### Added
+- Added `UMANS_WEBSEARCH_PROVIDER=native|exa` support for routing Umans gateway-owned web search requests.
+### Fixed
+- A single MCP tool whose input schema can't be emitted as a valid strict tool schema for the active provider no longer fails the whole turn with HTTP 400. `convertTools` (openai-responses) now validates each tool's emitted parameter schema for `enum`/`const`-vs-`type` contradictions that pass structural JSON-Schema validation but the provider rejects — e.g. a non-null `enum` on a `type: "null"` node, or an `enum` on an `array` node — and quarantines just the offending tool with a `logger.warn` naming the tool and schema path, keeping every other tool usable. Adds `findStrictToolSchemaViolation` to `@oh-my-pi/pi-ai/utils/schema` ([#2652](https://github.com/can1357/oh-my-pi/issues/2652))
+- Fixed OpenAI Responses-compatible streams from Ollama/local hosts dropping arguments for parallel tool calls whose deltas use `fc_<call_id>` item ids, which left earlier `ast_grep` calls with `{}` and failed validation. ([#2715](https://github.com/can1357/oh-my-pi/issues/2715))
+- Fixed dialect transcript rendering so literal thinking envelopes are unwrapped before adding the dialect's own thinking tags, preventing nested `<thinking>` output in advisor raw dumps ([#2700](https://github.com/can1357/oh-my-pi/issues/2700)).
+- Fixed Anthropic-compatible Umans requests escaping client tool names and forwarding gateway web search headers so Kimi answers normally instead of returning raw gateway search results.
+- Fixed Google Gemini tool calls with `toolChoice: "auto"` serializing an explicit `toolConfig` AUTO mode, which can cause Gemini-3 models to leak raw planning JSON instead of executing tools. ([#2776](https://github.com/can1357/oh-my-pi/issues/2776))
+- Fixed OpenAI-compatible Ollama completions that return empty `finish_reason:length` after filling `num_ctx` so they surface an actionable context-window error instead of an empty length stop. ([#2774](https://github.com/can1357/oh-my-pi/issues/2774))
+- Fixed Codex browser login issuing credentials for the `opencode` OAuth originator while OMP requests identify as `pi`, which could make the first authenticated Codex request return 401 ([#2696](https://github.com/can1357/oh-my-pi/issues/2696)).
+## [16.0.1] - 2026-06-15
+### Added
+- Added Umans AI Coding Plan API-key login support and `UMANS_AI_CODING_PLAN_API_KEY` environment fallback ([#2636](https://github.com/can1357/oh-my-pi/pull/2636) by [@oldschoola](https://github.com/oldschoola)).
+### Fixed
+- Fixed OpenAI Responses, Azure OpenAI Responses, and Codex Responses providers ignoring async `onPayload` replacement bodies. Provider payload hooks can now transform the actual request body sent upstream, matching the Anthropic/Gemini replacement contract.
+- Fixed OpenAI-compatible chat-completions streams that send object-shaped tool arguments in fragments by deep-merging nested objects and task arrays instead of replacing earlier chunks. ([#2617](https://github.com/can1357/oh-my-pi/issues/2617))
+- Fixed OpenAI Responses strict-mode tool schema normalization for nullable enum MCP parameters so enum constraints are distributed to matching `anyOf` branches instead of being copied onto the `null` branch. ([#1835](https://github.com/can1357/oh-my-pi/issues/1835))
+- Fixed Cursor provider formatting tool errors with the same `[Tool Result]` prefix as successful results, causing Composer models to misinterpret error messages (e.g. "Pattern must not be empty") as directives over long conversations. Errors now use a `[Tool Error]` prefix so the model can distinguish failures from successes in the prompt history. ([#1853](https://github.com/can1357/oh-my-pi/pull/1853))
+- Fixed `validateToolArguments` silently accepting JSON-encoded array strings (e.g. `'["a","b"]'`) against `union(string, array<string>)` schemas — providers that double-serialize tool-call arguments (Z.AI / GLM) caused tools like `search` to receive the literal `["a","b"]` as a single path, producing zero matches (single element) or glob parse errors (multi-element). A new pre-validation pass parses JSON-array-shaped strings when the schema explicitly accepts both shapes. ([#1788](https://github.com/can1357/oh-my-pi/issues/1788))
+- Fixed Anthropic thinking summaries that arrive wrapped in literal `<thinking>` tags so advisor/raw transcript dumps do not render nested thinking tags ([#2695](https://github.com/can1357/oh-my-pi/issues/2695)).
 ## [16.0.0] - 2026-06-15
 ### Breaking Changes

package/README.md CHANGED Viewed

@@ -68,6 +68,7 @@ Unified LLM API with automatic model discovery, provider configuration, token an
 - **Kilo Gateway** (supports OAuth `/login kilo` or `KILO_API_KEY`)
 - **LiteLLM** (requires `LITELLM_API_KEY`)
 - **zAI** (requires `ZAI_API_KEY`)
+- **Umans AI Coding Plan** (supports `/login umans` or `UMANS_AI_CODING_PLAN_API_KEY`)
 - **MiniMax Token Plan** (requires `MINIMAX_CODE_API_KEY` or `MINIMAX_CODE_CN_API_KEY`)
 - **Xiaomi MiMo** (requires `XIAOMI_API_KEY`)
 - **ZenMux** (requires `ZENMUX_API_KEY`)
@@ -952,6 +953,7 @@ In Node.js environments, you can set environment variables to avoid passing API
 | Ollama Cloud   | `OLLAMA_CLOUD_API_KEY`                                                     |
 | Qwen Portal    | `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY`                                  |
 | zAI            | `ZAI_API_KEY`                                                                |
+| Umans AI Coding Plan | `UMANS_AI_CODING_PLAN_API_KEY`                                           |
 | MiniMax Code   | `MINIMAX_CODE_API_KEY` (international) or `MINIMAX_CODE_CN_API_KEY` (China) |
 | Xiaomi MiMo    | `XIAOMI_API_KEY`                                                             |
 | ZenMux         | `ZENMUX_API_KEY`                                                             |
@@ -978,6 +980,7 @@ Provider endpoint defaults for the current OpenAI-compatible integrations:
 - Xiaomi MiMo: `https://api.xiaomimimo.com/anthropic`
 - ZenMux (OpenAI): `https://zenmux.ai/api/v1`
 - ZenMux (Anthropic models): `https://zenmux.ai/api/anthropic`
+- Umans AI Coding Plan: `https://api.code.umans.ai`
 - vLLM: `http://127.0.0.1:8000/v1`
 - Ollama: local OpenAI-compatible runtime (`http://127.0.0.1:11434/v1`)
 - Ollama Cloud: native Ollama API host (`https://ollama.com/api`, configured here as base URL `https://ollama.com`)

package/dist/types/providers/anthropic-client.d.ts CHANGED Viewed

@@ -8,6 +8,8 @@ export interface AnthropicRequestOptions {
     timeout?: number;
     /** Per-request retry budget override. */
     maxRetries?: number;
+    /** Per-request headers merged after client defaults. */
+    headers?: Record<string, string>;
 }
 /**
  * Extra `RequestInit` fields merged into every fetch call. Bun extends

package/dist/types/providers/openai-responses.d.ts CHANGED Viewed

@@ -1,6 +1,6 @@
-import type { Model, ServiceTier, StreamFunction, StreamOptions, Tool, ToolChoice } from "../types";
+import type { Context, Model, ProviderSessionState, ServiceTier, StreamFunction, StreamOptions, Tool, ToolChoice } from "../types";
 import { type OpenAIResponsesToolChoice } from "../utils/tool-choice";
-import type { Tool as OpenAITool } from "./openai-responses-wire";
+import type { Tool as OpenAITool, ResponseCreateParamsStreaming, ResponseInput } from "./openai-responses-wire";
 export declare function normalizeOpenAIResponsesPromptCacheKey(sessionId: string | undefined): string | undefined;
 export interface OpenAIResponsesOptions extends StreamOptions {
     reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -56,11 +56,46 @@ export interface OpenAIResponsesOptions extends StreamOptions {
      */
     extraBody?: Record<string, unknown>;
 }
+interface OpenAIResponsesProviderSessionState extends ProviderSessionState {
+    nativeHistoryReplayWarmed: boolean;
+    /** Stateful `previous_response_id` chain baselines, keyed by baseUrl/model/session. */
+    chains: Map<string, OpenAIResponsesChainState>;
+}
+interface OpenAIResponsesChainState {
+    /**
+     * Wire params of the last successful turn, with per-turn trailing
+     * scaffolding stripped from `input` (never carries previous_response_id).
+     */
+    lastParams?: OpenAIResponsesSamplingParams;
+    lastResponseId?: string;
+    /** Output items of the last response, in replay-sanitized form (matches next-turn input). */
+    lastResponseItems?: ResponseInput;
+    canAppend: boolean;
+    /** Consecutive stale-previous-response failures; reset on a successful chained completion. */
+    staleFailures: number;
+    /** Set once chaining is judged unsupported for this session (circuit breaker). */
+    disabled: boolean;
+}
+type OpenAIResponsesSamplingParams = ResponseCreateParamsStreaming & {
+    top_p?: number;
+    top_k?: number;
+    min_p?: number;
+    presence_penalty?: number;
+    repetition_penalty?: number;
+    stream_options?: {
+        include_obfuscation?: boolean;
+    };
+};
 /**
  * Generate function for OpenAI Responses API
  */
 export declare const streamOpenAIResponses: StreamFunction<"openai-responses">;
 export declare function getOpenAIResponsesCacheSessionId(options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId" | "promptCacheKey"> | undefined): string | undefined;
+/** @internal Exported for tests. */
+export declare function buildParams(model: Model<"openai-responses">, context: Context, options: OpenAIResponsesOptions | undefined, providerSessionState: OpenAIResponsesProviderSessionState | undefined): {
+    params: OpenAIResponsesSamplingParams;
+    trailingScaffoldingItems: number;
+};
 /**
  * Whether this model should get the OpenAI custom-tool grammar variant
  * for `apply_patch`. The generated model catalog sets
@@ -72,4 +107,5 @@ export declare function supportsFreeformApplyPatch(model: Model<"openai-response
 /** @internal Exported for tests. */
 export declare function mapOpenAIResponsesToolChoiceForTools(choice: ToolChoice | undefined, tools: Tool[], model: Model<"openai-responses">): OpenAIResponsesToolChoice;
 /** @internal Exported for tests. */
-export declare function convertTools(tools: Tool[], strictMode: boolean, model: Model<"openai-responses">): OpenAITool[];
+export declare function convertTools(tools: Tool[], strictMode: boolean, model: Model<"openai-responses">, onQuarantine?: (toolName: string, schemaPath: string) => void): OpenAITool[];
+export {};

package/dist/types/registry/oauth/openai-codex.d.ts CHANGED Viewed

@@ -1,10 +1,20 @@
+/**
+ * OpenAI Codex (ChatGPT OAuth) flow — browser and device-code flows.
+ */
 import type { OAuthController, OAuthCredentials } from "./types";
 export declare function decodeJwt<T = Record<string, unknown>>(token: string): T | null;
+/** Builds the Codex browser OAuth URL used by browser login; exported for auth regression tests. */
+export declare function createOpenAICodexAuthorizationUrl(args: {
+    state: string;
+    redirectUri: string;
+    challenge: string;
+    originator?: string;
+}): string;
 /**
  * Login with OpenAI Codex OAuth
  */
 export type OpenAICodexLoginOptions = OAuthController & {
-    /** Optional originator value for OpenAI Codex OAuth. Default: "opencode". */
+    /** Optional originator value for OpenAI Codex OAuth. Default matches OMP Codex request headers. */
     originator?: string;
 };
 export declare function loginOpenAICodex(options: OpenAICodexLoginOptions): Promise<OAuthCredentials>;

package/dist/types/registry/registry.d.ts CHANGED Viewed

@@ -208,6 +208,10 @@ declare const ALL: ({
     readonly id: "together";
     readonly name: "Together";
     readonly login: (cb: Parameters<typeof import("./together").loginTogether>[0]) => Promise<string>;
+} | {
+    readonly id: "umans";
+    readonly name: "Umans AI Coding Plan";
+    readonly login: (cb: import("./oauth").OAuthLoginCallbacks) => Promise<string>;
 } | {
     readonly id: "venice";
     readonly name: "Venice";

package/dist/types/registry/umans.d.ts ADDED Viewed

@@ -0,0 +1,7 @@
+import type { OAuthLoginCallbacks } from "./oauth/types";
+export declare const loginUmans: (options: import("./oauth").OAuthController) => Promise<string>;
+export declare const umansProvider: {
+    readonly id: "umans";
+    readonly name: "Umans AI Coding Plan";
+    readonly login: (cb: OAuthLoginCallbacks) => Promise<string>;
+};

package/dist/types/utils/overflow.d.ts CHANGED Viewed

@@ -25,11 +25,12 @@ import type { AssistantMessage } from "../types";
  * - Kimi For Coding: "exceeded model token limit: X (requested: Y)"
  * - Anthropic 413: "request_too_large" (request body exceeds size limit)
  * - HTTP 413: "Payload Too Large" / "Request Entity Too Large"
+ * - Ollama OpenAI-compatible: "prompt filled the context window"
  *
  * **Unreliable detection:**
  * - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
  *   sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.
- * - Ollama: Silently truncates input without error. Cannot be detected via this function.
+ * - Ollama native: Silently truncates input without error. Cannot be detected via this function.
  *   The response will have usage.input < expected, but we don't know the expected value.
  *
  * ## Custom Providers

package/dist/types/utils/schema/index.d.ts CHANGED Viewed

@@ -8,6 +8,7 @@ export * from "./json-schema-validator";
 export * from "./meta-validator";
 export * from "./normalize";
 export * from "./spill";
+export * from "./strict-tool-validation";
 export * from "./types";
 export * from "./typescript";
 export * from "./wire";

package/dist/types/utils/schema/strict-tool-validation.d.ts ADDED Viewed

@@ -0,0 +1,16 @@
+/**
+ * Detects tool-parameter schemas that pass structural JSON-Schema validation
+ * (so {@link isValidJsonSchema} accepts them) yet make OpenAI-style providers
+ * reject the whole request with HTTP 400 — namely an `enum`/`const` whose
+ * value(s) cannot satisfy the node's declared `type`. MCP servers emit these
+ * when a nullable/array branch is built incorrectly (e.g. a non-null `enum`
+ * copied onto a `type: "null"` branch, or an `enum` placed on an `array`
+ * schema instead of its `items`). One such tool 400s the entire turn, so
+ * callers quarantine just the offending tool. See issue #2652.
+ */
+/**
+ * Walk a tool parameter schema for OpenAI-strict `enum`/`const`-vs-`type`
+ * contradictions. Returns a JSON-pointer-ish path to the first offending node,
+ * or `null` when the schema is safe to emit.
+ */
+export declare function findStrictToolSchemaViolation(schema: unknown, path?: string): string | null;

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
 	"type": "module",
 	"name": "@oh-my-pi/pi-ai",
-	"version": "16.0.0",
+	"version": "16.0.2",
 	"description": "Unified LLM API with automatic model discovery and provider configuration",
 	"homepage": "https://omp.sh",
 	"author": "Can Boluk",
@@ -38,8 +38,8 @@
 	},
 	"dependencies": {
 		"@bufbuild/protobuf": "^2.12.0",
-		"@oh-my-pi/pi-catalog": "16.0.0",
-		"@oh-my-pi/pi-utils": "16.0.0",
+		"@oh-my-pi/pi-catalog": "16.0.2",
+		"@oh-my-pi/pi-utils": "16.0.2",
 		"partial-json": "^0.1.7",
 		"zod": "^4"
 	},

package/src/dialect/rendering.ts CHANGED Viewed

@@ -157,9 +157,64 @@ export function messageContentText(
 	return text;
 }
+function isAsciiWhitespace(code: number): boolean {
+	return code === 9 || code === 10 || code === 11 || code === 12 || code === 13 || code === 32;
+}
+function trimAsciiStart(text: string, start: number, end: number): number {
+	let cursor = start;
+	while (cursor < end && isAsciiWhitespace(text.charCodeAt(cursor))) cursor++;
+	return cursor;
+}
+function trimAsciiEnd(text: string, start: number, end: number): number {
+	let cursor = end;
+	while (cursor > start && isAsciiWhitespace(text.charCodeAt(cursor - 1))) cursor--;
+	return cursor;
+}
+function findDelimitedThinkingClose(open: string, close: string, text: string, start: number, end: number): number {
+	let depth = 1;
+	let cursor = start;
+	while (cursor < end) {
+		const nextClose = text.indexOf(close, cursor);
+		if (nextClose < 0 || nextClose >= end) return -1;
+		const nextOpen = text.indexOf(open, cursor);
+		if (nextOpen >= 0 && nextOpen < nextClose) {
+			depth++;
+			cursor = nextOpen + open.length;
+			continue;
+		}
+		depth--;
+		if (depth === 0) return nextClose;
+		cursor = nextClose + close.length;
+	}
+	return -1;
+}
+function unwrapDelimitedThinking(open: string, close: string, text: string): string {
+	const end = trimAsciiEnd(text, 0, text.length);
+	let cursor = trimAsciiStart(text, 0, end);
+	if (cursor >= end || !text.startsWith(open, cursor)) return text;
+	const segments: string[] = [];
+	while (cursor < end) {
+		if (!text.startsWith(open, cursor)) return text;
+		const innerStart = cursor + open.length;
+		const innerEnd = findDelimitedThinkingClose(open, close, text, innerStart, end);
+		if (innerEnd < 0) return text;
+		const trimmedInnerEnd = trimAsciiEnd(text, innerStart, innerEnd);
+		const trimmedInnerStart = trimAsciiStart(text, innerStart, trimmedInnerEnd);
+		segments.push(unwrapDelimitedThinking(open, close, text.slice(trimmedInnerStart, trimmedInnerEnd)));
+		cursor = trimAsciiStart(text, innerEnd + close.length, end);
+	}
+	return segments.join("\n");
+}
 export function renderDelimitedThinking(open: string, close: string, text: string): string {
 	if (!text) return "";
-	return `${open}\n${text}\n${close}`;
+	return `${open}\n${unwrapDelimitedThinking(open, close, text)}\n${close}`;
 }
 export function chatMlTurn(role: "assistant" | "system" | "tool" | "user", body: string): string {

package/src/providers/anthropic-client.ts CHANGED Viewed

@@ -39,6 +39,8 @@ export interface AnthropicRequestOptions {
 	timeout?: number;
 	/** Per-request retry budget override. */
 	maxRetries?: number;
+	/** Per-request headers merged after client defaults. */
+	headers?: Record<string, string>;
 }
 /**
@@ -217,7 +219,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
 		return new AnthropicApiRequest(() => this.#send(path, params, options));
 	}
-	#buildHeaders(): Record<string, string> {
+	#buildHeaders(requestHeaders?: Record<string, string>): Record<string, string> {
 		const opts = this.#options;
 		const defaults = opts.defaultHeaders ?? {};
 		const headers: Record<string, string> = {};
@@ -228,6 +230,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
 			headers.Authorization = `Bearer ${opts.authToken}`;
 		}
 		Object.assign(headers, defaults);
+		Object.assign(headers, requestHeaders);
 		return headers;
 	}
@@ -242,7 +245,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
 		const timeoutMs = options?.timeout ?? opts.timeout ?? DEFAULT_TIMEOUT_MS;
 		const maxRetries = Math.max(0, options?.maxRetries ?? opts.maxRetries ?? DEFAULT_MAX_RETRIES);
 		const url = `${opts.baseURL ?? "https://api.anthropic.com"}${path}`;
-		const headers = this.#buildHeaders();
+		const headers = this.#buildHeaders(options?.headers);
 		const body = JSON.stringify(params);
 		for (let attempt = 0; ; attempt++) {

package/src/providers/anthropic.ts CHANGED Viewed

@@ -704,6 +704,8 @@ export function resolveAnthropicMetadataUserId(
 	return generateClaudeJsonUserId(sessionId, accountId);
 }
 const ANTHROPIC_BUILTIN_TOOL_NAMES = new Set(["web_search", "code_execution", "text_editor", "computer"]);
+const UMANS_WEBSEARCH_PROVIDER_HEADER = "X-Umans-Websearch-Provider";
+const UMANS_WEBSEARCH_TOOL_NAME = "web_search";
 export const applyClaudeToolPrefix = (name: string): string => {
 	if (!claudeToolPrefix) return name;
 	if (ANTHROPIC_BUILTIN_TOOL_NAMES.has(name.toLowerCase())) return name;
@@ -721,6 +723,50 @@ export const stripClaudeToolPrefix = (name: string): string => {
 	return name.slice(claudeToolPrefix.length);
 };
+function normalizeUmansWebSearchProvider(value: string | undefined): "native" | "exa" | undefined {
+	const normalized = value?.trim().toLowerCase();
+	return normalized === "native" || normalized === "exa" ? normalized : undefined;
+}
+function getUmansWebSearchProvider(headers: Record<string, string> | undefined): "native" | "exa" | undefined {
+	const explicit = getHeaderCaseInsensitive(headers, UMANS_WEBSEARCH_PROVIDER_HEADER);
+	if (explicit !== undefined) return normalizeUmansWebSearchProvider(explicit);
+	return normalizeUmansWebSearchProvider($env.UMANS_WEBSEARCH_PROVIDER);
+}
+function isUmansAnthropicModel(model: Model<"anthropic-messages">): boolean {
+	return model.provider === "umans" || model.baseUrl.toLowerCase().includes("api.code.umans.ai");
+}
+function getUmansWebSearchHeader(
+	model: Model<"anthropic-messages">,
+	headers: Record<string, string> | undefined,
+): Record<string, string> | undefined {
+	if (!isUmansAnthropicModel(model)) return undefined;
+	const provider = getUmansWebSearchProvider(headers);
+	return provider ? { [UMANS_WEBSEARCH_PROVIDER_HEADER]: provider } : undefined;
+}
+function shouldUseUmansGatewayWebSearch(name: string, enabled: boolean): boolean {
+	return enabled && name.toLowerCase() === UMANS_WEBSEARCH_TOOL_NAME;
+}
+function encodeAnthropicToolName(
+	name: string,
+	isOAuthToken: boolean,
+	escapeBuiltinToolNames: boolean,
+	useUmansGatewayWebSearch = false,
+): string {
+	if (shouldUseUmansGatewayWebSearch(name, useUmansGatewayWebSearch)) return name;
+	if (escapeBuiltinToolNames) return `${claudeToolPrefix}${name}`;
+	return isOAuthToken ? applyClaudeToolPrefix(name) : name;
+}
+function decodeAnthropicToolName(name: string, isOAuthToken: boolean, escapeBuiltinToolNames: boolean): string {
+	if (isOAuthToken || escapeBuiltinToolNames) return stripClaudeToolPrefix(name);
+	return name;
+}
 const ANTHROPIC_MANY_IMAGE_THRESHOLD = 20;
 const ANTHROPIC_MANY_IMAGE_MAX_DIMENSION = 2000;
@@ -1462,6 +1508,19 @@ export function isProviderRetryableError(error: unknown, provider?: string): boo
 	return isRetryableError(error);
 }
+const THINKING_ENVELOPE_OPEN = "<thinking>";
+const THINKING_ENVELOPE_CLOSE = "</thinking>";
+function unwrapAnthropicThinkingEnvelope(text: string): string | undefined {
+	let current = text.trim();
+	let stripped = false;
+	while (current.startsWith(THINKING_ENVELOPE_OPEN) && current.endsWith(THINKING_ENVELOPE_CLOSE)) {
+		current = current.slice(THINKING_ENVELOPE_OPEN.length, current.length - THINKING_ENVELOPE_CLOSE.length).trim();
+		stripped = true;
+	}
+	return stripped ? current : undefined;
+}
 function createEmptyUsage(premiumRequests?: number): Usage {
 	return {
 		input: 0,
@@ -1567,6 +1626,8 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			let disableStrictTools =
 				(providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
 			let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
+			const mergedCallerHeaders = mergeHeaders(model.headers, options?.headers);
+			const umansGatewayWebSearchHeader = getUmansWebSearchHeader(model, mergedCallerHeaders);
 			let client: AnthropicMessagesClientLike;
 			let isOAuthToken: boolean;
@@ -1628,7 +1689,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 			}
 			const preparedContext = await prepareAnthropicManyImageContext(context, model.input.includes("image"));
 			const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
-				let nextParams = buildParams(model, preparedContext, isOAuthToken, options, disableStrictTools);
+				let nextParams = buildParams(
+					model,
+					preparedContext,
+					isOAuthToken,
+					options,
+					disableStrictTools,
+					umansGatewayWebSearchHeader !== undefined,
+				);
 				if (disableStrictTools) {
 					dropAnthropicStrictTools(nextParams);
 				}
@@ -1668,6 +1736,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 				if (block.type === "text") {
 					stream.push({ type: "text_end", contentIndex, content: block.text, partial: output });
 				} else if (block.type === "thinking") {
+					const unwrappedThinking = unwrapAnthropicThinkingEnvelope(block.thinking);
+					if (unwrappedThinking !== undefined) {
+						block.thinking = unwrappedThinking;
+						block.thinkingSignature = undefined;
+					}
 					stream.push({ type: "thinking_end", contentIndex, content: block.thinking, partial: output });
 				} else if (block.type === "toolCall") {
 					const finalJson =
@@ -1701,7 +1774,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 				// to zero even when no watchdog timeout is configured (the helper only
 				// pins it alongside a timeout; a client retry budget of 5 would otherwise
 				// multiply with PROVIDER_MAX_RETRIES into up to 66 wire attempts).
-				const requestOptions = { ...createSdkStreamRequestOptions(requestSignal, requestTimeoutMs), maxRetries: 0 };
+				const requestOptions = {
+					...createSdkStreamRequestOptions(requestSignal, requestTimeoutMs),
+					maxRetries: 0,
+					...(umansGatewayWebSearchHeader ? { headers: umansGatewayWebSearchHeader } : {}),
+				};
 				const anthropicRequest: unknown =
 					isOAuthToken && client.beta
 						? client.beta.messages.create({ ...params, stream: true }, requestOptions)
@@ -1884,9 +1961,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
 								const block: Block = {
 									type: "toolCall",
 									id: event.content_block.id,
-									name: isOAuthToken
-										? stripClaudeToolPrefix(event.content_block.name)
-										: event.content_block.name,
+									name: decodeAnthropicToolName(
+										event.content_block.name,
+										isOAuthToken,
+										model.compat.escapeBuiltinToolNames,
+									),
 									arguments: event.content_block.input ?? {},
 									partialJson: "",
 									index: event.index,
@@ -2361,7 +2440,13 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
 		isOAuth: oauthToken,
 		extraBetas: betaFeatures,
 		stream,
-		modelHeaders: mergeHeaders(model.headers, foundryCustomHeaders, headers, dynamicHeaders),
+		modelHeaders: mergeHeaders(
+			model.headers,
+			foundryCustomHeaders,
+			getUmansWebSearchHeader(model, mergeHeaders(model.headers, headers)),
+			headers,
+			dynamicHeaders,
+		),
 		isCloudflareAiGateway: model.provider === "cloudflare-ai-gateway",
 		claudeCodeSessionId,
 		claudeCodeBetas: oauthToken
@@ -2382,10 +2467,9 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
 		};
 	}
-	// OpenCode Go's Anthropic-compatible gateway validates API-key auth through
-	// `X-Api-Key`; bearer-only requests reach the endpoint but return
-	// `Missing API key` before token validation.
-	if (model.provider === "opencode-go") {
+	// OpenCode Go and Umans validate Anthropic-compatible API-key auth through
+	// `X-Api-Key`; bearer-only requests reach the endpoint but fail auth.
+	if (model.provider === "opencode-go" || model.provider === "umans") {
 		delete defaultHeaders.Authorization;
 		return {
 			isOAuthToken: false,
@@ -2729,6 +2813,7 @@ function buildParams(
 	isOAuthToken: boolean,
 	options?: AnthropicOptions,
 	disableStrictTools = false,
+	useUmansGatewayWebSearch = false,
 ): MessageCreateParamsStreaming {
 	const { cacheControl } = getCacheControl(model, options?.cacheRetention, isOAuthToken);
@@ -2750,6 +2835,8 @@ function buildParams(
 			isOAuthToken,
 			disableStrictTools || model.provider === "github-copilot",
 			model.compat.supportsEagerToolInputStreaming,
+			model.compat.escapeBuiltinToolNames,
+			useUmansGatewayWebSearch,
 		);
 	} else if (isOAuthToken) {
 		tools = [];
@@ -2875,10 +2962,16 @@ function buildParams(
 	if (options?.toolChoice) {
 		if (typeof options.toolChoice === "string") {
 			params.tool_choice = { type: options.toolChoice };
-		} else if (isOAuthToken && options.toolChoice.name) {
-			params.tool_choice = { ...options.toolChoice, name: applyClaudeToolPrefix(options.toolChoice.name) };
-		} else {
-			params.tool_choice = options.toolChoice;
+		} else if (options.toolChoice.name) {
+			params.tool_choice = {
+				...options.toolChoice,
+				name: encodeAnthropicToolName(
+					options.toolChoice.name,
+					isOAuthToken,
+					model.compat.escapeBuiltinToolNames,
+					useUmansGatewayWebSearch,
+				),
+			};
 		}
 		// Claude Fable/Mythos 5 reject forced tool use outright ("tool_choice forces
 		// tool use is not compatible with this model"). Downgrade any/tool → auto so the
@@ -3083,7 +3176,7 @@ export function convertAnthropicMessages(
 					blocks.push({
 						type: "tool_use",
 						id: block.id,
-						name: isOAuthToken ? applyClaudeToolPrefix(block.name) : block.name,
+						name: encodeAnthropicToolName(block.name, isOAuthToken, model.compat.escapeBuiltinToolNames),
 						// Always sanitize: the model itself can emit lone-surrogate escapes
 						// in tool-argument JSON (streamed out fine, rejected with a 400 on
 						// replay by Anthropic's strict UTF-8 validation). toWellFormedDeep
@@ -3669,6 +3762,8 @@ function convertTools(
 	isOAuthToken: boolean,
 	disableStrictTools = false,
 	supportsEagerToolInputStreaming = true,
+	escapeBuiltinToolNames = false,
+	useUmansGatewayWebSearch = false,
 ): AnthropicWireTool[] {
 	if (!tools) return [];
 	const schemaPlans = buildAnthropicToolSchemaPlans(tools, disableStrictTools);
@@ -3676,7 +3771,7 @@ function convertTools(
 	return tools.map((tool, index) => {
 		const plan = schemaPlans[index];
 		const baseTool = {
-			name: isOAuthToken ? applyClaudeToolPrefix(tool.name) : tool.name,
+			name: encodeAnthropicToolName(tool.name, isOAuthToken, escapeBuiltinToolNames, useUmansGatewayWebSearch),
 			description: tool.description || "",
 			input_schema: plan.inputSchema,
 		};

package/src/providers/azure-openai-responses.ts CHANGED Viewed

@@ -139,8 +139,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
 		try {
 			const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
 			const { url, headers } = buildAzureResponsesRequest(model, apiKey, options);
-			const params = buildParams(model, context, options, deploymentName);
-			options?.onPayload?.(params);
+			let params = buildParams(model, context, options, deploymentName);
+			const replacementPayload = await options?.onPayload?.(params, model);
+			if (replacementPayload !== undefined) {
+				params = replacementPayload as typeof params;
+			}
 			const idleTimeoutMs = options?.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
 			const firstEventTimeoutMs =
 				options?.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);

package/src/providers/cursor.ts CHANGED Viewed

@@ -2335,9 +2335,10 @@ function buildRootPromptMessagesJson(
 		} else if (msg.role === "toolResult") {
 			const text = toolResultToText(msg);
 			if (!text) continue;
+			const prefix = msg.isError ? "[Tool Error]" : "[Tool Result]";
 			pushJson({
 				role: "user",
-				content: [{ type: "text", text: `[Tool Result]\n${text}` }],
+				content: [{ type: "text", text: `${prefix}\n${text}` }],
 			});
 		}
 	}
@@ -2415,10 +2416,11 @@ function buildConversationTurns(
 				// Include tool results as assistant text for context
 				const text = toolResultToText(stepMsg);
 				if (text) {
+					const prefix = stepMsg.isError ? "[Tool Error]" : "[Tool Result]";
 					const step = create(ConversationStepSchema, {
 						message: {
 							case: "assistantMessage",
-							value: create(AssistantMessageSchema, { text: `[Tool Result]\n${text}` }),
+							value: create(AssistantMessageSchema, { text: `${prefix}\n${text}` }),
 						},
 					});
 					stepBlobIds.push(storeCursorBlob(blobStore, toBinary(ConversationStepSchema, step)));

package/src/providers/google-shared.ts CHANGED Viewed

@@ -793,9 +793,12 @@ export function buildGoogleGenerateContentParams<T extends "google-generative-ai
 	if (context.tools && context.tools.length > 0 && options.toolChoice) {
 		const choice = options.toolChoice;
 		if (typeof choice === "string") {
-			config.toolConfig = {
-				functionCallingConfig: { mode: mapToolChoice(choice) },
-			};
+			const mode = mapToolChoice(choice);
+			if (mode !== "AUTO") {
+				config.toolConfig = {
+					functionCallingConfig: { mode },
+				};
+			}
 		} else {
 			// Named-tool routing — `mode: "ANY"` plus an explicit allow-list. The
 			// caller is responsible for ensuring the names exist in `context.tools`.