@oh-my-pi/pi-ai 16.0.0 → 16.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/README.md +3 -0
- package/dist/types/providers/anthropic-client.d.ts +2 -0
- package/dist/types/providers/openai-responses.d.ts +39 -3
- package/dist/types/registry/oauth/openai-codex.d.ts +11 -1
- package/dist/types/registry/registry.d.ts +4 -0
- package/dist/types/registry/umans.d.ts +7 -0
- package/dist/types/utils/overflow.d.ts +2 -1
- package/dist/types/utils/schema/index.d.ts +1 -0
- package/dist/types/utils/schema/strict-tool-validation.d.ts +16 -0
- package/package.json +3 -3
- package/src/dialect/rendering.ts +56 -1
- package/src/providers/anthropic-client.ts +5 -2
- package/src/providers/anthropic.ts +111 -16
- package/src/providers/azure-openai-responses.ts +5 -2
- package/src/providers/cursor.ts +4 -2
- package/src/providers/google-shared.ts +6 -3
- package/src/providers/openai-codex-responses.ts +20 -5
- package/src/providers/openai-completions.ts +121 -19
- package/src/providers/openai-responses-shared.ts +70 -13
- package/src/providers/openai-responses.ts +65 -15
- package/src/registry/oauth/openai-codex.ts +30 -13
- package/src/registry/registry.ts +2 -0
- package/src/registry/umans.ts +23 -0
- package/src/utils/overflow.ts +5 -2
- package/src/utils/schema/index.ts +1 -0
- package/src/utils/schema/normalize.ts +40 -3
- package/src/utils/schema/strict-tool-validation.ts +117 -0
- package/src/utils/schema/wire.ts +18 -3
- package/src/utils/validation.ts +159 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,37 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [16.0.2] - 2026-06-16
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added `UMANS_WEBSEARCH_PROVIDER=native|exa` support for routing Umans gateway-owned web search requests.
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
|
|
13
|
+
- A single MCP tool whose input schema can't be emitted as a valid strict tool schema for the active provider no longer fails the whole turn with HTTP 400. `convertTools` (openai-responses) now validates each tool's emitted parameter schema for `enum`/`const`-vs-`type` contradictions that pass structural JSON-Schema validation but the provider rejects — e.g. a non-null `enum` on a `type: "null"` node, or an `enum` on an `array` node — and quarantines just the offending tool with a `logger.warn` naming the tool and schema path, keeping every other tool usable. Adds `findStrictToolSchemaViolation` to `@oh-my-pi/pi-ai/utils/schema` ([#2652](https://github.com/can1357/oh-my-pi/issues/2652))
|
|
14
|
+
- Fixed OpenAI Responses-compatible streams from Ollama/local hosts dropping arguments for parallel tool calls whose deltas use `fc_<call_id>` item ids, which left earlier `ast_grep` calls with `{}` and failed validation. ([#2715](https://github.com/can1357/oh-my-pi/issues/2715))
|
|
15
|
+
- Fixed dialect transcript rendering so literal thinking envelopes are unwrapped before adding the dialect's own thinking tags, preventing nested `<thinking>` output in advisor raw dumps ([#2700](https://github.com/can1357/oh-my-pi/issues/2700)).
|
|
16
|
+
- Fixed Anthropic-compatible Umans requests escaping client tool names and forwarding gateway web search headers so Kimi answers normally instead of returning raw gateway search results.
|
|
17
|
+
- Fixed Google Gemini tool calls with `toolChoice: "auto"` serializing an explicit `toolConfig` AUTO mode, which can cause Gemini-3 models to leak raw planning JSON instead of executing tools. ([#2776](https://github.com/can1357/oh-my-pi/issues/2776))
|
|
18
|
+
- Fixed OpenAI-compatible Ollama completions that return empty `finish_reason:length` after filling `num_ctx` so they surface an actionable context-window error instead of an empty length stop. ([#2774](https://github.com/can1357/oh-my-pi/issues/2774))
|
|
19
|
+
- Fixed Codex browser login issuing credentials for the `opencode` OAuth originator while OMP requests identify as `pi`, which could make the first authenticated Codex request return 401 ([#2696](https://github.com/can1357/oh-my-pi/issues/2696)).
|
|
20
|
+
|
|
21
|
+
## [16.0.1] - 2026-06-15
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
|
|
25
|
+
- Added Umans AI Coding Plan API-key login support and `UMANS_AI_CODING_PLAN_API_KEY` environment fallback ([#2636](https://github.com/can1357/oh-my-pi/pull/2636) by [@oldschoola](https://github.com/oldschoola)).
|
|
26
|
+
|
|
27
|
+
### Fixed
|
|
28
|
+
|
|
29
|
+
- Fixed OpenAI Responses, Azure OpenAI Responses, and Codex Responses providers ignoring async `onPayload` replacement bodies. Provider payload hooks can now transform the actual request body sent upstream, matching the Anthropic/Gemini replacement contract.
|
|
30
|
+
- Fixed OpenAI-compatible chat-completions streams that send object-shaped tool arguments in fragments by deep-merging nested objects and task arrays instead of replacing earlier chunks. ([#2617](https://github.com/can1357/oh-my-pi/issues/2617))
|
|
31
|
+
- Fixed OpenAI Responses strict-mode tool schema normalization for nullable enum MCP parameters so enum constraints are distributed to matching `anyOf` branches instead of being copied onto the `null` branch. ([#1835](https://github.com/can1357/oh-my-pi/issues/1835))
|
|
32
|
+
- Fixed Cursor provider formatting tool errors with the same `[Tool Result]` prefix as successful results, causing Composer models to misinterpret error messages (e.g. "Pattern must not be empty") as directives over long conversations. Errors now use a `[Tool Error]` prefix so the model can distinguish failures from successes in the prompt history. ([#1853](https://github.com/can1357/oh-my-pi/pull/1853))
|
|
33
|
+
- Fixed `validateToolArguments` silently accepting JSON-encoded array strings (e.g. `'["a","b"]'`) against `union(string, array<string>)` schemas — providers that double-serialize tool-call arguments (Z.AI / GLM) caused tools like `search` to receive the literal `["a","b"]` as a single path, producing zero matches (single element) or glob parse errors (multi-element). A new pre-validation pass parses JSON-array-shaped strings when the schema explicitly accepts both shapes. ([#1788](https://github.com/can1357/oh-my-pi/issues/1788))
|
|
34
|
+
- Fixed Anthropic thinking summaries that arrive wrapped in literal `<thinking>` tags so advisor/raw transcript dumps do not render nested thinking tags ([#2695](https://github.com/can1357/oh-my-pi/issues/2695)).
|
|
35
|
+
|
|
5
36
|
## [16.0.0] - 2026-06-15
|
|
6
37
|
|
|
7
38
|
### Breaking Changes
|
package/README.md
CHANGED
|
@@ -68,6 +68,7 @@ Unified LLM API with automatic model discovery, provider configuration, token an
|
|
|
68
68
|
- **Kilo Gateway** (supports OAuth `/login kilo` or `KILO_API_KEY`)
|
|
69
69
|
- **LiteLLM** (requires `LITELLM_API_KEY`)
|
|
70
70
|
- **zAI** (requires `ZAI_API_KEY`)
|
|
71
|
+
- **Umans AI Coding Plan** (supports `/login umans` or `UMANS_AI_CODING_PLAN_API_KEY`)
|
|
71
72
|
- **MiniMax Token Plan** (requires `MINIMAX_CODE_API_KEY` or `MINIMAX_CODE_CN_API_KEY`)
|
|
72
73
|
- **Xiaomi MiMo** (requires `XIAOMI_API_KEY`)
|
|
73
74
|
- **ZenMux** (requires `ZENMUX_API_KEY`)
|
|
@@ -952,6 +953,7 @@ In Node.js environments, you can set environment variables to avoid passing API
|
|
|
952
953
|
| Ollama Cloud | `OLLAMA_CLOUD_API_KEY` |
|
|
953
954
|
| Qwen Portal | `QWEN_OAUTH_TOKEN` or `QWEN_PORTAL_API_KEY` |
|
|
954
955
|
| zAI | `ZAI_API_KEY` |
|
|
956
|
+
| Umans AI Coding Plan | `UMANS_AI_CODING_PLAN_API_KEY` |
|
|
955
957
|
| MiniMax Code | `MINIMAX_CODE_API_KEY` (international) or `MINIMAX_CODE_CN_API_KEY` (China) |
|
|
956
958
|
| Xiaomi MiMo | `XIAOMI_API_KEY` |
|
|
957
959
|
| ZenMux | `ZENMUX_API_KEY` |
|
|
@@ -978,6 +980,7 @@ Provider endpoint defaults for the current OpenAI-compatible integrations:
|
|
|
978
980
|
- Xiaomi MiMo: `https://api.xiaomimimo.com/anthropic`
|
|
979
981
|
- ZenMux (OpenAI): `https://zenmux.ai/api/v1`
|
|
980
982
|
- ZenMux (Anthropic models): `https://zenmux.ai/api/anthropic`
|
|
983
|
+
- Umans AI Coding Plan: `https://api.code.umans.ai`
|
|
981
984
|
- vLLM: `http://127.0.0.1:8000/v1`
|
|
982
985
|
- Ollama: local OpenAI-compatible runtime (`http://127.0.0.1:11434/v1`)
|
|
983
986
|
- Ollama Cloud: native Ollama API host (`https://ollama.com/api`, configured here as base URL `https://ollama.com`)
|
|
@@ -8,6 +8,8 @@ export interface AnthropicRequestOptions {
|
|
|
8
8
|
timeout?: number;
|
|
9
9
|
/** Per-request retry budget override. */
|
|
10
10
|
maxRetries?: number;
|
|
11
|
+
/** Per-request headers merged after client defaults. */
|
|
12
|
+
headers?: Record<string, string>;
|
|
11
13
|
}
|
|
12
14
|
/**
|
|
13
15
|
* Extra `RequestInit` fields merged into every fetch call. Bun extends
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type { Model, ServiceTier, StreamFunction, StreamOptions, Tool, ToolChoice } from "../types";
|
|
1
|
+
import type { Context, Model, ProviderSessionState, ServiceTier, StreamFunction, StreamOptions, Tool, ToolChoice } from "../types";
|
|
2
2
|
import { type OpenAIResponsesToolChoice } from "../utils/tool-choice";
|
|
3
|
-
import type { Tool as OpenAITool } from "./openai-responses-wire";
|
|
3
|
+
import type { Tool as OpenAITool, ResponseCreateParamsStreaming, ResponseInput } from "./openai-responses-wire";
|
|
4
4
|
export declare function normalizeOpenAIResponsesPromptCacheKey(sessionId: string | undefined): string | undefined;
|
|
5
5
|
export interface OpenAIResponsesOptions extends StreamOptions {
|
|
6
6
|
reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
@@ -56,11 +56,46 @@ export interface OpenAIResponsesOptions extends StreamOptions {
|
|
|
56
56
|
*/
|
|
57
57
|
extraBody?: Record<string, unknown>;
|
|
58
58
|
}
|
|
59
|
+
interface OpenAIResponsesProviderSessionState extends ProviderSessionState {
|
|
60
|
+
nativeHistoryReplayWarmed: boolean;
|
|
61
|
+
/** Stateful `previous_response_id` chain baselines, keyed by baseUrl/model/session. */
|
|
62
|
+
chains: Map<string, OpenAIResponsesChainState>;
|
|
63
|
+
}
|
|
64
|
+
interface OpenAIResponsesChainState {
|
|
65
|
+
/**
|
|
66
|
+
* Wire params of the last successful turn, with per-turn trailing
|
|
67
|
+
* scaffolding stripped from `input` (never carries previous_response_id).
|
|
68
|
+
*/
|
|
69
|
+
lastParams?: OpenAIResponsesSamplingParams;
|
|
70
|
+
lastResponseId?: string;
|
|
71
|
+
/** Output items of the last response, in replay-sanitized form (matches next-turn input). */
|
|
72
|
+
lastResponseItems?: ResponseInput;
|
|
73
|
+
canAppend: boolean;
|
|
74
|
+
/** Consecutive stale-previous-response failures; reset on a successful chained completion. */
|
|
75
|
+
staleFailures: number;
|
|
76
|
+
/** Set once chaining is judged unsupported for this session (circuit breaker). */
|
|
77
|
+
disabled: boolean;
|
|
78
|
+
}
|
|
79
|
+
type OpenAIResponsesSamplingParams = ResponseCreateParamsStreaming & {
|
|
80
|
+
top_p?: number;
|
|
81
|
+
top_k?: number;
|
|
82
|
+
min_p?: number;
|
|
83
|
+
presence_penalty?: number;
|
|
84
|
+
repetition_penalty?: number;
|
|
85
|
+
stream_options?: {
|
|
86
|
+
include_obfuscation?: boolean;
|
|
87
|
+
};
|
|
88
|
+
};
|
|
59
89
|
/**
|
|
60
90
|
* Generate function for OpenAI Responses API
|
|
61
91
|
*/
|
|
62
92
|
export declare const streamOpenAIResponses: StreamFunction<"openai-responses">;
|
|
63
93
|
export declare function getOpenAIResponsesCacheSessionId(options: Pick<OpenAIResponsesOptions, "cacheRetention" | "sessionId" | "promptCacheKey"> | undefined): string | undefined;
|
|
94
|
+
/** @internal Exported for tests. */
|
|
95
|
+
export declare function buildParams(model: Model<"openai-responses">, context: Context, options: OpenAIResponsesOptions | undefined, providerSessionState: OpenAIResponsesProviderSessionState | undefined): {
|
|
96
|
+
params: OpenAIResponsesSamplingParams;
|
|
97
|
+
trailingScaffoldingItems: number;
|
|
98
|
+
};
|
|
64
99
|
/**
|
|
65
100
|
* Whether this model should get the OpenAI custom-tool grammar variant
|
|
66
101
|
* for `apply_patch`. The generated model catalog sets
|
|
@@ -72,4 +107,5 @@ export declare function supportsFreeformApplyPatch(model: Model<"openai-response
|
|
|
72
107
|
/** @internal Exported for tests. */
|
|
73
108
|
export declare function mapOpenAIResponsesToolChoiceForTools(choice: ToolChoice | undefined, tools: Tool[], model: Model<"openai-responses">): OpenAIResponsesToolChoice;
|
|
74
109
|
/** @internal Exported for tests. */
|
|
75
|
-
export declare function convertTools(tools: Tool[], strictMode: boolean, model: Model<"openai-responses">): OpenAITool[];
|
|
110
|
+
export declare function convertTools(tools: Tool[], strictMode: boolean, model: Model<"openai-responses">, onQuarantine?: (toolName: string, schemaPath: string) => void): OpenAITool[];
|
|
111
|
+
export {};
|
|
@@ -1,10 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Codex (ChatGPT OAuth) flow — browser and device-code flows.
|
|
3
|
+
*/
|
|
1
4
|
import type { OAuthController, OAuthCredentials } from "./types";
|
|
2
5
|
export declare function decodeJwt<T = Record<string, unknown>>(token: string): T | null;
|
|
6
|
+
/** Builds the Codex browser OAuth URL used by browser login; exported for auth regression tests. */
|
|
7
|
+
export declare function createOpenAICodexAuthorizationUrl(args: {
|
|
8
|
+
state: string;
|
|
9
|
+
redirectUri: string;
|
|
10
|
+
challenge: string;
|
|
11
|
+
originator?: string;
|
|
12
|
+
}): string;
|
|
3
13
|
/**
|
|
4
14
|
* Login with OpenAI Codex OAuth
|
|
5
15
|
*/
|
|
6
16
|
export type OpenAICodexLoginOptions = OAuthController & {
|
|
7
|
-
/** Optional originator value for OpenAI Codex OAuth. Default
|
|
17
|
+
/** Optional originator value for OpenAI Codex OAuth. Default matches OMP Codex request headers. */
|
|
8
18
|
originator?: string;
|
|
9
19
|
};
|
|
10
20
|
export declare function loginOpenAICodex(options: OpenAICodexLoginOptions): Promise<OAuthCredentials>;
|
|
@@ -208,6 +208,10 @@ declare const ALL: ({
|
|
|
208
208
|
readonly id: "together";
|
|
209
209
|
readonly name: "Together";
|
|
210
210
|
readonly login: (cb: Parameters<typeof import("./together").loginTogether>[0]) => Promise<string>;
|
|
211
|
+
} | {
|
|
212
|
+
readonly id: "umans";
|
|
213
|
+
readonly name: "Umans AI Coding Plan";
|
|
214
|
+
readonly login: (cb: import("./oauth").OAuthLoginCallbacks) => Promise<string>;
|
|
211
215
|
} | {
|
|
212
216
|
readonly id: "venice";
|
|
213
217
|
readonly name: "Venice";
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { OAuthLoginCallbacks } from "./oauth/types";
|
|
2
|
+
export declare const loginUmans: (options: import("./oauth").OAuthController) => Promise<string>;
|
|
3
|
+
export declare const umansProvider: {
|
|
4
|
+
readonly id: "umans";
|
|
5
|
+
readonly name: "Umans AI Coding Plan";
|
|
6
|
+
readonly login: (cb: OAuthLoginCallbacks) => Promise<string>;
|
|
7
|
+
};
|
|
@@ -25,11 +25,12 @@ import type { AssistantMessage } from "../types";
|
|
|
25
25
|
* - Kimi For Coding: "exceeded model token limit: X (requested: Y)"
|
|
26
26
|
* - Anthropic 413: "request_too_large" (request body exceeds size limit)
|
|
27
27
|
* - HTTP 413: "Payload Too Large" / "Request Entity Too Large"
|
|
28
|
+
* - Ollama OpenAI-compatible: "prompt filled the context window"
|
|
28
29
|
*
|
|
29
30
|
* **Unreliable detection:**
|
|
30
31
|
* - z.ai: Sometimes accepts overflow silently (detectable via usage.input > contextWindow),
|
|
31
32
|
* sometimes returns rate limit errors. Pass contextWindow param to detect silent overflow.
|
|
32
|
-
* - Ollama: Silently truncates input without error. Cannot be detected via this function.
|
|
33
|
+
* - Ollama native: Silently truncates input without error. Cannot be detected via this function.
|
|
33
34
|
* The response will have usage.input < expected, but we don't know the expected value.
|
|
34
35
|
*
|
|
35
36
|
* ## Custom Providers
|
|
@@ -8,6 +8,7 @@ export * from "./json-schema-validator";
|
|
|
8
8
|
export * from "./meta-validator";
|
|
9
9
|
export * from "./normalize";
|
|
10
10
|
export * from "./spill";
|
|
11
|
+
export * from "./strict-tool-validation";
|
|
11
12
|
export * from "./types";
|
|
12
13
|
export * from "./typescript";
|
|
13
14
|
export * from "./wire";
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detects tool-parameter schemas that pass structural JSON-Schema validation
|
|
3
|
+
* (so {@link isValidJsonSchema} accepts them) yet make OpenAI-style providers
|
|
4
|
+
* reject the whole request with HTTP 400 — namely an `enum`/`const` whose
|
|
5
|
+
* value(s) cannot satisfy the node's declared `type`. MCP servers emit these
|
|
6
|
+
* when a nullable/array branch is built incorrectly (e.g. a non-null `enum`
|
|
7
|
+
* copied onto a `type: "null"` branch, or an `enum` placed on an `array`
|
|
8
|
+
* schema instead of its `items`). One such tool 400s the entire turn, so
|
|
9
|
+
* callers quarantine just the offending tool. See issue #2652.
|
|
10
|
+
*/
|
|
11
|
+
/**
|
|
12
|
+
* Walk a tool parameter schema for OpenAI-strict `enum`/`const`-vs-`type`
|
|
13
|
+
* contradictions. Returns a JSON-pointer-ish path to the first offending node,
|
|
14
|
+
* or `null` when the schema is safe to emit.
|
|
15
|
+
*/
|
|
16
|
+
export declare function findStrictToolSchemaViolation(schema: unknown, path?: string): string | null;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "16.0.0",
|
|
4
|
+
"version": "16.0.2",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -38,8 +38,8 @@
|
|
|
38
38
|
},
|
|
39
39
|
"dependencies": {
|
|
40
40
|
"@bufbuild/protobuf": "^2.12.0",
|
|
41
|
-
"@oh-my-pi/pi-catalog": "16.0.0",
|
|
42
|
-
"@oh-my-pi/pi-utils": "16.0.0",
|
|
41
|
+
"@oh-my-pi/pi-catalog": "16.0.2",
|
|
42
|
+
"@oh-my-pi/pi-utils": "16.0.2",
|
|
43
43
|
"partial-json": "^0.1.7",
|
|
44
44
|
"zod": "^4"
|
|
45
45
|
},
|
package/src/dialect/rendering.ts
CHANGED
|
@@ -157,9 +157,64 @@ export function messageContentText(
|
|
|
157
157
|
return text;
|
|
158
158
|
}
|
|
159
159
|
|
|
160
|
+
/**
 * Reports whether a UTF-16 code unit is one of the six ASCII whitespace
 * characters: tab (9), line feed (10), vertical tab (11), form feed (12),
 * carriage return (13) or space (32).
 *
 * @param code - Code unit, e.g. the result of `String.prototype.charCodeAt`.
 * @returns `true` only for the six code units listed above.
 */
function isAsciiWhitespace(code: number): boolean {
	switch (code) {
		case 9:
		case 10:
		case 11:
		case 12:
		case 13:
		case 32:
			return true;
		default:
			return false;
	}
}
|
|
163
|
+
|
|
164
|
+
/**
 * Skips leading ASCII whitespace inside the window `[start, end)` of `text`.
 *
 * @param text - String being scanned; it is never copied or modified.
 * @param start - Index at which scanning begins.
 * @param end - Exclusive upper bound of the window.
 * @returns Index of the first non-whitespace code unit at or after `start`,
 *   or `end` when the whole window is whitespace. When `start >= end` the
 *   value of `start` is returned unchanged.
 */
function trimAsciiStart(text: string, start: number, end: number): number {
	let index = start;
	for (; index < end; index++) {
		if (!isAsciiWhitespace(text.charCodeAt(index))) break;
	}
	return index;
}
|
|
169
|
+
|
|
170
|
+
/**
 * Skips trailing ASCII whitespace inside the window `[start, end)` of `text`.
 *
 * @param text - String being scanned; it is never copied or modified.
 * @param start - Lower bound the result will not go below.
 * @param end - Exclusive upper bound at which the backwards scan begins.
 * @returns Exclusive end index of the window with trailing whitespace removed,
 *   or `start` when the whole window is whitespace. When `end <= start` the
 *   value of `end` is returned unchanged.
 */
function trimAsciiEnd(text: string, start: number, end: number): number {
	let index = end;
	for (; index > start; index--) {
		if (!isAsciiWhitespace(text.charCodeAt(index - 1))) break;
	}
	return index;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Locates the `close` delimiter that balances an `open` delimiter the caller
 * has already consumed, counting nested `open`/`close` pairs on the way.
 *
 * @param open - Opening delimiter text.
 * @param close - Closing delimiter text.
 * @param text - String being scanned.
 * @param start - Index just past the already-consumed opening delimiter.
 * @param end - Exclusive bound; a `close` starting at or after it is ignored.
 * @returns Index of the first character of the balancing `close`, or `-1`
 *   when no balancing `close` starts before `end` or when either delimiter is
 *   the empty string.
 */
function findDelimitedThinkingClose(open: string, close: string, text: string, start: number, end: number): number {
	// An empty delimiter cannot bound an envelope. `indexOf("")` always matches
	// at the search position, so an empty `open` would be counted as a nested
	// opener forever without advancing the cursor (an infinite loop), and an
	// empty `close` would report a zero-length envelope the caller cannot
	// advance past.
	if (open.length === 0 || close.length === 0) return -1;

	// Depth starts at 1 because the caller has already consumed one `open`.
	let depth = 1;
	let cursor = start;
	while (cursor < end) {
		const nextClose = text.indexOf(close, cursor);
		if (nextClose < 0 || nextClose >= end) return -1;
		const nextOpen = text.indexOf(open, cursor);
		if (nextOpen >= 0 && nextOpen < nextClose) {
			// A nested opener precedes the next closer: descend and keep scanning.
			depth++;
			cursor = nextOpen + open.length;
			continue;
		}
		depth--;
		if (depth === 0) return nextClose;
		cursor = nextClose + close.length;
	}
	return -1;
}
|
|
194
|
+
|
|
195
|
+
/**
 * Removes literal thinking envelopes that wrap `text`, so a caller can add
 * its own `open`/`close` pair without producing nested tags.
 *
 * After ignoring surrounding ASCII whitespace, the text must consist solely of
 * one or more back-to-back `open … close` envelopes (optionally separated by
 * ASCII whitespace). Each envelope body is trimmed, unwrapped recursively, and
 * the results are joined with a newline. If the text has any other shape —
 * leading or trailing prose, or an opener with no balancing closer — the input
 * is returned untouched.
 *
 * @param open - Opening delimiter text.
 * @param close - Closing delimiter text.
 * @param text - Candidate thinking text.
 * @returns The unwrapped content, or `text` itself when it is not fully
 *   enveloped.
 */
function unwrapDelimitedThinking(open: string, close: string, text: string): string {
	const contentEnd = trimAsciiEnd(text, 0, text.length);
	let position = trimAsciiStart(text, 0, contentEnd);
	// Nothing but whitespace: there is no envelope to remove.
	if (position >= contentEnd) return text;

	const pieces: string[] = [];
	do {
		// Every top-level chunk must itself be an envelope; otherwise bail out.
		if (!text.startsWith(open, position)) return text;
		const bodyStart = position + open.length;
		const closeAt = findDelimitedThinkingClose(open, close, text, bodyStart, contentEnd);
		if (closeAt < 0) return text;

		const bodyEnd = trimAsciiEnd(text, bodyStart, closeAt);
		const body = text.slice(trimAsciiStart(text, bodyStart, bodyEnd), bodyEnd);
		pieces.push(unwrapDelimitedThinking(open, close, body));

		// Continue after the closer, skipping whitespace between siblings.
		position = trimAsciiStart(text, closeAt + close.length, contentEnd);
	} while (position < contentEnd);

	return pieces.join("\n");
}
|
|
214
|
+
|
|
160
215
|
/**
 * Renders thinking text between a dialect's `open` and `close` delimiters,
 * each on its own line.
 *
 * Literal envelopes already present in `text` are removed first via
 * `unwrapDelimitedThinking`, so the output does not contain the delimiters
 * nested inside themselves.
 *
 * @param open - Opening delimiter text.
 * @param close - Closing delimiter text.
 * @param text - Thinking content; a falsy value yields an empty string.
 * @returns `open`, the unwrapped content and `close` joined by newlines, or
 *   `""` when `text` is empty.
 */
export function renderDelimitedThinking(open: string, close: string, text: string): string {
	if (!text) return "";
	const body = unwrapDelimitedThinking(open, close, text);
	return [open, body, close].join("\n");
}
|
|
164
219
|
|
|
165
220
|
export function chatMlTurn(role: "assistant" | "system" | "tool" | "user", body: string): string {
|
|
@@ -39,6 +39,8 @@ export interface AnthropicRequestOptions {
|
|
|
39
39
|
timeout?: number;
|
|
40
40
|
/** Per-request retry budget override. */
|
|
41
41
|
maxRetries?: number;
|
|
42
|
+
/** Per-request headers merged after client defaults. */
|
|
43
|
+
headers?: Record<string, string>;
|
|
42
44
|
}
|
|
43
45
|
|
|
44
46
|
/**
|
|
@@ -217,7 +219,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
|
|
|
217
219
|
return new AnthropicApiRequest(() => this.#send(path, params, options));
|
|
218
220
|
}
|
|
219
221
|
|
|
220
|
-
#buildHeaders(): Record<string, string> {
|
|
222
|
+
#buildHeaders(requestHeaders?: Record<string, string>): Record<string, string> {
|
|
221
223
|
const opts = this.#options;
|
|
222
224
|
const defaults = opts.defaultHeaders ?? {};
|
|
223
225
|
const headers: Record<string, string> = {};
|
|
@@ -228,6 +230,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
|
|
|
228
230
|
headers.Authorization = `Bearer ${opts.authToken}`;
|
|
229
231
|
}
|
|
230
232
|
Object.assign(headers, defaults);
|
|
233
|
+
Object.assign(headers, requestHeaders);
|
|
231
234
|
return headers;
|
|
232
235
|
}
|
|
233
236
|
|
|
@@ -242,7 +245,7 @@ export class AnthropicMessagesClient implements AnthropicMessagesClientLike {
|
|
|
242
245
|
const timeoutMs = options?.timeout ?? opts.timeout ?? DEFAULT_TIMEOUT_MS;
|
|
243
246
|
const maxRetries = Math.max(0, options?.maxRetries ?? opts.maxRetries ?? DEFAULT_MAX_RETRIES);
|
|
244
247
|
const url = `${opts.baseURL ?? "https://api.anthropic.com"}${path}`;
|
|
245
|
-
const headers = this.#buildHeaders();
|
|
248
|
+
const headers = this.#buildHeaders(options?.headers);
|
|
246
249
|
const body = JSON.stringify(params);
|
|
247
250
|
|
|
248
251
|
for (let attempt = 0; ; attempt++) {
|
|
@@ -704,6 +704,8 @@ export function resolveAnthropicMetadataUserId(
|
|
|
704
704
|
return generateClaudeJsonUserId(sessionId, accountId);
|
|
705
705
|
}
|
|
706
706
|
const ANTHROPIC_BUILTIN_TOOL_NAMES = new Set(["web_search", "code_execution", "text_editor", "computer"]);
|
|
707
|
+
const UMANS_WEBSEARCH_PROVIDER_HEADER = "X-Umans-Websearch-Provider";
|
|
708
|
+
const UMANS_WEBSEARCH_TOOL_NAME = "web_search";
|
|
707
709
|
export const applyClaudeToolPrefix = (name: string): string => {
|
|
708
710
|
if (!claudeToolPrefix) return name;
|
|
709
711
|
if (ANTHROPIC_BUILTIN_TOOL_NAMES.has(name.toLowerCase())) return name;
|
|
@@ -721,6 +723,50 @@ export const stripClaudeToolPrefix = (name: string): string => {
|
|
|
721
723
|
return name.slice(claudeToolPrefix.length);
|
|
722
724
|
};
|
|
723
725
|
|
|
726
|
+
/**
 * Canonicalizes a raw Umans web-search provider setting.
 *
 * The value is trimmed and lower-cased before comparison.
 *
 * @param value - Raw setting, or `undefined` when unset.
 * @returns `"native"` or `"exa"` when the value matches one of them;
 *   `undefined` for anything else, including an unset or empty value.
 */
function normalizeUmansWebSearchProvider(value: string | undefined): "native" | "exa" | undefined {
	switch (value?.trim().toLowerCase()) {
		case "native":
			return "native";
		case "exa":
			return "exa";
		default:
			return undefined;
	}
}
|
|
730
|
+
|
|
731
|
+
/**
 * Resolves the requested Umans web-search provider.
 *
 * A header named by `UMANS_WEBSEARCH_PROVIDER_HEADER` (looked up through
 * `getHeaderCaseInsensitive`) takes precedence. The `UMANS_WEBSEARCH_PROVIDER`
 * environment value is consulted only when that lookup returns `undefined`,
 * so a header that is present but unrecognized yields `undefined` instead of
 * falling back to the environment.
 *
 * @param headers - Caller-supplied headers, if any.
 * @returns The normalized provider, or `undefined` when none is recognized.
 */
function getUmansWebSearchProvider(headers: Record<string, string> | undefined): "native" | "exa" | undefined {
	const headerValue = getHeaderCaseInsensitive(headers, UMANS_WEBSEARCH_PROVIDER_HEADER);
	const raw = headerValue === undefined ? $env.UMANS_WEBSEARCH_PROVIDER : headerValue;
	return normalizeUmansWebSearchProvider(raw);
}
|
|
736
|
+
|
|
737
|
+
/**
 * Decides whether a model is served by the Umans gateway.
 *
 * A model qualifies when its `provider` is exactly `"umans"`, or when its
 * `baseUrl`, compared case-insensitively, contains `api.code.umans.ai`
 * anywhere in the string. The base URL is not inspected when the provider
 * already matches.
 *
 * @param model - Model descriptor to inspect.
 * @returns `true` for Umans-hosted models, `false` otherwise.
 */
function isUmansAnthropicModel(model: Model<"anthropic-messages">): boolean {
	if (model.provider === "umans") return true;
	const normalizedBaseUrl = model.baseUrl.toLowerCase();
	return normalizedBaseUrl.includes("api.code.umans.ai");
}
|
|
740
|
+
|
|
741
|
+
/**
 * Builds the gateway web-search header for Umans-hosted models.
 *
 * @param model - Model the request targets.
 * @param headers - Caller-supplied headers used to resolve the provider.
 * @returns A single-entry record keyed by `UMANS_WEBSEARCH_PROVIDER_HEADER`
 *   when the model is Umans-hosted and a provider could be resolved;
 *   `undefined` otherwise.
 */
function getUmansWebSearchHeader(
	model: Model<"anthropic-messages">,
	headers: Record<string, string> | undefined,
): Record<string, string> | undefined {
	if (!isUmansAnthropicModel(model)) return undefined;

	const provider = getUmansWebSearchProvider(headers);
	if (!provider) return undefined;

	return { [UMANS_WEBSEARCH_PROVIDER_HEADER]: provider };
}
|
|
749
|
+
|
|
750
|
+
/**
 * Tells whether a tool should be handled as Umans gateway web search.
 *
 * @param name - Tool name, compared case-insensitively.
 * @param enabled - Whether gateway web search is active for this request.
 * @returns `true` only when `enabled` is set and `name` equals
 *   `UMANS_WEBSEARCH_TOOL_NAME` ignoring case.
 */
function shouldUseUmansGatewayWebSearch(name: string, enabled: boolean): boolean {
	if (!enabled) return false;
	return name.toLowerCase() === UMANS_WEBSEARCH_TOOL_NAME;
}
|
|
753
|
+
|
|
754
|
+
/**
 * Converts a client tool name to the name sent on the wire.
 *
 * Rules are applied in order:
 * 1. A tool selected for Umans gateway web search keeps its name verbatim.
 * 2. With `escapeBuiltinToolNames`, `claudeToolPrefix` is always prepended.
 * 3. With an OAuth token, `applyClaudeToolPrefix` decides the prefixing.
 * 4. Otherwise the name is returned unchanged.
 *
 * @param name - Client-side tool name.
 * @param isOAuthToken - Whether the request authenticates with an OAuth token.
 * @param escapeBuiltinToolNames - Whether every name must carry the prefix.
 * @param useUmansGatewayWebSearch - Whether gateway web search is active.
 * @returns The wire-level tool name.
 */
function encodeAnthropicToolName(
	name: string,
	isOAuthToken: boolean,
	escapeBuiltinToolNames: boolean,
	useUmansGatewayWebSearch = false,
): string {
	const keepGatewayName = shouldUseUmansGatewayWebSearch(name, useUmansGatewayWebSearch);
	if (keepGatewayName) return name;

	if (escapeBuiltinToolNames) return `${claudeToolPrefix}${name}`;

	if (!isOAuthToken) return name;
	return applyClaudeToolPrefix(name);
}
|
|
764
|
+
|
|
765
|
+
/**
 * Converts a wire-level tool name back to the client tool name.
 *
 * @param name - Tool name as received from the provider.
 * @param isOAuthToken - Whether the request authenticated with an OAuth token.
 * @param escapeBuiltinToolNames - Whether names were sent with the prefix.
 * @returns The result of `stripClaudeToolPrefix(name)` when either flag is
 *   set; otherwise `name` unchanged.
 */
function decodeAnthropicToolName(name: string, isOAuthToken: boolean, escapeBuiltinToolNames: boolean): string {
	const mayCarryPrefix = isOAuthToken || escapeBuiltinToolNames;
	return mayCarryPrefix ? stripClaudeToolPrefix(name) : name;
}
|
|
769
|
+
|
|
724
770
|
const ANTHROPIC_MANY_IMAGE_THRESHOLD = 20;
|
|
725
771
|
const ANTHROPIC_MANY_IMAGE_MAX_DIMENSION = 2000;
|
|
726
772
|
|
|
@@ -1462,6 +1508,19 @@ export function isProviderRetryableError(error: unknown, provider?: string): boo
|
|
|
1462
1508
|
return isRetryableError(error);
|
|
1463
1509
|
}
|
|
1464
1510
|
|
|
1511
|
+
const THINKING_ENVELOPE_OPEN = "<thinking>";
const THINKING_ENVELOPE_CLOSE = "</thinking>";

/**
 * Checks that the `<thinking>` tag at the start of `text` is balanced by the
 * `</thinking>` tag at its very end, rather than by an earlier closer.
 *
 * Precondition: `text` starts with the opening tag and ends with the closing
 * tag. Returns `false` only when the leading envelope closes before the final
 * closer, i.e. the text is several sibling envelopes and not one wrapper.
 */
function thinkingEnvelopeSpansWholeText(text: string): boolean {
	const lastClose = text.length - THINKING_ENVELOPE_CLOSE.length;
	// Depth starts at 1 for the leading opener, which is skipped.
	let depth = 1;
	let cursor = THINKING_ENVELOPE_OPEN.length;
	while (cursor < lastClose) {
		const nextClose = text.indexOf(THINKING_ENVELOPE_CLOSE, cursor);
		// No closer before the final one: the final closer is the balancing one.
		if (nextClose < 0 || nextClose >= lastClose) return true;
		const nextOpen = text.indexOf(THINKING_ENVELOPE_OPEN, cursor);
		if (nextOpen >= 0 && nextOpen < nextClose) {
			depth++;
			cursor = nextOpen + THINKING_ENVELOPE_OPEN.length;
			continue;
		}
		depth--;
		// The leading envelope ended early; what follows is sibling content.
		if (depth === 0) return false;
		cursor = nextClose + THINKING_ENVELOPE_CLOSE.length;
	}
	return true;
}

/**
 * Strips literal `<thinking>…</thinking>` wrappers that enclose the whole of a
 * thinking text, repeating for wrappers nested directly inside one another.
 *
 * A wrapper is removed only when its opening tag is balanced by the final
 * closing tag. Text made of sibling envelopes, such as
 * `<thinking>a</thinking> mid <thinking>b</thinking>`, is left alone; slicing
 * the first opener and last closer off it would leave stray, mismatched tags
 * in the content.
 *
 * @param text - Raw thinking text; surrounding whitespace is ignored.
 * @returns The trimmed inner content when at least one wrapper was removed,
 *   or `undefined` when nothing was stripped so callers can tell "unchanged"
 *   apart from "unwrapped to an empty string".
 */
function unwrapAnthropicThinkingEnvelope(text: string): string | undefined {
	let current = text.trim();
	let stripped = false;
	while (
		current.startsWith(THINKING_ENVELOPE_OPEN) &&
		current.endsWith(THINKING_ENVELOPE_CLOSE) &&
		thinkingEnvelopeSpansWholeText(current)
	) {
		current = current.slice(THINKING_ENVELOPE_OPEN.length, current.length - THINKING_ENVELOPE_CLOSE.length).trim();
		stripped = true;
	}
	return stripped ? current : undefined;
}
|
|
1523
|
+
|
|
1465
1524
|
function createEmptyUsage(premiumRequests?: number): Usage {
|
|
1466
1525
|
return {
|
|
1467
1526
|
input: 0,
|
|
@@ -1567,6 +1626,8 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1567
1626
|
let disableStrictTools =
|
|
1568
1627
|
(providerSessionState?.strictToolsDisabled ?? false) || (model.compat?.disableStrictTools ?? false);
|
|
1569
1628
|
let dropFastMode = providerSessionState?.fastModeDisabled ?? false;
|
|
1629
|
+
const mergedCallerHeaders = mergeHeaders(model.headers, options?.headers);
|
|
1630
|
+
const umansGatewayWebSearchHeader = getUmansWebSearchHeader(model, mergedCallerHeaders);
|
|
1570
1631
|
|
|
1571
1632
|
let client: AnthropicMessagesClientLike;
|
|
1572
1633
|
let isOAuthToken: boolean;
|
|
@@ -1628,7 +1689,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1628
1689
|
}
|
|
1629
1690
|
const preparedContext = await prepareAnthropicManyImageContext(context, model.input.includes("image"));
|
|
1630
1691
|
const prepareParams = async (): Promise<MessageCreateParamsStreaming> => {
|
|
1631
|
-
let nextParams = buildParams(
|
|
1692
|
+
let nextParams = buildParams(
|
|
1693
|
+
model,
|
|
1694
|
+
preparedContext,
|
|
1695
|
+
isOAuthToken,
|
|
1696
|
+
options,
|
|
1697
|
+
disableStrictTools,
|
|
1698
|
+
umansGatewayWebSearchHeader !== undefined,
|
|
1699
|
+
);
|
|
1632
1700
|
if (disableStrictTools) {
|
|
1633
1701
|
dropAnthropicStrictTools(nextParams);
|
|
1634
1702
|
}
|
|
@@ -1668,6 +1736,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1668
1736
|
if (block.type === "text") {
|
|
1669
1737
|
stream.push({ type: "text_end", contentIndex, content: block.text, partial: output });
|
|
1670
1738
|
} else if (block.type === "thinking") {
|
|
1739
|
+
const unwrappedThinking = unwrapAnthropicThinkingEnvelope(block.thinking);
|
|
1740
|
+
if (unwrappedThinking !== undefined) {
|
|
1741
|
+
block.thinking = unwrappedThinking;
|
|
1742
|
+
block.thinkingSignature = undefined;
|
|
1743
|
+
}
|
|
1671
1744
|
stream.push({ type: "thinking_end", contentIndex, content: block.thinking, partial: output });
|
|
1672
1745
|
} else if (block.type === "toolCall") {
|
|
1673
1746
|
const finalJson =
|
|
@@ -1701,7 +1774,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1701
1774
|
// to zero even when no watchdog timeout is configured (the helper only
|
|
1702
1775
|
// pins it alongside a timeout; a client retry budget of 5 would otherwise
|
|
1703
1776
|
// multiply with PROVIDER_MAX_RETRIES into up to 66 wire attempts).
|
|
1704
|
-
const requestOptions = {
|
|
1777
|
+
const requestOptions = {
|
|
1778
|
+
...createSdkStreamRequestOptions(requestSignal, requestTimeoutMs),
|
|
1779
|
+
maxRetries: 0,
|
|
1780
|
+
...(umansGatewayWebSearchHeader ? { headers: umansGatewayWebSearchHeader } : {}),
|
|
1781
|
+
};
|
|
1705
1782
|
const anthropicRequest: unknown =
|
|
1706
1783
|
isOAuthToken && client.beta
|
|
1707
1784
|
? client.beta.messages.create({ ...params, stream: true }, requestOptions)
|
|
@@ -1884,9 +1961,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
1884
1961
|
const block: Block = {
|
|
1885
1962
|
type: "toolCall",
|
|
1886
1963
|
id: event.content_block.id,
|
|
1887
|
-
name:
|
|
1888
|
-
|
|
1889
|
-
|
|
1964
|
+
name: decodeAnthropicToolName(
|
|
1965
|
+
event.content_block.name,
|
|
1966
|
+
isOAuthToken,
|
|
1967
|
+
model.compat.escapeBuiltinToolNames,
|
|
1968
|
+
),
|
|
1890
1969
|
arguments: event.content_block.input ?? {},
|
|
1891
1970
|
partialJson: "",
|
|
1892
1971
|
index: event.index,
|
|
@@ -2361,7 +2440,13 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
2361
2440
|
isOAuth: oauthToken,
|
|
2362
2441
|
extraBetas: betaFeatures,
|
|
2363
2442
|
stream,
|
|
2364
|
-
modelHeaders: mergeHeaders(
|
|
2443
|
+
modelHeaders: mergeHeaders(
|
|
2444
|
+
model.headers,
|
|
2445
|
+
foundryCustomHeaders,
|
|
2446
|
+
getUmansWebSearchHeader(model, mergeHeaders(model.headers, headers)),
|
|
2447
|
+
headers,
|
|
2448
|
+
dynamicHeaders,
|
|
2449
|
+
),
|
|
2365
2450
|
isCloudflareAiGateway: model.provider === "cloudflare-ai-gateway",
|
|
2366
2451
|
claudeCodeSessionId,
|
|
2367
2452
|
claudeCodeBetas: oauthToken
|
|
@@ -2382,10 +2467,9 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
2382
2467
|
};
|
|
2383
2468
|
}
|
|
2384
2469
|
|
|
2385
|
-
// OpenCode Go
|
|
2386
|
-
// `X-Api-Key`; bearer-only requests reach the endpoint but
|
|
2387
|
-
|
|
2388
|
-
if (model.provider === "opencode-go") {
|
|
2470
|
+
// OpenCode Go and Umans validate Anthropic-compatible API-key auth through
|
|
2471
|
+
// `X-Api-Key`; bearer-only requests reach the endpoint but fail auth.
|
|
2472
|
+
if (model.provider === "opencode-go" || model.provider === "umans") {
|
|
2389
2473
|
delete defaultHeaders.Authorization;
|
|
2390
2474
|
return {
|
|
2391
2475
|
isOAuthToken: false,
|
|
@@ -2729,6 +2813,7 @@ function buildParams(
|
|
|
2729
2813
|
isOAuthToken: boolean,
|
|
2730
2814
|
options?: AnthropicOptions,
|
|
2731
2815
|
disableStrictTools = false,
|
|
2816
|
+
useUmansGatewayWebSearch = false,
|
|
2732
2817
|
): MessageCreateParamsStreaming {
|
|
2733
2818
|
const { cacheControl } = getCacheControl(model, options?.cacheRetention, isOAuthToken);
|
|
2734
2819
|
|
|
@@ -2750,6 +2835,8 @@ function buildParams(
|
|
|
2750
2835
|
isOAuthToken,
|
|
2751
2836
|
disableStrictTools || model.provider === "github-copilot",
|
|
2752
2837
|
model.compat.supportsEagerToolInputStreaming,
|
|
2838
|
+
model.compat.escapeBuiltinToolNames,
|
|
2839
|
+
useUmansGatewayWebSearch,
|
|
2753
2840
|
);
|
|
2754
2841
|
} else if (isOAuthToken) {
|
|
2755
2842
|
tools = [];
|
|
@@ -2875,10 +2962,16 @@ function buildParams(
|
|
|
2875
2962
|
if (options?.toolChoice) {
|
|
2876
2963
|
if (typeof options.toolChoice === "string") {
|
|
2877
2964
|
params.tool_choice = { type: options.toolChoice };
|
|
2878
|
-
} else if (
|
|
2879
|
-
params.tool_choice = {
|
|
2880
|
-
|
|
2881
|
-
|
|
2965
|
+
} else if (options.toolChoice.name) {
|
|
2966
|
+
params.tool_choice = {
|
|
2967
|
+
...options.toolChoice,
|
|
2968
|
+
name: encodeAnthropicToolName(
|
|
2969
|
+
options.toolChoice.name,
|
|
2970
|
+
isOAuthToken,
|
|
2971
|
+
model.compat.escapeBuiltinToolNames,
|
|
2972
|
+
useUmansGatewayWebSearch,
|
|
2973
|
+
),
|
|
2974
|
+
};
|
|
2882
2975
|
}
|
|
2883
2976
|
// Claude Fable/Mythos 5 reject forced tool use outright ("tool_choice forces
|
|
2884
2977
|
// tool use is not compatible with this model"). Downgrade any/tool → auto so the
|
|
@@ -3083,7 +3176,7 @@ export function convertAnthropicMessages(
|
|
|
3083
3176
|
blocks.push({
|
|
3084
3177
|
type: "tool_use",
|
|
3085
3178
|
id: block.id,
|
|
3086
|
-
name:
|
|
3179
|
+
name: encodeAnthropicToolName(block.name, isOAuthToken, model.compat.escapeBuiltinToolNames),
|
|
3087
3180
|
// Always sanitize: the model itself can emit lone-surrogate escapes
|
|
3088
3181
|
// in tool-argument JSON (streamed out fine, rejected with a 400 on
|
|
3089
3182
|
// replay by Anthropic's strict UTF-8 validation). toWellFormedDeep
|
|
@@ -3669,6 +3762,8 @@ function convertTools(
|
|
|
3669
3762
|
isOAuthToken: boolean,
|
|
3670
3763
|
disableStrictTools = false,
|
|
3671
3764
|
supportsEagerToolInputStreaming = true,
|
|
3765
|
+
escapeBuiltinToolNames = false,
|
|
3766
|
+
useUmansGatewayWebSearch = false,
|
|
3672
3767
|
): AnthropicWireTool[] {
|
|
3673
3768
|
if (!tools) return [];
|
|
3674
3769
|
const schemaPlans = buildAnthropicToolSchemaPlans(tools, disableStrictTools);
|
|
@@ -3676,7 +3771,7 @@ function convertTools(
|
|
|
3676
3771
|
return tools.map((tool, index) => {
|
|
3677
3772
|
const plan = schemaPlans[index];
|
|
3678
3773
|
const baseTool = {
|
|
3679
|
-
name:
|
|
3774
|
+
name: encodeAnthropicToolName(tool.name, isOAuthToken, escapeBuiltinToolNames, useUmansGatewayWebSearch),
|
|
3680
3775
|
description: tool.description || "",
|
|
3681
3776
|
input_schema: plan.inputSchema,
|
|
3682
3777
|
};
|
|
@@ -139,8 +139,11 @@ export const streamAzureOpenAIResponses: StreamFunction<"azure-openai-responses"
|
|
|
139
139
|
try {
|
|
140
140
|
const apiKey = options?.apiKey || getEnvApiKey(model.provider) || "";
|
|
141
141
|
const { url, headers } = buildAzureResponsesRequest(model, apiKey, options);
|
|
142
|
-
|
|
143
|
-
options?.onPayload?.(params);
|
|
142
|
+
let params = buildParams(model, context, options, deploymentName);
|
|
143
|
+
const replacementPayload = await options?.onPayload?.(params, model);
|
|
144
|
+
if (replacementPayload !== undefined) {
|
|
145
|
+
params = replacementPayload as typeof params;
|
|
146
|
+
}
|
|
144
147
|
const idleTimeoutMs = options?.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
|
|
145
148
|
const firstEventTimeoutMs =
|
|
146
149
|
options?.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
|
package/src/providers/cursor.ts
CHANGED
|
@@ -2335,9 +2335,10 @@ function buildRootPromptMessagesJson(
|
|
|
2335
2335
|
} else if (msg.role === "toolResult") {
|
|
2336
2336
|
const text = toolResultToText(msg);
|
|
2337
2337
|
if (!text) continue;
|
|
2338
|
+
const prefix = msg.isError ? "[Tool Error]" : "[Tool Result]";
|
|
2338
2339
|
pushJson({
|
|
2339
2340
|
role: "user",
|
|
2340
|
-
content: [{ type: "text", text:
|
|
2341
|
+
content: [{ type: "text", text: `${prefix}\n${text}` }],
|
|
2341
2342
|
});
|
|
2342
2343
|
}
|
|
2343
2344
|
}
|
|
@@ -2415,10 +2416,11 @@ function buildConversationTurns(
|
|
|
2415
2416
|
// Include tool results as assistant text for context
|
|
2416
2417
|
const text = toolResultToText(stepMsg);
|
|
2417
2418
|
if (text) {
|
|
2419
|
+
const prefix = stepMsg.isError ? "[Tool Error]" : "[Tool Result]";
|
|
2418
2420
|
const step = create(ConversationStepSchema, {
|
|
2419
2421
|
message: {
|
|
2420
2422
|
case: "assistantMessage",
|
|
2421
|
-
value: create(AssistantMessageSchema, { text:
|
|
2423
|
+
value: create(AssistantMessageSchema, { text: `${prefix}\n${text}` }),
|
|
2422
2424
|
},
|
|
2423
2425
|
});
|
|
2424
2426
|
stepBlobIds.push(storeCursorBlob(blobStore, toBinary(ConversationStepSchema, step)));
|
|
@@ -793,9 +793,12 @@ export function buildGoogleGenerateContentParams<T extends "google-generative-ai
|
|
|
793
793
|
if (context.tools && context.tools.length > 0 && options.toolChoice) {
|
|
794
794
|
const choice = options.toolChoice;
|
|
795
795
|
if (typeof choice === "string") {
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
796
|
+
const mode = mapToolChoice(choice);
|
|
797
|
+
if (mode !== "AUTO") {
|
|
798
|
+
config.toolConfig = {
|
|
799
|
+
functionCallingConfig: { mode },
|
|
800
|
+
};
|
|
801
|
+
}
|
|
799
802
|
} else {
|
|
800
803
|
// Named-tool routing — `mode: "ANY"` plus an explicit allow-list. The
|
|
801
804
|
// caller is responsible for ensuring the names exist in `context.tools`.
|