@oh-my-pi/pi-ai 15.12.4 → 15.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -1
- package/dist/types/providers/anthropic-client.d.ts +2 -0
- package/dist/types/providers/google-gemini-cli.d.ts +1 -1
- package/package.json +3 -3
- package/src/providers/amazon-bedrock.ts +19 -1
- package/src/providers/anthropic-client.ts +2 -0
- package/src/providers/anthropic.ts +14 -7
- package/src/providers/azure-openai-responses.ts +9 -1
- package/src/providers/google-gemini-cli.ts +35 -3
- package/src/providers/google-shared.ts +14 -2
- package/src/providers/ollama.ts +19 -1
- package/src/providers/openai-codex-responses.ts +27 -4
- package/src/providers/openai-completions.ts +40 -7
- package/src/providers/openai-responses-shared.ts +4 -1
- package/src/providers/openai-responses.ts +9 -4
- package/src/registry/oauth/gitlab-duo.ts +8 -3
- package/src/registry/zai.ts +1 -1
- package/src/utils/openai-http.ts +4 -0
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,31 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.13.0] - 2026-06-14
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
- Fixed OpenAI Responses/Realtime SSE stream handler crashing with "Error Code undefined: undefined" when parsing error events that carry nested error details; the handler now falls back to the nested error object's fields.
|
|
9
|
+
|
|
10
|
+
- Fixed OpenAI-compatible providers that reject forced `tool_choice` on thinking-required models by downgrading unsupported forced choices to `auto` while keeping tools available ([#2546](https://github.com/can1357/oh-my-pi/issues/2546)).
|
|
11
|
+
- Fixed GitHub Copilot Anthropic transport (`api.githubcopilot.com/v1/messages`) returning `400 tools.0.custom.eager_input_streaming: Extra inputs are not permitted` on every tool-bearing turn by stopping the emission of the per-tool `eager_input_streaming` flag and the `fine-grained-tool-streaming-2025-05-14` beta header on the Copilot transport — the proxy whitelists neither ([#2558](https://github.com/can1357/oh-my-pi/issues/2558)).
|
|
12
|
+
- Disabled Bun's native ~300s pre-response `fetch` timeout in every streaming provider (OpenAI completions/responses, Azure responses, Anthropic, Codex SSE, Bedrock, Gemini CLI, Ollama). The configurable first-event/idle/SDK watchdogs (`PI_STREAM_FIRST_EVENT_TIMEOUT_MS`, `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS`, `compat.streamIdleTimeoutMs`) were silently capped by Bun's hidden ceiling, so cold large-context streams (e.g. self-hosted vLLM at multi-hundred-K prompts) died at exactly 300s with `TimeoutError: The operation timed out.` Direct callers of `./providers/{amazon-bedrock,google-gemini-cli,ollama,openai-codex-responses}` (which bypass `register-builtins`' iterator-level watchdog) now install a pre-response `AbortSignal.timeout(firstEventTimeoutMs)` alongside `timeout: false`, so a stalled upstream still fails within the configured budget instead of hanging forever ([#2422](https://github.com/can1357/oh-my-pi/issues/2422)).
|
|
13
|
+
- Fixed Gemini / Antigravity streams (Google Cloud Code Assist API) creating a trailing empty text block and emitting redundant `text_start`/`text_delta`/`text_end` events at the end of the turn when the final SSE chunk contains an empty text part (`text: ""`). The parser now ignores empty text parts, preserving the active transcript block state and ensuring proper nesting and rendering of subsequent background jobs or new turns.
|
|
14
|
+
- Preserved terminal Google `thoughtSignature`s by still extracting and applying the signature on the active block even when the text part is empty or undefined.
|
|
15
|
+
- Stopped Gemini Antigravity sessions (`gemini-3*` / Claude under Cloud Code Assist) from leaking system rule reminders and personality preambles into the final response, by appending an explicit 'do not output rule checks' instruction to the injected system parts.
|
|
16
|
+
- Fixed Gemini / Antigravity streams (Google Cloud Code Assist API) letting a `functionCall` part's own `thoughtSignature` clobber the preceding text or thinking block's signature on `think → tool` and `text → tool` turns. A signed function-call part has `text: undefined`, so it fell into the terminal-signature branch while the prior block was still active; that branch now skips function-call parts, leaving the tool call's signature on the tool call where it belongs and preventing corrupted signatures on same-model replay.
|
|
17
|
+
- Fixed MiniMax-M3 OpenAI-compatible streams rendering reasoning twice when the same chunk carried both `<think>…</think>` content and structured `reasoning_content`; structured reasoning now wins and cumulative MiniMax reasoning snapshots are collapsed to deltas using a per-signature snapshot tracker that survives the `</think>`-to-text block transition (so post-answer cumulative snapshots don't reinstate a duplicate thinking block). ([#2433](https://github.com/can1357/oh-my-pi/issues/2433))
|
|
18
|
+
|
|
19
|
+
## [15.12.6] - 2026-06-14
|
|
20
|
+
|
|
21
|
+
### Changed
|
|
22
|
+
|
|
23
|
+
- Bumped Z.AI (GLM Coding Plan) API key validation probe to glm-5.2.
|
|
24
|
+
|
|
25
|
+
### Fixed
|
|
26
|
+
|
|
27
|
+
- Fixed tool schema conversion for non-Cloud Code Assist Google Gemini models by normalizing parameters with `normalizeSchemaForGoogle` to prevent un-normalized schema properties (such as `additionalProperties: false` or type arrays) from causing Gemini API errors.
|
|
28
|
+
- Fixed OpenAI-family request builders dropping forced named `tool_choice` directives when the named tool is absent from the serialized `tools` array, preventing spec-strict providers from rejecting self-inconsistent requests. ([#1701](https://github.com/can1357/oh-my-pi/issues/1701))
|
|
29
|
+
|
|
5
30
|
## [15.12.4] - 2026-06-13
|
|
6
31
|
|
|
7
32
|
### Added
|
|
@@ -3392,4 +3417,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
|
|
|
3392
3417
|
|
|
3393
3418
|
## [0.9.4] - 2025-11-26
|
|
3394
3419
|
|
|
3395
|
-
Initial release with multi-provider LLM support.
|
|
3420
|
+
Initial release with multi-provider LLM support.
|
|
@@ -25,6 +25,8 @@ export type AnthropicFetchOptions = RequestInit & {
|
|
|
25
25
|
cert?: string;
|
|
26
26
|
key?: string;
|
|
27
27
|
};
|
|
28
|
+
/** Bun extension: see {@link FetchWithRetryOptions.timeout} — `false` disables Bun's native fetch TTFT timeout (issue #2422). */
|
|
29
|
+
timeout?: number | false;
|
|
28
30
|
};
|
|
29
31
|
export interface AnthropicClientOptions {
|
|
30
32
|
/** Sent as `X-Api-Key` unless the header is already present in `defaultHeaders`. */
|
|
@@ -53,7 +53,7 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
|
|
|
53
53
|
requestModelId?: string;
|
|
54
54
|
projectId?: string;
|
|
55
55
|
}
|
|
56
|
-
export { ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders, getGeminiCliUserAgent, } from "@oh-my-pi/pi-catalog/wire/gemini-headers";
|
|
56
|
+
export { ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION, ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders, getGeminiCliUserAgent, } from "@oh-my-pi/pi-catalog/wire/gemini-headers";
|
|
57
57
|
interface ParsedGeminiCliCredentials {
|
|
58
58
|
accessToken: string;
|
|
59
59
|
projectId: string;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.12.4",
|
|
4
|
+
"version": "15.13.0",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -38,8 +38,8 @@
|
|
|
38
38
|
},
|
|
39
39
|
"dependencies": {
|
|
40
40
|
"@bufbuild/protobuf": "^2.12.0",
|
|
41
|
-
"@oh-my-pi/pi-catalog": "15.
|
|
42
|
-
"@oh-my-pi/pi-utils": "15.
|
|
41
|
+
"@oh-my-pi/pi-catalog": "15.13.0",
|
|
42
|
+
"@oh-my-pi/pi-utils": "15.13.0",
|
|
43
43
|
"partial-json": "^0.1.7",
|
|
44
44
|
"zod": "^4"
|
|
45
45
|
},
|
|
@@ -31,6 +31,7 @@ import type {
|
|
|
31
31
|
import { normalizeToolCallId, resolveCacheRetention } from "../utils";
|
|
32
32
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
33
33
|
import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump } from "../utils/http-inspector";
|
|
34
|
+
import { getStreamFirstEventTimeoutMs } from "../utils/idle-iterator";
|
|
34
35
|
import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
|
|
35
36
|
import { toolWireSchema } from "../utils/schema/wire";
|
|
36
37
|
import { invalidateAwsCredentialCache, resolveAwsCredentials } from "./aws-credentials";
|
|
@@ -282,12 +283,29 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
|
|
|
282
283
|
requestHeaders = { ...baseHeaders, ...signed };
|
|
283
284
|
}
|
|
284
285
|
|
|
286
|
+
// Bun's native fetch ceiling is disabled below (`timeout: false`) so
|
|
287
|
+
// configurable watchdogs govern slow-prefill streams (issue #2422).
|
|
288
|
+
// Direct callers that bypass `register-builtins` (which installs the
|
|
289
|
+
// iterator-level first-event watchdog) still need a pre-response
|
|
290
|
+
// timer, otherwise a Bedrock/proxy that accepts the POST and never
|
|
291
|
+
// sends headers would hang forever.
|
|
292
|
+
const firstEventTimeoutMs = options.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs();
|
|
293
|
+
const preResponseWatchdog =
|
|
294
|
+
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
|
|
295
|
+
? AbortSignal.timeout(firstEventTimeoutMs)
|
|
296
|
+
: undefined;
|
|
297
|
+
const fetchSignal = preResponseWatchdog
|
|
298
|
+
? options.signal
|
|
299
|
+
? AbortSignal.any([options.signal, preResponseWatchdog])
|
|
300
|
+
: preResponseWatchdog
|
|
301
|
+
: options.signal;
|
|
285
302
|
const response = await fetchWithRetry(url, {
|
|
286
303
|
method: "POST",
|
|
287
304
|
headers: requestHeaders,
|
|
288
305
|
body,
|
|
289
|
-
signal:
|
|
306
|
+
signal: fetchSignal,
|
|
290
307
|
fetch: options.fetch,
|
|
308
|
+
timeout: false,
|
|
291
309
|
});
|
|
292
310
|
|
|
293
311
|
if (!response.ok) {
|
|
@@ -57,6 +57,8 @@ export type AnthropicFetchOptions = RequestInit & {
|
|
|
57
57
|
cert?: string;
|
|
58
58
|
key?: string;
|
|
59
59
|
};
|
|
60
|
+
/** Bun extension: see {@link FetchWithRetryOptions.timeout} — `false` disables Bun's native fetch TTFT timeout (issue #2422). */
|
|
61
|
+
timeout?: number | false;
|
|
60
62
|
};
|
|
61
63
|
|
|
62
64
|
export interface AnthropicClientOptions {
|
|
@@ -2305,16 +2305,22 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
2305
2305
|
const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
|
|
2306
2306
|
const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
|
|
2307
2307
|
const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
|
|
2308
|
+
// Disable Bun's native ~300s pre-response fetch timeout (issue #2422).
|
|
2309
|
+
// `AnthropicMessagesClient` already arms its own DEFAULT_TIMEOUT_MS timer
|
|
2310
|
+
// per request, so the native ceiling can only short-circuit slow-prefill
|
|
2311
|
+
// streams before the configured watchdog gets to govern them.
|
|
2312
|
+
const fetchOptions: AnthropicFetchOptions = { ...(tlsFetchOptions ?? {}), timeout: false };
|
|
2308
2313
|
const baseFetch = args.fetch ?? fetch;
|
|
2309
2314
|
// Only OAuth requests inject the CC billing header; no API-key request can ever
|
|
2310
2315
|
// contain it, so there is no need to install the rewriter for those.
|
|
2311
2316
|
const cchFetch = oauthToken ? wrapFetchForCch(baseFetch) : baseFetch;
|
|
2312
2317
|
if (model.provider === "github-copilot") {
|
|
2313
2318
|
const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
|
|
2319
|
+
// The GitHub Copilot Anthropic proxy doesn't accept Anthropic beta
|
|
2320
|
+
// features (and the catalog already forces `supportsEagerToolInputStreaming
|
|
2321
|
+
// = false` for this host, so `needsFineGrainedToolStreamingBeta` is true
|
|
2322
|
+
// whenever tools are present). Forward only caller-supplied betas.
|
|
2314
2323
|
const betaFeatures = [...extraBetas];
|
|
2315
|
-
if (needsFineGrainedToolStreamingBeta) {
|
|
2316
|
-
betaFeatures.push(fineGrainedToolStreamingBeta);
|
|
2317
|
-
}
|
|
2318
2324
|
const defaultHeaders = mergeHeaders(
|
|
2319
2325
|
{
|
|
2320
2326
|
Accept: stream ? "text/event-stream" : "application/json",
|
|
@@ -2337,7 +2343,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
2337
2343
|
maxRetries: 5,
|
|
2338
2344
|
defaultHeaders,
|
|
2339
2345
|
fetch: cchFetch,
|
|
2340
|
-
|
|
2346
|
+
fetchOptions,
|
|
2341
2347
|
};
|
|
2342
2348
|
}
|
|
2343
2349
|
|
|
@@ -2372,6 +2378,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
2372
2378
|
maxRetries: 5,
|
|
2373
2379
|
defaultHeaders,
|
|
2374
2380
|
fetch: cchFetch,
|
|
2381
|
+
fetchOptions,
|
|
2375
2382
|
};
|
|
2376
2383
|
}
|
|
2377
2384
|
|
|
@@ -2388,7 +2395,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
2388
2395
|
maxRetries: 5,
|
|
2389
2396
|
defaultHeaders,
|
|
2390
2397
|
fetch: cchFetch,
|
|
2391
|
-
|
|
2398
|
+
fetchOptions,
|
|
2392
2399
|
};
|
|
2393
2400
|
}
|
|
2394
2401
|
// OpenCode Zen's Anthropic-compatible gateway accepts bearer auth only;
|
|
@@ -2402,7 +2409,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
2402
2409
|
maxRetries: 5,
|
|
2403
2410
|
defaultHeaders,
|
|
2404
2411
|
fetch: cchFetch,
|
|
2405
|
-
|
|
2412
|
+
fetchOptions,
|
|
2406
2413
|
};
|
|
2407
2414
|
}
|
|
2408
2415
|
|
|
@@ -2421,7 +2428,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
2421
2428
|
maxRetries: 5,
|
|
2422
2429
|
defaultHeaders,
|
|
2423
2430
|
fetch: cchFetch,
|
|
2424
|
-
|
|
2431
|
+
fetchOptions,
|
|
2425
2432
|
};
|
|
2426
2433
|
}
|
|
2427
2434
|
|
|
@@ -336,7 +336,15 @@ function buildParams(
|
|
|
336
336
|
if (context.tools) {
|
|
337
337
|
params.tools = convertTools(context.tools);
|
|
338
338
|
if (options?.toolChoice) {
|
|
339
|
-
|
|
339
|
+
const toolChoice = mapToOpenAIResponsesToolChoice(options.toolChoice);
|
|
340
|
+
if (
|
|
341
|
+
toolChoice &&
|
|
342
|
+
(typeof toolChoice === "string" ||
|
|
343
|
+
toolChoice.type !== "function" ||
|
|
344
|
+
context.tools.some(tool => tool.name === toolChoice.name))
|
|
345
|
+
) {
|
|
346
|
+
params.tool_choice = toolChoice;
|
|
347
|
+
}
|
|
340
348
|
}
|
|
341
349
|
}
|
|
342
350
|
|
|
@@ -7,6 +7,7 @@ import { createHash, randomBytes, randomUUID } from "node:crypto";
|
|
|
7
7
|
import { scheduler } from "node:timers/promises";
|
|
8
8
|
import { calculateCost } from "@oh-my-pi/pi-catalog/models";
|
|
9
9
|
import {
|
|
10
|
+
ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION,
|
|
10
11
|
ANTIGRAVITY_SYSTEM_INSTRUCTION,
|
|
11
12
|
getAntigravityUserAgent,
|
|
12
13
|
getGeminiCliHeaders,
|
|
@@ -27,6 +28,7 @@ import type {
|
|
|
27
28
|
import { normalizeSystemPrompts } from "../utils";
|
|
28
29
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
29
30
|
import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump } from "../utils/http-inspector";
|
|
31
|
+
import { getStreamFirstEventTimeoutMs } from "../utils/idle-iterator";
|
|
30
32
|
// Refresh is the sole responsibility of AuthStorage (broker-aware, single-flighted);
|
|
31
33
|
// the stream provider trusts the access token threaded through `options.apiKey`.
|
|
32
34
|
import { normalizeSchemaForCCA } from "../utils/schema";
|
|
@@ -101,6 +103,7 @@ const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googlea
|
|
|
101
103
|
const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
|
|
102
104
|
|
|
103
105
|
export {
|
|
106
|
+
ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION,
|
|
104
107
|
ANTIGRAVITY_SYSTEM_INSTRUCTION,
|
|
105
108
|
getAntigravityUserAgent,
|
|
106
109
|
getGeminiCliHeaders,
|
|
@@ -365,17 +368,34 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
365
368
|
headers: requestHeaders,
|
|
366
369
|
};
|
|
367
370
|
|
|
371
|
+
// Direct callers that skip `register-builtins` (which installs the
|
|
372
|
+
// iterator-level watchdog) need a pre-response timer alongside
|
|
373
|
+
// `timeout: false`; otherwise a stalled Cloud Code Assist proxy
|
|
374
|
+
// would hang forever. Floor matches the lazy wrapper's 5min default.
|
|
375
|
+
const firstEventTimeoutMs =
|
|
376
|
+
options?.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(undefined, 300_000);
|
|
377
|
+
const preResponseWatchdog =
|
|
378
|
+
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
|
|
379
|
+
? AbortSignal.timeout(firstEventTimeoutMs)
|
|
380
|
+
: undefined;
|
|
381
|
+
const callerSignal = options?.signal;
|
|
382
|
+
const fetchSignal = preResponseWatchdog
|
|
383
|
+
? callerSignal
|
|
384
|
+
? AbortSignal.any([callerSignal, preResponseWatchdog])
|
|
385
|
+
: preResponseWatchdog
|
|
386
|
+
: callerSignal;
|
|
368
387
|
const response = await fetchWithRetry(
|
|
369
388
|
attempt => `${endpoints[Math.min(attempt, endpoints.length - 1)]}/v1internal:streamGenerateContent?alt=sse`,
|
|
370
389
|
{
|
|
371
390
|
method: "POST",
|
|
372
391
|
headers: requestHeaders,
|
|
373
392
|
body: requestBodyJson,
|
|
374
|
-
signal:
|
|
393
|
+
signal: fetchSignal,
|
|
375
394
|
maxAttempts: MAX_RETRIES + 1,
|
|
376
395
|
defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
|
|
377
396
|
maxDelayMs: options?.maxRetryDelayMs ?? RATE_LIMIT_BUDGET_MS,
|
|
378
397
|
fetch: options?.fetch,
|
|
398
|
+
timeout: false,
|
|
379
399
|
},
|
|
380
400
|
);
|
|
381
401
|
if (!response.ok) {
|
|
@@ -447,7 +467,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
447
467
|
const candidate = responseData.candidates?.[0];
|
|
448
468
|
if (candidate?.content?.parts) {
|
|
449
469
|
for (const part of candidate.content.parts) {
|
|
450
|
-
if (part.text !== undefined) {
|
|
470
|
+
if (part.text !== undefined && part.text !== "") {
|
|
451
471
|
const isThinking = isThinkingPart(part);
|
|
452
472
|
if (
|
|
453
473
|
!currentBlock ||
|
|
@@ -484,6 +504,18 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
484
504
|
partial: output,
|
|
485
505
|
});
|
|
486
506
|
}
|
|
507
|
+
} else if (part.text === "" && part.thoughtSignature && currentBlock && !part.functionCall) {
|
|
508
|
+
if (currentBlock.type === "thinking") {
|
|
509
|
+
currentBlock.thinkingSignature = retainThoughtSignature(
|
|
510
|
+
currentBlock.thinkingSignature,
|
|
511
|
+
part.thoughtSignature,
|
|
512
|
+
);
|
|
513
|
+
} else {
|
|
514
|
+
currentBlock.textSignature = retainThoughtSignature(
|
|
515
|
+
currentBlock.textSignature,
|
|
516
|
+
part.thoughtSignature,
|
|
517
|
+
);
|
|
518
|
+
}
|
|
487
519
|
}
|
|
488
520
|
|
|
489
521
|
if (part.functionCall) {
|
|
@@ -849,10 +881,10 @@ export function buildRequest(
|
|
|
849
881
|
if (isAntigravity && shouldInjectAntigravitySystemInstruction(model.id)) {
|
|
850
882
|
const existingParts = request.systemInstruction?.parts ?? [];
|
|
851
883
|
request.systemInstruction = {
|
|
852
|
-
role: "user",
|
|
853
884
|
parts: [
|
|
854
885
|
{ text: ANTIGRAVITY_SYSTEM_INSTRUCTION },
|
|
855
886
|
{ text: `Please ignore following [ignore]${ANTIGRAVITY_SYSTEM_INSTRUCTION}[/ignore]` },
|
|
887
|
+
{ text: ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION },
|
|
856
888
|
...existingParts,
|
|
857
889
|
],
|
|
858
890
|
};
|
|
@@ -372,7 +372,7 @@ export function convertTools(
|
|
|
372
372
|
description: tool.description || "",
|
|
373
373
|
...(useParameters
|
|
374
374
|
? { parameters: normalizeSchemaForCCA(toolWireSchema(tool)) }
|
|
375
|
-
: { parametersJsonSchema: toolWireSchema(tool) }),
|
|
375
|
+
: { parametersJsonSchema: normalizeSchemaForGoogle(toolWireSchema(tool)) }),
|
|
376
376
|
})),
|
|
377
377
|
},
|
|
378
378
|
];
|
|
@@ -609,7 +609,7 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
|
|
|
609
609
|
const candidate = chunk.candidates?.[0];
|
|
610
610
|
if (candidate?.content?.parts) {
|
|
611
611
|
for (const part of candidate.content.parts) {
|
|
612
|
-
if (part.text !== undefined) {
|
|
612
|
+
if (part.text !== undefined && part.text !== "") {
|
|
613
613
|
if (!firstTokenSeen) {
|
|
614
614
|
firstTokenSeen = true;
|
|
615
615
|
onFirstToken?.();
|
|
@@ -650,6 +650,18 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
|
|
|
650
650
|
partial: output,
|
|
651
651
|
});
|
|
652
652
|
}
|
|
653
|
+
} else if (part.text === "" && part.thoughtSignature && currentBlock && !part.functionCall) {
|
|
654
|
+
if (currentBlock.type === "thinking") {
|
|
655
|
+
currentBlock.thinkingSignature = retainThoughtSignature(
|
|
656
|
+
currentBlock.thinkingSignature,
|
|
657
|
+
part.thoughtSignature,
|
|
658
|
+
);
|
|
659
|
+
} else if (retainTextSignature) {
|
|
660
|
+
currentBlock.textSignature = retainThoughtSignature(
|
|
661
|
+
currentBlock.textSignature,
|
|
662
|
+
part.thoughtSignature,
|
|
663
|
+
);
|
|
664
|
+
}
|
|
653
665
|
}
|
|
654
666
|
|
|
655
667
|
if (part.functionCall) {
|
package/src/providers/ollama.ts
CHANGED
|
@@ -18,6 +18,7 @@ import type {
|
|
|
18
18
|
import { normalizeSystemPrompts } from "../utils";
|
|
19
19
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
20
20
|
import { type CapturedHttpErrorResponse, finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
|
|
21
|
+
import { getOpenAIStreamFirstEventTimeoutMs, getOpenAIStreamIdleTimeoutMs } from "../utils/idle-iterator";
|
|
21
22
|
import { parseStreamingJson } from "../utils/json-parse";
|
|
22
23
|
import { toolWireSchema } from "../utils/schema/wire";
|
|
23
24
|
import {
|
|
@@ -525,6 +526,22 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
|
|
|
525
526
|
url: `${baseUrl}/api/chat`,
|
|
526
527
|
body,
|
|
527
528
|
};
|
|
529
|
+
// Direct callers that bypass `register-builtins` (which installs
|
|
530
|
+
// the iterator-level watchdog) need a pre-response timer alongside
|
|
531
|
+
// `timeout: false`; otherwise an Ollama server that accepts the
|
|
532
|
+
// POST and never streams headers would hang forever (issue #2422).
|
|
533
|
+
const idleTimeoutMs = options.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
|
|
534
|
+
const firstEventTimeoutMs =
|
|
535
|
+
options.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
|
|
536
|
+
const preResponseWatchdog =
|
|
537
|
+
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
|
|
538
|
+
? AbortSignal.timeout(firstEventTimeoutMs)
|
|
539
|
+
: undefined;
|
|
540
|
+
const fetchSignal = preResponseWatchdog
|
|
541
|
+
? options.signal
|
|
542
|
+
? AbortSignal.any([options.signal, preResponseWatchdog])
|
|
543
|
+
: preResponseWatchdog
|
|
544
|
+
: options.signal;
|
|
528
545
|
const response = await fetchWithRetry(`${baseUrl}/api/chat`, {
|
|
529
546
|
method: "POST",
|
|
530
547
|
headers: {
|
|
@@ -534,9 +551,10 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
|
|
|
534
551
|
"Content-Type": "application/json",
|
|
535
552
|
},
|
|
536
553
|
body: JSON.stringify(body),
|
|
537
|
-
signal:
|
|
554
|
+
signal: fetchSignal,
|
|
538
555
|
defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
|
|
539
556
|
fetch: options.fetch,
|
|
557
|
+
timeout: false,
|
|
540
558
|
});
|
|
541
559
|
if (!response.ok) {
|
|
542
560
|
capturedErrorResponse = await captureHttpErrorResponse(response);
|
|
@@ -272,6 +272,7 @@ interface CodexRequestSetup {
|
|
|
272
272
|
requestSignal: AbortSignal;
|
|
273
273
|
wrapCodexSseStream: (source: AsyncGenerator<Record<string, unknown>>) => AsyncGenerator<Record<string, unknown>>;
|
|
274
274
|
requestAbortController: AbortController;
|
|
275
|
+
firstEventTimeoutMs: number | undefined;
|
|
275
276
|
websocketIdleTimeoutMs: number | undefined;
|
|
276
277
|
websocketFirstEventTimeoutMs: number | undefined;
|
|
277
278
|
}
|
|
@@ -554,13 +555,16 @@ export function normalizeCodexToolChoice(
|
|
|
554
555
|
if (!choice) return undefined;
|
|
555
556
|
if (typeof choice === "string") return choice;
|
|
556
557
|
const allowFreeform = model ? supportsFreeformApplyPatchCodex(model) : false;
|
|
557
|
-
const mapName = (name: string): Record<string, string> => {
|
|
558
|
+
const mapName = (name: string): Record<string, string> | undefined => {
|
|
559
|
+
const directTool = tools.find(tool => tool.name === name);
|
|
558
560
|
const customTool = allowFreeform
|
|
559
561
|
? tools.find(tool => tool.customFormat && (tool.name === name || tool.customWireName === name))
|
|
560
562
|
: undefined;
|
|
563
|
+
const offeredTool = customTool ?? directTool;
|
|
564
|
+
if (!offeredTool) return undefined;
|
|
561
565
|
return customTool
|
|
562
566
|
? { type: "custom", name: customTool.customWireName ?? customTool.name }
|
|
563
|
-
: { type: "function", name };
|
|
567
|
+
: { type: "function", name: offeredTool.name };
|
|
564
568
|
};
|
|
565
569
|
if (choice.type === "function") {
|
|
566
570
|
if ("function" in choice && choice.function?.name) {
|
|
@@ -687,6 +691,7 @@ function createRequestSetup(options: OpenAICodexResponsesOptions | undefined): C
|
|
|
687
691
|
requestAbortController,
|
|
688
692
|
requestSignal,
|
|
689
693
|
wrapCodexSseStream,
|
|
694
|
+
firstEventTimeoutMs,
|
|
690
695
|
websocketIdleTimeoutMs,
|
|
691
696
|
websocketFirstEventTimeoutMs,
|
|
692
697
|
};
|
|
@@ -983,6 +988,7 @@ async function openCodexSseTransport(
|
|
|
983
988
|
state,
|
|
984
989
|
requestContext.responsesLite,
|
|
985
990
|
requestSetup.requestSignal,
|
|
991
|
+
requestSetup.firstEventTimeoutMs,
|
|
986
992
|
event => options?.onSseEvent?.(event, model),
|
|
987
993
|
options?.fetch,
|
|
988
994
|
),
|
|
@@ -3016,7 +3022,8 @@ async function openCodexSseEventStream(
|
|
|
3016
3022
|
body: RequestBody,
|
|
3017
3023
|
state: CodexWebSocketSessionState | undefined,
|
|
3018
3024
|
responsesLite: boolean,
|
|
3019
|
-
signal
|
|
3025
|
+
signal: AbortSignal | undefined,
|
|
3026
|
+
firstEventTimeoutMs: number | undefined,
|
|
3020
3027
|
onSseEvent?: OpenAICodexResponsesOptions["onSseEvent"],
|
|
3021
3028
|
fetchOverride?: FetchImpl,
|
|
3022
3029
|
): Promise<AsyncGenerator<Record<string, unknown>>> {
|
|
@@ -3028,15 +3035,31 @@ async function openCodexSseEventStream(
|
|
|
3028
3035
|
sentTurnStateHeader: headers.has(X_CODEX_TURN_STATE_HEADER),
|
|
3029
3036
|
sentModelsEtagHeader: headers.has(X_MODELS_ETAG_HEADER),
|
|
3030
3037
|
});
|
|
3038
|
+
// `wrapCodexSseStream` arms a first-event watchdog only after this fetch
|
|
3039
|
+
// resolves (it wraps the SSE generator). With `timeout: false` disabling
|
|
3040
|
+
// Bun's native 300s ceiling, a stalled pre-response request needs its own
|
|
3041
|
+
// watchdog — combine the caller signal with a fresh
|
|
3042
|
+
// `AbortSignal.timeout(firstEventTimeoutMs)` so headers must arrive
|
|
3043
|
+
// within the configured budget (issue #2422).
|
|
3044
|
+
const preResponseWatchdog =
|
|
3045
|
+
firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
|
|
3046
|
+
? AbortSignal.timeout(firstEventTimeoutMs)
|
|
3047
|
+
: undefined;
|
|
3048
|
+
const fetchSignal = preResponseWatchdog
|
|
3049
|
+
? signal
|
|
3050
|
+
? AbortSignal.any([signal, preResponseWatchdog])
|
|
3051
|
+
: preResponseWatchdog
|
|
3052
|
+
: signal;
|
|
3031
3053
|
const response = await fetchWithRetry(url, {
|
|
3032
3054
|
method: "POST",
|
|
3033
3055
|
headers,
|
|
3034
3056
|
body: JSON.stringify(body),
|
|
3035
|
-
signal,
|
|
3057
|
+
signal: fetchSignal,
|
|
3036
3058
|
maxAttempts: CODEX_MAX_RETRIES + 1,
|
|
3037
3059
|
defaultDelayMs: attempt => CODEX_RETRY_DELAY_MS * (attempt + 1),
|
|
3038
3060
|
maxDelayMs: CODEX_RATE_LIMIT_BUDGET_MS,
|
|
3039
3061
|
fetch: fetchOverride,
|
|
3062
|
+
timeout: false,
|
|
3040
3063
|
});
|
|
3041
3064
|
logCodexDebug("codex response", {
|
|
3042
3065
|
url: response.url,
|
|
@@ -699,6 +699,14 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
699
699
|
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
700
700
|
appendText(output, stream, text);
|
|
701
701
|
};
|
|
702
|
+
// Tracks the last full cumulative reasoning snapshot per signature (the
|
|
703
|
+
// reasoning field name) so dedup survives block transitions. Required
|
|
704
|
+
// for MiniMax-M3: once `</think>` and visible text arrive, currentBlock
|
|
705
|
+
// flips to "text", but later chunks keep carrying the same cumulative
|
|
706
|
+
// `reasoning_content` snapshot. Without an external tracker the guard
|
|
707
|
+
// below misses and the snapshot gets re-emitted as a fresh thinking
|
|
708
|
+
// block after the answer has started.
|
|
709
|
+
const lastCumulativeReasoningBySignature = new Map<string, string>();
|
|
702
710
|
const appendThinkingDelta = (
|
|
703
711
|
thinking: string,
|
|
704
712
|
signature?: string,
|
|
@@ -706,13 +714,13 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
706
714
|
): void => {
|
|
707
715
|
if (!thinking) return;
|
|
708
716
|
let emittedThinking = thinking;
|
|
709
|
-
if (
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
(
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
717
|
+
if (source === "cumulative") {
|
|
718
|
+
const key = signature ?? "";
|
|
719
|
+
const lastSnapshot = lastCumulativeReasoningBySignature.get(key) ?? "";
|
|
720
|
+
if (thinking.startsWith(lastSnapshot)) {
|
|
721
|
+
emittedThinking = thinking.slice(lastSnapshot.length);
|
|
722
|
+
}
|
|
723
|
+
lastCumulativeReasoningBySignature.set(key, thinking);
|
|
716
724
|
if (!emittedThinking) return;
|
|
717
725
|
}
|
|
718
726
|
if (!firstTokenTime) firstTokenTime = Date.now();
|
|
@@ -1217,6 +1225,11 @@ async function createRequestSetup(
|
|
|
1217
1225
|
};
|
|
1218
1226
|
}
|
|
1219
1227
|
|
|
1228
|
+
function getForcedCompletionsToolName(toolChoice: OpenAICompletionsParams["tool_choice"]): string | undefined {
|
|
1229
|
+
if (typeof toolChoice !== "object" || toolChoice === null || !("function" in toolChoice)) return undefined;
|
|
1230
|
+
return toolChoice.function.name;
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1220
1233
|
function buildParams(
|
|
1221
1234
|
model: Model<"openai-completions">,
|
|
1222
1235
|
context: Context,
|
|
@@ -1228,6 +1241,7 @@ function buildParams(
|
|
|
1228
1241
|
Boolean(options?.reasoning) && !options?.disableReasoning && Boolean(model.reasoning);
|
|
1229
1242
|
const forcedToolChoiceSuppressesThinking =
|
|
1230
1243
|
compat.disableReasoningOnForcedToolChoice &&
|
|
1244
|
+
compat.supportsForcedToolChoice &&
|
|
1231
1245
|
isForcedToolChoice(mapToOpenAICompletionsToolChoice(options?.toolChoice));
|
|
1232
1246
|
if (compat.whenThinking && thinkingEnabledForRequest && !forcedToolChoiceSuppressesThinking) {
|
|
1233
1247
|
compat = compat.whenThinking; // precomputed at model build — pointer swap, no allocation
|
|
@@ -1329,6 +1343,12 @@ function buildParams(
|
|
|
1329
1343
|
if (options?.toolChoice && compat.supportsToolChoice) {
|
|
1330
1344
|
params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
|
|
1331
1345
|
}
|
|
1346
|
+
if (isForcedToolChoice(params.tool_choice) && !compat.supportsForcedToolChoice) {
|
|
1347
|
+
// Some thinking-required OpenAI-compatible models reject forced
|
|
1348
|
+
// `tool_choice` while still accepting tools with the default auto
|
|
1349
|
+
// selector. Keep the tool available and let the model choose it.
|
|
1350
|
+
params.tool_choice = "auto";
|
|
1351
|
+
}
|
|
1332
1352
|
|
|
1333
1353
|
if (params.tool_choice === "none" && (!Array.isArray(params.tools) || params.tools.length === 0)) {
|
|
1334
1354
|
// `tool_choice: "none"` with no tools to gate is redundant and also
|
|
@@ -1342,6 +1362,19 @@ function buildParams(
|
|
|
1342
1362
|
delete params.tool_choice;
|
|
1343
1363
|
}
|
|
1344
1364
|
|
|
1365
|
+
const forcedToolName = getForcedCompletionsToolName(params.tool_choice);
|
|
1366
|
+
if (
|
|
1367
|
+
forcedToolName !== undefined &&
|
|
1368
|
+
(!Array.isArray(params.tools) ||
|
|
1369
|
+
!params.tools.some(tool => tool.type === "function" && tool.function.name === forcedToolName))
|
|
1370
|
+
) {
|
|
1371
|
+
// A forced named tool_choice is only valid when the same request offers
|
|
1372
|
+
// that function in `tools`. Active-tool filtering normally enforces this
|
|
1373
|
+
// before provider dispatch; this guard keeps raw provider callers from
|
|
1374
|
+
// emitting a self-inconsistent OpenAI-compatible payload.
|
|
1375
|
+
delete params.tool_choice;
|
|
1376
|
+
}
|
|
1377
|
+
|
|
1345
1378
|
if (supportsReasoningParams && compat.thinkingFormat === "zai" && model.reasoning) {
|
|
1346
1379
|
// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
|
|
1347
1380
|
// Must explicitly disable since z.ai defaults to thinking enabled.
|
|
@@ -934,7 +934,10 @@ export async function processResponsesStream<TApi extends Api>(
|
|
|
934
934
|
// reaches the SDK stream), actively releasing the connection.
|
|
935
935
|
break;
|
|
936
936
|
} else if (event.type === "error") {
|
|
937
|
-
|
|
937
|
+
const err = (event as any).error ?? event;
|
|
938
|
+
const code = err.code ?? "unknown";
|
|
939
|
+
const message = err.message ?? "no message";
|
|
940
|
+
throw new Error(`Error Code ${code}: ${message}`);
|
|
938
941
|
} else if (event.type === "response.failed") {
|
|
939
942
|
populateResponsesUsageFromResponse(output, event.response?.usage);
|
|
940
943
|
const error = event.response?.error ?? (event.response as any)?.status_details?.error;
|
|
@@ -836,13 +836,18 @@ export function mapOpenAIResponsesToolChoiceForTools(
|
|
|
836
836
|
model: Model<"openai-responses">,
|
|
837
837
|
): OpenAIResponsesToolChoice {
|
|
838
838
|
const mapped = mapToOpenAIResponsesToolChoice(choice);
|
|
839
|
-
if (!mapped || typeof mapped === "string" || mapped.type !== "function"
|
|
839
|
+
if (!mapped || typeof mapped === "string" || mapped.type !== "function") {
|
|
840
840
|
return mapped;
|
|
841
841
|
}
|
|
842
842
|
|
|
843
|
-
const
|
|
844
|
-
|
|
845
|
-
|
|
843
|
+
const directTool = tools.find(tool => tool.name === mapped.name);
|
|
844
|
+
const customTool = supportsFreeformApplyPatch(model)
|
|
845
|
+
? tools.find(tool => tool.customFormat && (tool.name === mapped.name || tool.customWireName === mapped.name))
|
|
846
|
+
: undefined;
|
|
847
|
+
const offeredTool = customTool ?? directTool;
|
|
848
|
+
if (!offeredTool) {
|
|
849
|
+
return undefined;
|
|
850
|
+
}
|
|
846
851
|
return customTool ? { type: "custom", name: customTool.customWireName ?? customTool.name } : mapped;
|
|
847
852
|
}
|
|
848
853
|
|
|
@@ -40,9 +40,10 @@ function resolveClientId(): string {
|
|
|
40
40
|
/**
|
|
41
41
|
* Resolve callback-server options from `GITLAB_REDIRECT_URI`. When set, the
|
|
42
42
|
* exact string is advertised to GitLab (strict matching), random-port fallback
|
|
43
|
-
* is disabled, and
|
|
44
|
-
* so the browser callback lands on us.
|
|
45
|
-
*
|
|
43
|
+
* is disabled, and HTTP loopback URIs bind the listener to the URI's host/port
|
|
44
|
+
* so the browser callback lands on us. HTTPS loopback URIs are rejected because
|
|
45
|
+
* the local callback server is plaintext HTTP. Non-loopback URIs bind a random
|
|
46
|
+
* local port — only the paste-code path can complete in that case.
|
|
46
47
|
*/
|
|
47
48
|
function resolveCallbackOptions(): OAuthCallbackFlowOptions {
|
|
48
49
|
const raw = process.env.GITLAB_REDIRECT_URI?.trim();
|
|
@@ -65,6 +66,10 @@ function resolveCallbackOptions(): OAuthCallbackFlowOptions {
|
|
|
65
66
|
}
|
|
66
67
|
|
|
67
68
|
const isLoopback = parsed.hostname === "localhost" || parsed.hostname === "127.0.0.1" || parsed.hostname === "[::1]";
|
|
69
|
+
if (isLoopback && parsed.protocol !== "http:") {
|
|
70
|
+
throw new Error(`GITLAB_REDIRECT_URI loopback callbacks must use http://, got: ${raw}`);
|
|
71
|
+
}
|
|
72
|
+
|
|
68
73
|
const port = parsed.port ? Number.parseInt(parsed.port, 10) : parsed.protocol === "https:" ? 443 : 80;
|
|
69
74
|
|
|
70
75
|
return {
|
package/src/registry/zai.ts
CHANGED
|
@@ -4,7 +4,7 @@ import type { ProviderDefinition } from "./types";
|
|
|
4
4
|
|
|
5
5
|
const AUTH_URL = "https://z.ai/manage-apikey/apikey-list";
|
|
6
6
|
const API_BASE_URL = "https://api.z.ai/api/coding/paas/v4";
|
|
7
|
-
const VALIDATION_MODEL = "glm-
|
|
7
|
+
const VALIDATION_MODEL = "glm-5.2";
|
|
8
8
|
|
|
9
9
|
export async function loginZai(options: OAuthController): Promise<string> {
|
|
10
10
|
if (!options.onPrompt) {
|
package/src/utils/openai-http.ts
CHANGED
|
@@ -79,6 +79,10 @@ export async function postOpenAIStream<TEvent>(init: OpenAIStreamRequestInit): P
|
|
|
79
79
|
signal: init.signal,
|
|
80
80
|
fetch: init.fetch,
|
|
81
81
|
maxAttempts: init.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
|
|
82
|
+
// Bun's native fetch enforces a hard ~300s pre-response timeout (issue #2422).
|
|
83
|
+
// Cold large-context streams legitimately exceed it; the caller's
|
|
84
|
+
// `firstEventTimeoutMs`/`AbortSignal` already govern stuck requests.
|
|
85
|
+
timeout: false,
|
|
82
86
|
});
|
|
83
87
|
if (!response.ok) {
|
|
84
88
|
throw await captureOpenAIHttpError(response);
|