@oh-my-pi/pi-ai 15.12.4 → 15.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,31 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.13.0] - 2026-06-14
6
+
7
+ ### Fixed
8
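+ - Fixed the OpenAI Responses/Realtime SSE stream handler failing with "Error Code undefined: undefined" on error events that nest their details under `error`; the handler now reads `code` and `message` from the nested error object when present, and substitutes `unknown` / `no message` when a field is missing.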
9
+
10
+ - Fixed requests to OpenAI-compatible providers that reject forced `tool_choice` on thinking-required models: unsupported forced choices are now downgraded to `auto` while the tools stay available ([#2546](https://github.com/can1357/oh-my-pi/issues/2546)).
11
+ - Fixed GitHub Copilot Anthropic transport (`api.githubcopilot.com/v1/messages`) returning `400 tools.0.custom.eager_input_streaming: Extra inputs are not permitted` on every tool-bearing turn by stopping the emission of the per-tool `eager_input_streaming` flag and the `fine-grained-tool-streaming-2025-05-14` beta header on the Copilot transport — the proxy whitelists neither ([#2558](https://github.com/can1357/oh-my-pi/issues/2558)).
12
+ - Disabled Bun's native ~300s pre-response `fetch` timeout in every streaming provider (OpenAI completions/responses, Azure responses, Anthropic, Codex SSE, Bedrock, Gemini CLI, Ollama). The configurable first-event/idle/SDK watchdogs (`PI_STREAM_FIRST_EVENT_TIMEOUT_MS`, `PI_OPENAI_STREAM_IDLE_TIMEOUT_MS`, `compat.streamIdleTimeoutMs`) were silently capped by Bun's hidden ceiling, so cold large-context streams (e.g. self-hosted vLLM at multi-hundred-K prompts) died at exactly 300s with `TimeoutError: The operation timed out.` Direct callers of `./providers/{amazon-bedrock,google-gemini-cli,ollama,openai-codex-responses}` (which bypass `register-builtins`' iterator-level watchdog) now install a pre-response `AbortSignal.timeout(firstEventTimeoutMs)` alongside `timeout: false`, so a stalled upstream still fails within the configured budget instead of hanging forever ([#2422](https://github.com/can1357/oh-my-pi/issues/2422)).
13
+ - Fixed Gemini / Antigravity streams (Google Cloud Code Assist API) creating a trailing empty text block and emitting redundant `text_start`/`text_delta`/`text_end` events at the end of the turn when the final SSE chunk contains an empty text part (`text: ""`). The parser now ignores empty text parts, preserving the active transcript block state and ensuring proper nesting and rendering of subsequent background jobs or new turns.
14
+ - Preserved terminal Google `thoughtSignature`s by still extracting and applying the signature on the active block even when the text part is empty or undefined.
15
+ - Stopped Gemini Antigravity sessions (`gemini-3*` / Claude under Cloud Code Assist) from leaking system rule reminders and personality preambles into the final response, by appending an explicit 'do not output rule checks' instruction to the injected system parts.
16
+ - Fixed Gemini / Antigravity streams (Google Cloud Code Assist API) letting a `functionCall` part's own `thoughtSignature` clobber the preceding text or thinking block's signature on `think → tool` and `text → tool` turns. A signed function-call part has `text: undefined`, so it fell into the terminal-signature branch while the prior block was still active; that branch now skips function-call parts, leaving the tool call's signature on the tool call where it belongs and preventing corrupted signatures on same-model replay.
17
+ - Fixed MiniMax-M3 OpenAI-compatible streams rendering reasoning twice when the same chunk carried both `<think>…</think>` content and structured `reasoning_content`; structured reasoning now wins and cumulative MiniMax reasoning snapshots are collapsed to deltas using a per-signature snapshot tracker that survives the `</think>`-to-text block transition (so post-answer cumulative snapshots don't reinstate a duplicate thinking block). ([#2433](https://github.com/can1357/oh-my-pi/issues/2433))
18
+
19
+ ## [15.12.6] - 2026-06-14
20
+
21
+ ### Changed
22
+
23
+ - Bumped Z.AI (GLM Coding Plan) API key validation probe to glm-5.2.
24
+
25
+ ### Fixed
26
+
27
+ - Fixed tool schema conversion for non-Cloud Code Assist Google Gemini models by normalizing parameters with `normalizeSchemaForGoogle` to prevent un-normalized schema properties (such as `additionalProperties: false` or type arrays) from causing Gemini API errors.
28
+ - OpenAI-family request builders now drop a forced named `tool_choice` directive when the named tool is absent from the serialized `tools` array, so spec-strict providers no longer reject self-inconsistent requests. ([#1701](https://github.com/can1357/oh-my-pi/issues/1701))
29
+
5
30
  ## [15.12.4] - 2026-06-13
6
31
 
7
32
  ### Added
@@ -3392,4 +3417,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
3392
3417
 
3393
3418
  ## [0.9.4] - 2025-11-26
3394
3419
 
3395
- Initial release with multi-provider LLM support.
3420
+ Initial release with multi-provider LLM support.
@@ -25,6 +25,8 @@ export type AnthropicFetchOptions = RequestInit & {
25
25
  cert?: string;
26
26
  key?: string;
27
27
  };
28
+ /** Bun extension: see {@link FetchWithRetryOptions.timeout} — `false` disables Bun's native fetch TTFT timeout (issue #2422). */
29
+ timeout?: number | false;
28
30
  };
29
31
  export interface AnthropicClientOptions {
30
32
  /** Sent as `X-Api-Key` unless the header is already present in `defaultHeaders`. */
@@ -53,7 +53,7 @@ export interface GoogleGeminiCliOptions extends StreamOptions {
53
53
  requestModelId?: string;
54
54
  projectId?: string;
55
55
  }
56
- export { ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders, getGeminiCliUserAgent, } from "@oh-my-pi/pi-catalog/wire/gemini-headers";
56
+ export { ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION, ANTIGRAVITY_SYSTEM_INSTRUCTION, getAntigravityUserAgent, getGeminiCliHeaders, getGeminiCliUserAgent, } from "@oh-my-pi/pi-catalog/wire/gemini-headers";
57
57
  interface ParsedGeminiCliCredentials {
58
58
  accessToken: string;
59
59
  projectId: string;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.12.4",
4
+ "version": "15.13.0",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
@@ -38,8 +38,8 @@
38
38
  },
39
39
  "dependencies": {
40
40
  "@bufbuild/protobuf": "^2.12.0",
41
- "@oh-my-pi/pi-catalog": "15.12.4",
42
- "@oh-my-pi/pi-utils": "15.12.4",
41
+ "@oh-my-pi/pi-catalog": "15.13.0",
42
+ "@oh-my-pi/pi-utils": "15.13.0",
43
43
  "partial-json": "^0.1.7",
44
44
  "zod": "^4"
45
45
  },
@@ -31,6 +31,7 @@ import type {
31
31
  import { normalizeToolCallId, resolveCacheRetention } from "../utils";
32
32
  import { AssistantMessageEventStream } from "../utils/event-stream";
33
33
  import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump } from "../utils/http-inspector";
34
+ import { getStreamFirstEventTimeoutMs } from "../utils/idle-iterator";
34
35
  import { parseStreamingJson, parseStreamingJsonThrottled } from "../utils/json-parse";
35
36
  import { toolWireSchema } from "../utils/schema/wire";
36
37
  import { invalidateAwsCredentialCache, resolveAwsCredentials } from "./aws-credentials";
@@ -282,12 +283,29 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream"> = (
282
283
  requestHeaders = { ...baseHeaders, ...signed };
283
284
  }
284
285
 
286
+ // Bun's native fetch ceiling is disabled below (`timeout: false`) so
287
+ // configurable watchdogs govern slow-prefill streams (issue #2422).
288
+ // Direct callers that bypass `register-builtins` (which installs the
289
+ // iterator-level first-event watchdog) still need a pre-response
290
+ // timer, otherwise a Bedrock/proxy that accepts the POST and never
291
+ // sends headers would hang forever.
292
+ const firstEventTimeoutMs = options.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs();
293
+ const preResponseWatchdog =
294
+ firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
295
+ ? AbortSignal.timeout(firstEventTimeoutMs)
296
+ : undefined;
297
+ const fetchSignal = preResponseWatchdog
298
+ ? options.signal
299
+ ? AbortSignal.any([options.signal, preResponseWatchdog])
300
+ : preResponseWatchdog
301
+ : options.signal;
285
302
  const response = await fetchWithRetry(url, {
286
303
  method: "POST",
287
304
  headers: requestHeaders,
288
305
  body,
289
- signal: options.signal,
306
+ signal: fetchSignal,
290
307
  fetch: options.fetch,
308
+ timeout: false,
291
309
  });
292
310
 
293
311
  if (!response.ok) {
@@ -57,6 +57,8 @@ export type AnthropicFetchOptions = RequestInit & {
57
57
  cert?: string;
58
58
  key?: string;
59
59
  };
60
+ /** Bun extension: see {@link FetchWithRetryOptions.timeout} — `false` disables Bun's native fetch TTFT timeout (issue #2422). */
61
+ timeout?: number | false;
60
62
  };
61
63
 
62
64
  export interface AnthropicClientOptions {
@@ -2305,16 +2305,22 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
2305
2305
  const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
2306
2306
  const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
2307
2307
  const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
2308
+ // Disable Bun's native ~300s pre-response fetch timeout (issue #2422).
2309
+ // `AnthropicMessagesClient` already arms its own DEFAULT_TIMEOUT_MS timer
2310
+ // per request, so the native ceiling can only short-circuit slow-prefill
2311
+ // streams before the configured watchdog gets to govern them.
2312
+ const fetchOptions: AnthropicFetchOptions = { ...(tlsFetchOptions ?? {}), timeout: false };
2308
2313
  const baseFetch = args.fetch ?? fetch;
2309
2314
  // Only OAuth requests inject the CC billing header; no API-key request can ever
2310
2315
  // contain it, so there is no need to install the rewriter for those.
2311
2316
  const cchFetch = oauthToken ? wrapFetchForCch(baseFetch) : baseFetch;
2312
2317
  if (model.provider === "github-copilot") {
2313
2318
  const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
2319
+ // The GitHub Copilot Anthropic proxy doesn't accept Anthropic beta
2320
+ // features (and the catalog already forces `supportsEagerToolInputStreaming
2321
+ // = false` for this host, so `needsFineGrainedToolStreamingBeta` is true
2322
+ // whenever tools are present). Forward only caller-supplied betas.
2314
2323
  const betaFeatures = [...extraBetas];
2315
- if (needsFineGrainedToolStreamingBeta) {
2316
- betaFeatures.push(fineGrainedToolStreamingBeta);
2317
- }
2318
2324
  const defaultHeaders = mergeHeaders(
2319
2325
  {
2320
2326
  Accept: stream ? "text/event-stream" : "application/json",
@@ -2337,7 +2343,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
2337
2343
  maxRetries: 5,
2338
2344
  defaultHeaders,
2339
2345
  fetch: cchFetch,
2340
- ...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
2346
+ fetchOptions,
2341
2347
  };
2342
2348
  }
2343
2349
 
@@ -2372,6 +2378,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
2372
2378
  maxRetries: 5,
2373
2379
  defaultHeaders,
2374
2380
  fetch: cchFetch,
2381
+ fetchOptions,
2375
2382
  };
2376
2383
  }
2377
2384
 
@@ -2388,7 +2395,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
2388
2395
  maxRetries: 5,
2389
2396
  defaultHeaders,
2390
2397
  fetch: cchFetch,
2391
- ...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
2398
+ fetchOptions,
2392
2399
  };
2393
2400
  }
2394
2401
  // OpenCode Zen's Anthropic-compatible gateway accepts bearer auth only;
@@ -2402,7 +2409,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
2402
2409
  maxRetries: 5,
2403
2410
  defaultHeaders,
2404
2411
  fetch: cchFetch,
2405
- ...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
2412
+ fetchOptions,
2406
2413
  };
2407
2414
  }
2408
2415
 
@@ -2421,7 +2428,7 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
2421
2428
  maxRetries: 5,
2422
2429
  defaultHeaders,
2423
2430
  fetch: cchFetch,
2424
- ...(tlsFetchOptions ? { fetchOptions: tlsFetchOptions } : {}),
2431
+ fetchOptions,
2425
2432
  };
2426
2433
  }
2427
2434
 
@@ -336,7 +336,15 @@ function buildParams(
336
336
  if (context.tools) {
337
337
  params.tools = convertTools(context.tools);
338
338
  if (options?.toolChoice) {
339
- params.tool_choice = mapToOpenAIResponsesToolChoice(options.toolChoice);
339
+ const toolChoice = mapToOpenAIResponsesToolChoice(options.toolChoice);
340
+ if (
341
+ toolChoice &&
342
+ (typeof toolChoice === "string" ||
343
+ toolChoice.type !== "function" ||
344
+ context.tools.some(tool => tool.name === toolChoice.name))
345
+ ) {
346
+ params.tool_choice = toolChoice;
347
+ }
340
348
  }
341
349
  }
342
350
 
@@ -7,6 +7,7 @@ import { createHash, randomBytes, randomUUID } from "node:crypto";
7
7
  import { scheduler } from "node:timers/promises";
8
8
  import { calculateCost } from "@oh-my-pi/pi-catalog/models";
9
9
  import {
10
+ ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION,
10
11
  ANTIGRAVITY_SYSTEM_INSTRUCTION,
11
12
  getAntigravityUserAgent,
12
13
  getGeminiCliHeaders,
@@ -27,6 +28,7 @@ import type {
27
28
  import { normalizeSystemPrompts } from "../utils";
28
29
  import { AssistantMessageEventStream } from "../utils/event-stream";
29
30
  import { appendRawHttpRequestDumpFor400, type RawHttpRequestDump } from "../utils/http-inspector";
31
+ import { getStreamFirstEventTimeoutMs } from "../utils/idle-iterator";
30
32
  // Refresh is the sole responsibility of AuthStorage (broker-aware, single-flighted);
31
33
  // the stream provider trusts the access token threaded through `options.apiKey`.
32
34
  import { normalizeSchemaForCCA } from "../utils/schema";
@@ -101,6 +103,7 @@ const ANTIGRAVITY_SANDBOX_ENDPOINT = "https://daily-cloudcode-pa.sandbox.googlea
101
103
  const ANTIGRAVITY_ENDPOINT_FALLBACKS = [ANTIGRAVITY_DAILY_ENDPOINT, ANTIGRAVITY_SANDBOX_ENDPOINT] as const;
102
104
 
103
105
  export {
106
+ ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION,
104
107
  ANTIGRAVITY_SYSTEM_INSTRUCTION,
105
108
  getAntigravityUserAgent,
106
109
  getGeminiCliHeaders,
@@ -365,17 +368,34 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
365
368
  headers: requestHeaders,
366
369
  };
367
370
 
371
+ // Direct callers that skip `register-builtins` (which installs the
372
+ // iterator-level watchdog) need a pre-response timer alongside
373
+ // `timeout: false`; otherwise a stalled Cloud Code Assist proxy
374
+ // would hang forever. Floor matches the lazy wrapper's 5min default.
375
+ const firstEventTimeoutMs =
376
+ options?.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(undefined, 300_000);
377
+ const preResponseWatchdog =
378
+ firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
379
+ ? AbortSignal.timeout(firstEventTimeoutMs)
380
+ : undefined;
381
+ const callerSignal = options?.signal;
382
+ const fetchSignal = preResponseWatchdog
383
+ ? callerSignal
384
+ ? AbortSignal.any([callerSignal, preResponseWatchdog])
385
+ : preResponseWatchdog
386
+ : callerSignal;
368
387
  const response = await fetchWithRetry(
369
388
  attempt => `${endpoints[Math.min(attempt, endpoints.length - 1)]}/v1internal:streamGenerateContent?alt=sse`,
370
389
  {
371
390
  method: "POST",
372
391
  headers: requestHeaders,
373
392
  body: requestBodyJson,
374
- signal: options?.signal,
393
+ signal: fetchSignal,
375
394
  maxAttempts: MAX_RETRIES + 1,
376
395
  defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
377
396
  maxDelayMs: options?.maxRetryDelayMs ?? RATE_LIMIT_BUDGET_MS,
378
397
  fetch: options?.fetch,
398
+ timeout: false,
379
399
  },
380
400
  );
381
401
  if (!response.ok) {
@@ -447,7 +467,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
447
467
  const candidate = responseData.candidates?.[0];
448
468
  if (candidate?.content?.parts) {
449
469
  for (const part of candidate.content.parts) {
450
- if (part.text !== undefined) {
470
+ if (part.text !== undefined && part.text !== "") {
451
471
  const isThinking = isThinkingPart(part);
452
472
  if (
453
473
  !currentBlock ||
@@ -484,6 +504,18 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
484
504
  partial: output,
485
505
  });
486
506
  }
507
+ } else if (part.text === "" && part.thoughtSignature && currentBlock && !part.functionCall) {
508
+ if (currentBlock.type === "thinking") {
509
+ currentBlock.thinkingSignature = retainThoughtSignature(
510
+ currentBlock.thinkingSignature,
511
+ part.thoughtSignature,
512
+ );
513
+ } else {
514
+ currentBlock.textSignature = retainThoughtSignature(
515
+ currentBlock.textSignature,
516
+ part.thoughtSignature,
517
+ );
518
+ }
487
519
  }
488
520
 
489
521
  if (part.functionCall) {
@@ -849,10 +881,10 @@ export function buildRequest(
849
881
  if (isAntigravity && shouldInjectAntigravitySystemInstruction(model.id)) {
850
882
  const existingParts = request.systemInstruction?.parts ?? [];
851
883
  request.systemInstruction = {
852
- role: "user",
853
884
  parts: [
854
885
  { text: ANTIGRAVITY_SYSTEM_INSTRUCTION },
855
886
  { text: `Please ignore following [ignore]${ANTIGRAVITY_SYSTEM_INSTRUCTION}[/ignore]` },
887
+ { text: ANTIGRAVITY_NO_PREAMBLE_INSTRUCTION },
856
888
  ...existingParts,
857
889
  ],
858
890
  };
@@ -372,7 +372,7 @@ export function convertTools(
372
372
  description: tool.description || "",
373
373
  ...(useParameters
374
374
  ? { parameters: normalizeSchemaForCCA(toolWireSchema(tool)) }
375
- : { parametersJsonSchema: toolWireSchema(tool) }),
375
+ : { parametersJsonSchema: normalizeSchemaForGoogle(toolWireSchema(tool)) }),
376
376
  })),
377
377
  },
378
378
  ];
@@ -609,7 +609,7 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
609
609
  const candidate = chunk.candidates?.[0];
610
610
  if (candidate?.content?.parts) {
611
611
  for (const part of candidate.content.parts) {
612
- if (part.text !== undefined) {
612
+ if (part.text !== undefined && part.text !== "") {
613
613
  if (!firstTokenSeen) {
614
614
  firstTokenSeen = true;
615
615
  onFirstToken?.();
@@ -650,6 +650,18 @@ export async function consumeGoogleStream<T extends GoogleApiType>(args: {
650
650
  partial: output,
651
651
  });
652
652
  }
653
+ } else if (part.text === "" && part.thoughtSignature && currentBlock && !part.functionCall) {
654
+ if (currentBlock.type === "thinking") {
655
+ currentBlock.thinkingSignature = retainThoughtSignature(
656
+ currentBlock.thinkingSignature,
657
+ part.thoughtSignature,
658
+ );
659
+ } else if (retainTextSignature) {
660
+ currentBlock.textSignature = retainThoughtSignature(
661
+ currentBlock.textSignature,
662
+ part.thoughtSignature,
663
+ );
664
+ }
653
665
  }
654
666
 
655
667
  if (part.functionCall) {
@@ -18,6 +18,7 @@ import type {
18
18
  import { normalizeSystemPrompts } from "../utils";
19
19
  import { AssistantMessageEventStream } from "../utils/event-stream";
20
20
  import { type CapturedHttpErrorResponse, finalizeErrorMessage, type RawHttpRequestDump } from "../utils/http-inspector";
21
+ import { getOpenAIStreamFirstEventTimeoutMs, getOpenAIStreamIdleTimeoutMs } from "../utils/idle-iterator";
21
22
  import { parseStreamingJson } from "../utils/json-parse";
22
23
  import { toolWireSchema } from "../utils/schema/wire";
23
24
  import {
@@ -525,6 +526,22 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
525
526
  url: `${baseUrl}/api/chat`,
526
527
  body,
527
528
  };
529
+ // Direct callers that bypass `register-builtins` (which installs
530
+ // the iterator-level watchdog) need a pre-response timer alongside
531
+ // `timeout: false`; otherwise an Ollama server that accepts the
532
+ // POST and never streams headers would hang forever (issue #2422).
533
+ const idleTimeoutMs = options.streamIdleTimeoutMs ?? getOpenAIStreamIdleTimeoutMs();
534
+ const firstEventTimeoutMs =
535
+ options.streamFirstEventTimeoutMs ?? getOpenAIStreamFirstEventTimeoutMs(idleTimeoutMs);
536
+ const preResponseWatchdog =
537
+ firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
538
+ ? AbortSignal.timeout(firstEventTimeoutMs)
539
+ : undefined;
540
+ const fetchSignal = preResponseWatchdog
541
+ ? options.signal
542
+ ? AbortSignal.any([options.signal, preResponseWatchdog])
543
+ : preResponseWatchdog
544
+ : options.signal;
528
545
  const response = await fetchWithRetry(`${baseUrl}/api/chat`, {
529
546
  method: "POST",
530
547
  headers: {
@@ -534,9 +551,10 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
534
551
  "Content-Type": "application/json",
535
552
  },
536
553
  body: JSON.stringify(body),
537
- signal: options.signal,
554
+ signal: fetchSignal,
538
555
  defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
539
556
  fetch: options.fetch,
557
+ timeout: false,
540
558
  });
541
559
  if (!response.ok) {
542
560
  capturedErrorResponse = await captureHttpErrorResponse(response);
@@ -272,6 +272,7 @@ interface CodexRequestSetup {
272
272
  requestSignal: AbortSignal;
273
273
  wrapCodexSseStream: (source: AsyncGenerator<Record<string, unknown>>) => AsyncGenerator<Record<string, unknown>>;
274
274
  requestAbortController: AbortController;
275
+ firstEventTimeoutMs: number | undefined;
275
276
  websocketIdleTimeoutMs: number | undefined;
276
277
  websocketFirstEventTimeoutMs: number | undefined;
277
278
  }
@@ -554,13 +555,16 @@ export function normalizeCodexToolChoice(
554
555
  if (!choice) return undefined;
555
556
  if (typeof choice === "string") return choice;
556
557
  const allowFreeform = model ? supportsFreeformApplyPatchCodex(model) : false;
557
- const mapName = (name: string): Record<string, string> => {
558
+ const mapName = (name: string): Record<string, string> | undefined => {
559
+ const directTool = tools.find(tool => tool.name === name);
558
560
  const customTool = allowFreeform
559
561
  ? tools.find(tool => tool.customFormat && (tool.name === name || tool.customWireName === name))
560
562
  : undefined;
563
+ const offeredTool = customTool ?? directTool;
564
+ if (!offeredTool) return undefined;
561
565
  return customTool
562
566
  ? { type: "custom", name: customTool.customWireName ?? customTool.name }
563
- : { type: "function", name };
567
+ : { type: "function", name: offeredTool.name };
564
568
  };
565
569
  if (choice.type === "function") {
566
570
  if ("function" in choice && choice.function?.name) {
@@ -687,6 +691,7 @@ function createRequestSetup(options: OpenAICodexResponsesOptions | undefined): C
687
691
  requestAbortController,
688
692
  requestSignal,
689
693
  wrapCodexSseStream,
694
+ firstEventTimeoutMs,
690
695
  websocketIdleTimeoutMs,
691
696
  websocketFirstEventTimeoutMs,
692
697
  };
@@ -983,6 +988,7 @@ async function openCodexSseTransport(
983
988
  state,
984
989
  requestContext.responsesLite,
985
990
  requestSetup.requestSignal,
991
+ requestSetup.firstEventTimeoutMs,
986
992
  event => options?.onSseEvent?.(event, model),
987
993
  options?.fetch,
988
994
  ),
@@ -3016,7 +3022,8 @@ async function openCodexSseEventStream(
3016
3022
  body: RequestBody,
3017
3023
  state: CodexWebSocketSessionState | undefined,
3018
3024
  responsesLite: boolean,
3019
- signal?: AbortSignal,
3025
+ signal: AbortSignal | undefined,
3026
+ firstEventTimeoutMs: number | undefined,
3020
3027
  onSseEvent?: OpenAICodexResponsesOptions["onSseEvent"],
3021
3028
  fetchOverride?: FetchImpl,
3022
3029
  ): Promise<AsyncGenerator<Record<string, unknown>>> {
@@ -3028,15 +3035,31 @@ async function openCodexSseEventStream(
3028
3035
  sentTurnStateHeader: headers.has(X_CODEX_TURN_STATE_HEADER),
3029
3036
  sentModelsEtagHeader: headers.has(X_MODELS_ETAG_HEADER),
3030
3037
  });
3038
+ // `wrapCodexSseStream` arms a first-event watchdog only after this fetch
3039
+ // resolves (it wraps the SSE generator). With `timeout: false` disabling
3040
+ // Bun's native 300s ceiling, a stalled pre-response request needs its own
3041
+ // watchdog — combine the caller signal with a fresh
3042
+ // `AbortSignal.timeout(firstEventTimeoutMs)` so headers must arrive
3043
+ // within the configured budget (issue #2422).
3044
+ const preResponseWatchdog =
3045
+ firstEventTimeoutMs !== undefined && firstEventTimeoutMs > 0
3046
+ ? AbortSignal.timeout(firstEventTimeoutMs)
3047
+ : undefined;
3048
+ const fetchSignal = preResponseWatchdog
3049
+ ? signal
3050
+ ? AbortSignal.any([signal, preResponseWatchdog])
3051
+ : preResponseWatchdog
3052
+ : signal;
3031
3053
  const response = await fetchWithRetry(url, {
3032
3054
  method: "POST",
3033
3055
  headers,
3034
3056
  body: JSON.stringify(body),
3035
- signal,
3057
+ signal: fetchSignal,
3036
3058
  maxAttempts: CODEX_MAX_RETRIES + 1,
3037
3059
  defaultDelayMs: attempt => CODEX_RETRY_DELAY_MS * (attempt + 1),
3038
3060
  maxDelayMs: CODEX_RATE_LIMIT_BUDGET_MS,
3039
3061
  fetch: fetchOverride,
3062
+ timeout: false,
3040
3063
  });
3041
3064
  logCodexDebug("codex response", {
3042
3065
  url: response.url,
@@ -699,6 +699,14 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
699
699
  if (!firstTokenTime) firstTokenTime = Date.now();
700
700
  appendText(output, stream, text);
701
701
  };
702
+ // Tracks the last full cumulative reasoning snapshot per signature (the
703
+ // reasoning field name) so dedup survives block transitions. Required
704
+ // for MiniMax-M3: once `</think>` and visible text arrive, currentBlock
705
+ // flips to "text", but later chunks keep carrying the same cumulative
706
+ // `reasoning_content` snapshot. Without an external tracker the guard
707
+ // below misses and the snapshot gets re-emitted as a fresh thinking
708
+ // block after the answer has started.
709
+ const lastCumulativeReasoningBySignature = new Map<string, string>();
702
710
  const appendThinkingDelta = (
703
711
  thinking: string,
704
712
  signature?: string,
@@ -706,13 +714,13 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
706
714
  ): void => {
707
715
  if (!thinking) return;
708
716
  let emittedThinking = thinking;
709
- if (
710
- source === "cumulative" &&
711
- currentBlock?.type === "thinking" &&
712
- (signature === undefined || currentBlock.thinkingSignature === signature) &&
713
- thinking.startsWith(currentBlock.thinking)
714
- ) {
715
- emittedThinking = thinking.slice(currentBlock.thinking.length);
717
+ if (source === "cumulative") {
718
+ const key = signature ?? "";
719
+ const lastSnapshot = lastCumulativeReasoningBySignature.get(key) ?? "";
720
+ if (thinking.startsWith(lastSnapshot)) {
721
+ emittedThinking = thinking.slice(lastSnapshot.length);
722
+ }
723
+ lastCumulativeReasoningBySignature.set(key, thinking);
716
724
  if (!emittedThinking) return;
717
725
  }
718
726
  if (!firstTokenTime) firstTokenTime = Date.now();
@@ -1217,6 +1225,11 @@ async function createRequestSetup(
1217
1225
  };
1218
1226
  }
1219
1227
 
1228
+ function getForcedCompletionsToolName(toolChoice: OpenAICompletionsParams["tool_choice"]): string | undefined {
1229
+ if (typeof toolChoice !== "object" || toolChoice === null || !("function" in toolChoice)) return undefined;
1230
+ return toolChoice.function.name;
1231
+ }
1232
+
1220
1233
  function buildParams(
1221
1234
  model: Model<"openai-completions">,
1222
1235
  context: Context,
@@ -1228,6 +1241,7 @@ function buildParams(
1228
1241
  Boolean(options?.reasoning) && !options?.disableReasoning && Boolean(model.reasoning);
1229
1242
  const forcedToolChoiceSuppressesThinking =
1230
1243
  compat.disableReasoningOnForcedToolChoice &&
1244
+ compat.supportsForcedToolChoice &&
1231
1245
  isForcedToolChoice(mapToOpenAICompletionsToolChoice(options?.toolChoice));
1232
1246
  if (compat.whenThinking && thinkingEnabledForRequest && !forcedToolChoiceSuppressesThinking) {
1233
1247
  compat = compat.whenThinking; // precomputed at model build — pointer swap, no allocation
@@ -1329,6 +1343,12 @@ function buildParams(
1329
1343
  if (options?.toolChoice && compat.supportsToolChoice) {
1330
1344
  params.tool_choice = mapToOpenAICompletionsToolChoice(options.toolChoice);
1331
1345
  }
1346
+ if (isForcedToolChoice(params.tool_choice) && !compat.supportsForcedToolChoice) {
1347
+ // Some thinking-required OpenAI-compatible models reject forced
1348
+ // `tool_choice` while still accepting tools with the default auto
1349
+ // selector. Keep the tool available and let the model choose it.
1350
+ params.tool_choice = "auto";
1351
+ }
1332
1352
 
1333
1353
  if (params.tool_choice === "none" && (!Array.isArray(params.tools) || params.tools.length === 0)) {
1334
1354
  // `tool_choice: "none"` with no tools to gate is redundant and also
@@ -1342,6 +1362,19 @@ function buildParams(
1342
1362
  delete params.tool_choice;
1343
1363
  }
1344
1364
 
1365
+ const forcedToolName = getForcedCompletionsToolName(params.tool_choice);
1366
+ if (
1367
+ forcedToolName !== undefined &&
1368
+ (!Array.isArray(params.tools) ||
1369
+ !params.tools.some(tool => tool.type === "function" && tool.function.name === forcedToolName))
1370
+ ) {
1371
+ // A forced named tool_choice is only valid when the same request offers
1372
+ // that function in `tools`. Active-tool filtering normally enforces this
1373
+ // before provider dispatch; this guard keeps raw provider callers from
1374
+ // emitting a self-inconsistent OpenAI-compatible payload.
1375
+ delete params.tool_choice;
1376
+ }
1377
+
1345
1378
  if (supportsReasoningParams && compat.thinkingFormat === "zai" && model.reasoning) {
1346
1379
  // Z.ai uses binary thinking: { type: "enabled" | "disabled" }
1347
1380
  // Must explicitly disable since z.ai defaults to thinking enabled.
@@ -934,7 +934,10 @@ export async function processResponsesStream<TApi extends Api>(
934
934
  // reaches the SDK stream), actively releasing the connection.
935
935
  break;
936
936
  } else if (event.type === "error") {
937
- throw new Error(`Error Code ${event.code}: ${event.message}`);
937
+ const err = (event as any).error ?? event;
938
+ const code = err.code ?? "unknown";
939
+ const message = err.message ?? "no message";
940
+ throw new Error(`Error Code ${code}: ${message}`);
938
941
  } else if (event.type === "response.failed") {
939
942
  populateResponsesUsageFromResponse(output, event.response?.usage);
940
943
  const error = event.response?.error ?? (event.response as any)?.status_details?.error;
@@ -836,13 +836,18 @@ export function mapOpenAIResponsesToolChoiceForTools(
836
836
  model: Model<"openai-responses">,
837
837
  ): OpenAIResponsesToolChoice {
838
838
  const mapped = mapToOpenAIResponsesToolChoice(choice);
839
- if (!mapped || typeof mapped === "string" || mapped.type !== "function" || !supportsFreeformApplyPatch(model)) {
839
+ if (!mapped || typeof mapped === "string" || mapped.type !== "function") {
840
840
  return mapped;
841
841
  }
842
842
 
843
- const customTool = tools.find(
844
- tool => tool.customFormat && (tool.name === mapped.name || tool.customWireName === mapped.name),
845
- );
843
+ const directTool = tools.find(tool => tool.name === mapped.name);
844
+ const customTool = supportsFreeformApplyPatch(model)
845
+ ? tools.find(tool => tool.customFormat && (tool.name === mapped.name || tool.customWireName === mapped.name))
846
+ : undefined;
847
+ const offeredTool = customTool ?? directTool;
848
+ if (!offeredTool) {
849
+ return undefined;
850
+ }
846
851
  return customTool ? { type: "custom", name: customTool.customWireName ?? customTool.name } : mapped;
847
852
  }
848
853
 
@@ -40,9 +40,10 @@ function resolveClientId(): string {
40
40
  /**
41
41
  * Resolve callback-server options from `GITLAB_REDIRECT_URI`. When set, the
42
42
  * exact string is advertised to GitLab (strict matching), random-port fallback
43
- * is disabled, and the local listener is bound to the URI's loopback host/port
44
- * so the browser callback lands on us. Non-loopback URIs bind a random local
45
- * port only the paste-code path can complete in that case.
43
+ * is disabled, and HTTP loopback URIs bind the listener to the URI's host/port
44
+ * so the browser callback lands on us. HTTPS loopback URIs are rejected because
45
+ * the local callback server is plaintext HTTP. Non-loopback URIs bind a random
46
+ * local port — only the paste-code path can complete in that case.
46
47
  */
47
48
  function resolveCallbackOptions(): OAuthCallbackFlowOptions {
48
49
  const raw = process.env.GITLAB_REDIRECT_URI?.trim();
@@ -65,6 +66,10 @@ function resolveCallbackOptions(): OAuthCallbackFlowOptions {
65
66
  }
66
67
 
67
68
  const isLoopback = parsed.hostname === "localhost" || parsed.hostname === "127.0.0.1" || parsed.hostname === "[::1]";
69
+ if (isLoopback && parsed.protocol !== "http:") {
70
+ throw new Error(`GITLAB_REDIRECT_URI loopback callbacks must use http://, got: ${raw}`);
71
+ }
72
+
68
73
  const port = parsed.port ? Number.parseInt(parsed.port, 10) : parsed.protocol === "https:" ? 443 : 80;
69
74
 
70
75
  return {
@@ -4,7 +4,7 @@ import type { ProviderDefinition } from "./types";
4
4
 
5
5
  const AUTH_URL = "https://z.ai/manage-apikey/apikey-list";
6
6
  const API_BASE_URL = "https://api.z.ai/api/coding/paas/v4";
7
- const VALIDATION_MODEL = "glm-4.7";
7
+ const VALIDATION_MODEL = "glm-5.2";
8
8
 
9
9
  export async function loginZai(options: OAuthController): Promise<string> {
10
10
  if (!options.onPrompt) {
@@ -79,6 +79,10 @@ export async function postOpenAIStream<TEvent>(init: OpenAIStreamRequestInit): P
79
79
  signal: init.signal,
80
80
  fetch: init.fetch,
81
81
  maxAttempts: init.maxAttempts ?? DEFAULT_MAX_ATTEMPTS,
82
+ // Bun's native fetch enforces a hard ~300s pre-response timeout (issue #2422).
83
+ // Cold large-context streams legitimately exceed it; the caller's
84
+ // `firstEventTimeoutMs`/`AbortSignal` already govern stuck requests.
85
+ timeout: false,
82
86
  });
83
87
  if (!response.ok) {
84
88
  throw await captureOpenAIHttpError(response);