@oh-my-pi/pi-ai 15.0.1 → 15.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [15.0.2] - 2026-05-15
6
+ ### Fixed
7
+
8
+ - Fixed `StreamOptions.fetch` typing to accept fetch-compatible override functions that do not expose `preconnect`, allowing custom fetch implementations to be used without type errors across runtimes.
9
+ - Fixed Moonshot Kimi K2.6 forced tool calls to send `thinking: { type: "disabled" }`, avoiding `tool_choice 'specified' is incompatible with thinking enabled` 400s while preserving the requested named tool ([#1077](https://github.com/can1357/oh-my-pi/issues/1077)).
10
+
5
11
  ## [15.0.1] - 2026-05-14
6
12
  ### Breaking Changes
7
13
 
@@ -22,6 +28,11 @@
22
28
 
23
29
  - Fixed OAuth credentials being silently disabled when two omp processes (or any two `AuthStorage` instances sharing an `agent.db`) race on token refresh. Anthropic rotates refresh tokens on every use, so the loser's `invalid_grant` response previously soft-deleted the row that the winner just rotated, forcing the user to `/login` again. `#tryOAuthCredential` now re-reads the row from disk before declaring a definitive failure: if the persisted `refresh` differs from the snapshot it tried, the peer-rotated credential is reloaded and the request retries against the fresh token instead of disabling the live row.
24
30
  - Closed a remaining race window in OAuth refresh-failure handling: between re-reading the credential row to check for peer rotation and the subsequent soft-delete, another process could still complete a refresh and rotate the row, leaving us to disable the freshly-rotated credential by `id`. The disable now runs as a single CAS update conditioned on the row's `data` still matching the snapshot we tried to refresh, and on `disabled_cause IS NULL`. If the CAS reports 0 rows changed (peer rotation, or row already disabled by a concurrent failure on the same snapshot), we reload from disk and retry instead of mutating the wrong row or emitting a spurious `credential_disabled` event.
31
+ ### Changed
32
+ - Lowered the default steady-state stream idle timeout from 120s to 30s while preserving the existing environment overrides.
33
+
34
+ ### Fixed
35
+ - Lazy built-in provider streams now enforce the shared idle watchdog and abort stalled provider requests, so session auto-retry can continue after transient network drops instead of remaining stuck. Caller aborts still terminate as aborted.
25
36
 
26
37
  ## [14.9.3] - 2026-05-10
27
38
 
package/package.json CHANGED
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "15.0.1",
4
+ "version": "15.0.2",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
- "homepage": "https://github.com/can1357/oh-my-pi",
6
+ "homepage": "https://omp.sh",
7
7
  "author": "Can Boluk",
8
8
  "contributors": [
9
9
  "Mario Zechner"
@@ -46,8 +46,8 @@
46
46
  "@aws-sdk/credential-provider-node": "^3.972.39",
47
47
  "@bufbuild/protobuf": "^2.12.0",
48
48
  "@google/genai": "^1.52.0",
49
- "@oh-my-pi/pi-natives": "15.0.1",
50
- "@oh-my-pi/pi-utils": "15.0.1",
49
+ "@oh-my-pi/pi-natives": "15.0.2",
50
+ "@oh-my-pi/pi-utils": "15.0.2",
51
51
  "@sinclair/typebox": "^0.34.49",
52
52
  "@smithy/node-http-handler": "^4.6.1",
53
53
  "ajv": "^8.20.0",
@@ -25,6 +25,7 @@ import type {
25
25
  AssistantMessage,
26
26
  CacheRetention,
27
27
  Context,
28
+ FetchImpl,
28
29
  ImageContent,
29
30
  Message,
30
31
  Model,
@@ -541,6 +542,7 @@ export type AnthropicClientOptionsArgs = {
541
542
  isOAuth?: boolean;
542
543
  hasTools?: boolean;
543
544
  onSseEvent?: AnthropicOptions["onSseEvent"];
545
+ fetch?: FetchImpl;
544
546
  };
545
547
 
546
548
  export type AnthropicClientOptionsResult = {
@@ -965,6 +967,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
965
967
  isOAuth: options?.isOAuth,
966
968
  hasTools: !!context.tools?.length,
967
969
  onSseEvent: options?.onSseEvent,
970
+ fetch: options?.fetch,
968
971
  });
969
972
  client = created.client;
970
973
  isOAuthToken = created.isOAuthToken;
@@ -1405,7 +1408,12 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1405
1408
  const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
1406
1409
  const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
1407
1410
  const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
1408
- const debugFetch = onSseEvent ? wrapFetchForSseDebug(fetch, event => onSseEvent(event, model)) : undefined;
1411
+ const baseFetch = args.fetch ?? fetch;
1412
+ const debugFetch = onSseEvent
1413
+ ? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model))
1414
+ : args.fetch
1415
+ ? baseFetch
1416
+ : undefined;
1409
1417
  if (model.provider === "github-copilot") {
1410
1418
  const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
1411
1419
  const betaFeatures = [...extraBetas];
@@ -241,6 +241,7 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
241
241
 
242
242
  const { baseUrl, apiVersion } = resolveAzureConfig(model, options);
243
243
 
244
+ const baseFetch = options?.fetch ?? fetch;
244
245
  return new AzureOpenAI({
245
246
  apiKey,
246
247
  apiVersion,
@@ -248,7 +249,9 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
248
249
  maxRetries: 5,
249
250
  defaultHeaders: headers,
250
251
  baseURL: baseUrl,
251
- fetch: options?.onSseEvent ? wrapFetchForSseDebug(fetch, event => options.onSseEvent?.(event, model)) : fetch,
252
+ fetch: options?.onSseEvent
253
+ ? wrapFetchForSseDebug(baseFetch, event => options.onSseEvent?.(event, model))
254
+ : baseFetch,
252
255
  });
253
256
  }
254
257
 
@@ -1,5 +1,5 @@
1
1
  import { ANTHROPIC_THINKING, mapAnthropicToolChoice } from "../stream";
2
- import type { Api, Context, Model, SimpleStreamOptions } from "../types";
2
+ import type { Api, Context, FetchImpl, Model, SimpleStreamOptions } from "../types";
3
3
  import { AssistantMessageEventStream } from "../utils/event-stream";
4
4
  import type { OpenAICompletionsOptions } from "./openai-completions";
5
5
  import type { OpenAIResponsesOptions } from "./openai-responses";
@@ -172,13 +172,16 @@ interface DirectAccessToken {
172
172
 
173
173
  const directAccessCache = new Map<string, DirectAccessToken>();
174
174
 
175
- async function getDirectAccessToken(gitlabAccessToken: string): Promise<DirectAccessToken> {
175
+ async function getDirectAccessToken(
176
+ gitlabAccessToken: string,
177
+ fetchImpl: FetchImpl = fetch,
178
+ ): Promise<DirectAccessToken> {
176
179
  const cached = directAccessCache.get(gitlabAccessToken);
177
180
  if (cached && cached.expiresAt > Date.now()) {
178
181
  return cached;
179
182
  }
180
183
 
181
- const response = await fetch(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
184
+ const response = await fetchImpl(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
182
185
  method: "POST",
183
186
  headers: {
184
187
  Authorization: `Bearer ${gitlabAccessToken}`,
@@ -240,7 +243,7 @@ export function streamGitLabDuo(
240
243
  throw new Error(`Unsupported GitLab Duo model: ${model.id}`);
241
244
  }
242
245
 
243
- const directAccess = await getDirectAccessToken(options.apiKey);
246
+ const directAccess = await getDirectAccessToken(options.apiKey, options.fetch);
244
247
  const headers = {
245
248
  ...directAccess.headers,
246
249
  ...options.headers,
@@ -278,6 +281,7 @@ export function streamGitLabDuo(
278
281
  onPayload: options.onPayload,
279
282
  onResponse: options.onResponse,
280
283
  onSseEvent: options.onSseEvent,
284
+ fetch: options.fetch,
281
285
  thinkingEnabled: Boolean(reasoningEffort) && model.reasoning,
282
286
  thinkingBudgetTokens: reasoningEffort
283
287
  ? (options.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
@@ -314,6 +318,7 @@ export function streamGitLabDuo(
314
318
  onPayload: options.onPayload,
315
319
  onResponse: options.onResponse,
316
320
  onSseEvent: options.onSseEvent,
321
+ fetch: options.fetch,
317
322
  reasoning: reasoningEffort,
318
323
  toolChoice: options.toolChoice,
319
324
  } satisfies OpenAIResponsesOptions,
@@ -345,6 +350,7 @@ export function streamGitLabDuo(
345
350
  onPayload: options.onPayload,
346
351
  onResponse: options.onResponse,
347
352
  onSseEvent: options.onSseEvent,
353
+ fetch: options.fetch,
348
354
  reasoning: reasoningEffort,
349
355
  toolChoice: options.toolChoice,
350
356
  } satisfies OpenAICompletionsOptions,
@@ -362,6 +362,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
362
362
  maxAttempts: MAX_RETRIES + 1,
363
363
  defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
364
364
  maxDelayMs: options?.maxRetryDelayMs ?? RATE_LIMIT_BUDGET_MS,
365
+ fetch: options?.fetch,
365
366
  },
366
367
  );
367
368
  if (!response.ok) {
@@ -545,7 +546,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
545
546
  throw new Error("Missing request URL");
546
547
  }
547
548
 
548
- currentResponse = await fetch(requestUrl, {
549
+ currentResponse = await (options?.fetch ?? fetch)(requestUrl, {
549
550
  method: "POST",
550
551
  headers: requestHeaders,
551
552
  body: requestBodyJson,
@@ -1,6 +1,6 @@
1
1
  import { GoogleGenAI } from "@google/genai";
2
2
  import { $env } from "@oh-my-pi/pi-utils";
3
- import type { Context, Model, StreamFunction } from "../types";
3
+ import type { Context, FetchImpl, Model, StreamFunction } from "../types";
4
4
  import type { AssistantMessageEventStream } from "../utils/event-stream";
5
5
  import { buildGoogleGenerateContentParams, type GoogleSharedStreamOptions, streamGoogleGenAI } from "./google-shared";
6
6
 
@@ -25,7 +25,9 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
25
25
  const apiKey = resolveApiKey(options);
26
26
  const project = apiKey ? undefined : resolveProject(options);
27
27
  const location = apiKey ? undefined : resolveLocation(options);
28
- const client = apiKey ? createClientWithApiKey(model, apiKey) : createClient(model, project!, location!);
28
+ const client = apiKey
29
+ ? createClientWithApiKey(model, apiKey, options?.fetch)
30
+ : createClient(model, project!, location!, options?.fetch);
29
31
  const params = buildGoogleGenerateContentParams(model, context, options ?? {});
30
32
  const url = apiKey
31
33
  ? `https://aiplatform.googleapis.com/${API_VERSION}/publishers/google/models/${model.id}:streamGenerateContent`
@@ -34,29 +36,45 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
34
36
  },
35
37
  });
36
38
 
37
- function buildHttpOptions(model: Model<"google-vertex">): { headers?: Record<string, string> } | undefined {
38
- if (!model.headers) {
39
- return undefined;
39
+ function buildHttpOptions(
40
+ model: Model<"google-vertex">,
41
+ fetchOverride: FetchImpl | undefined,
42
+ ): { headers?: Record<string, string>; fetch?: FetchImpl } | undefined {
43
+ const options: { headers?: Record<string, string>; fetch?: FetchImpl } = {};
44
+ if (model.headers) {
45
+ options.headers = { ...model.headers };
46
+ }
47
+ if (fetchOverride) {
48
+ options.fetch = fetchOverride;
40
49
  }
41
- return { headers: { ...model.headers } };
50
+ return Object.keys(options).length > 0 ? options : undefined;
42
51
  }
43
52
 
44
- function createClient(model: Model<"google-vertex">, project: string, location: string): GoogleGenAI {
53
+ function createClient(
54
+ model: Model<"google-vertex">,
55
+ project: string,
56
+ location: string,
57
+ fetchOverride: FetchImpl | undefined,
58
+ ): GoogleGenAI {
45
59
  return new GoogleGenAI({
46
60
  vertexai: true,
47
61
  project,
48
62
  location,
49
63
  apiVersion: API_VERSION,
50
- httpOptions: buildHttpOptions(model),
64
+ httpOptions: buildHttpOptions(model, fetchOverride),
51
65
  });
52
66
  }
53
67
 
54
- function createClientWithApiKey(model: Model<"google-vertex">, apiKey: string): GoogleGenAI {
68
+ function createClientWithApiKey(
69
+ model: Model<"google-vertex">,
70
+ apiKey: string,
71
+ fetchOverride: FetchImpl | undefined,
72
+ ): GoogleGenAI {
55
73
  return new GoogleGenAI({
56
74
  vertexai: true,
57
75
  apiKey,
58
76
  apiVersion: API_VERSION,
59
- httpOptions: buildHttpOptions(model),
77
+ httpOptions: buildHttpOptions(model, fetchOverride),
60
78
  });
61
79
  }
62
80
 
@@ -1,6 +1,6 @@
1
1
  import { GoogleGenAI } from "@google/genai";
2
2
  import { getEnvApiKey } from "../stream";
3
- import type { Context, Model, StreamFunction } from "../types";
3
+ import type { Context, FetchImpl, Model, StreamFunction } from "../types";
4
4
  import type { AssistantMessageEventStream } from "../utils/event-stream";
5
5
  import { buildGoogleGenerateContentParams, type GoogleSharedStreamOptions, streamGoogleGenAI } from "./google-shared";
6
6
 
@@ -17,15 +17,20 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
17
17
  api: "google-generative-ai",
18
18
  prepare: () => {
19
19
  const apiKey = options?.apiKey || getEnvApiKey(model.provider);
20
- const client = createClient(model, apiKey);
20
+ const client = createClient(model, apiKey, options?.fetch);
21
21
  const params = buildGoogleGenerateContentParams(model, context, options ?? {});
22
22
  const url = model.baseUrl ? `${model.baseUrl}/models/${model.id}:streamGenerateContent` : undefined;
23
23
  return { client, params, url };
24
24
  },
25
25
  });
26
26
 
27
- function createClient(model: Model<"google-generative-ai">, apiKey?: string): GoogleGenAI {
28
- const httpOptions: { baseUrl?: string; apiVersion?: string; headers?: Record<string, string> } = {};
27
+ function createClient(model: Model<"google-generative-ai">, apiKey?: string, fetchOverride?: FetchImpl): GoogleGenAI {
28
+ const httpOptions: {
29
+ baseUrl?: string;
30
+ apiVersion?: string;
31
+ headers?: Record<string, string>;
32
+ fetch?: FetchImpl;
33
+ } = {};
29
34
  if (model.baseUrl) {
30
35
  httpOptions.baseUrl = model.baseUrl;
31
36
  httpOptions.apiVersion = ""; // baseUrl already includes version path, don't append
@@ -33,6 +38,9 @@ function createClient(model: Model<"google-generative-ai">, apiKey?: string): Go
33
38
  if (model.headers) {
34
39
  httpOptions.headers = model.headers;
35
40
  }
41
+ if (fetchOverride) {
42
+ httpOptions.fetch = fetchOverride;
43
+ }
36
44
 
37
45
  return new GoogleGenAI({
38
46
  apiKey,
@@ -378,6 +378,7 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
378
378
  body: JSON.stringify(body),
379
379
  signal: options.signal,
380
380
  defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
381
+ fetch: options.fetch,
381
382
  });
382
383
  if (!response.ok) {
383
384
  throw new Error(`HTTP ${response.status} from ${baseUrl}/api/chat`);
@@ -88,6 +88,7 @@ export function streamOpenAIAnthropicShim(
88
88
  onPayload: options?.onPayload,
89
89
  onResponse: options?.onResponse,
90
90
  onSseEvent: options?.onSseEvent,
91
+ fetch: options?.fetch,
91
92
  thinkingEnabled,
92
93
  thinkingBudgetTokens: thinkingBudget,
93
94
  });
@@ -116,6 +117,7 @@ export function streamOpenAIAnthropicShim(
116
117
  onPayload: options?.onPayload,
117
118
  onResponse: options?.onResponse,
118
119
  onSseEvent: options?.onSseEvent,
120
+ fetch: options?.fetch,
119
121
  reasoning: reasoningEffort,
120
122
  });
121
123
 
@@ -17,6 +17,7 @@ import {
17
17
  type Api,
18
18
  type AssistantMessage,
19
19
  type Context,
20
+ type FetchImpl,
20
21
  type Model,
21
22
  type ProviderSessionState,
22
23
  type ServiceTier,
@@ -735,6 +736,7 @@ async function openCodexSseTransport(
735
736
  state,
736
737
  requestSetup.requestSignal,
737
738
  event => options?.onSseEvent?.(event, model),
739
+ options?.fetch,
738
740
  ),
739
741
  );
740
742
  return { eventStream, requestBodyForState: structuredCloneJSON(body), transport: "sse" };
@@ -2173,6 +2175,7 @@ async function openCodexSseEventStream(
2173
2175
  state: CodexWebSocketSessionState | undefined,
2174
2176
  signal?: AbortSignal,
2175
2177
  onSseEvent?: OpenAICodexResponsesOptions["onSseEvent"],
2178
+ fetchOverride?: FetchImpl,
2176
2179
  ): Promise<AsyncGenerator<Record<string, unknown>>> {
2177
2180
  const headers = createCodexHeaders(requestHeaders, accountId, apiKey, sessionId, "sse", state);
2178
2181
  logCodexDebug("codex request", {
@@ -2190,6 +2193,7 @@ async function openCodexSseEventStream(
2190
2193
  maxAttempts: CODEX_MAX_RETRIES + 1,
2191
2194
  defaultDelayMs: attempt => CODEX_RETRY_DELAY_MS * (attempt + 1),
2192
2195
  maxDelayMs: CODEX_RATE_LIMIT_BUDGET_MS,
2196
+ fetch: fetchOverride,
2193
2197
  });
2194
2198
  logCodexDebug("codex response", {
2195
2199
  url: response.url,
@@ -53,6 +53,12 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
53
53
  const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
54
54
  const isKilo = provider === "kilo" || baseUrl.includes("api.kilo.ai");
55
55
  const isKimiModel = model.id.includes("moonshotai/kimi") || /^kimi[-.]/i.test(model.id);
56
+ const isMoonshotKimi =
57
+ isKimiModel &&
58
+ (provider === "moonshot" ||
59
+ provider === "kimi-code" ||
60
+ baseUrl.includes("api.moonshot.ai") ||
61
+ baseUrl.includes("api.kimi.com"));
56
62
  const isAnthropicModel =
57
63
  provider === "anthropic" ||
58
64
  baseUrl.includes("api.anthropic.com") ||
@@ -90,6 +96,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
90
96
  provider === "opencode-zen" ||
91
97
  provider === "opencode-go" ||
92
98
  baseUrl.includes("opencode.ai");
99
+ const isOpenCodeProvider = provider === "opencode-go" || provider === "opencode-zen";
93
100
 
94
101
  const useMaxTokens =
95
102
  provider === "mistral" ||
@@ -173,22 +180,25 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
173
180
  requiresAssistantAfterToolResult: false,
174
181
  requiresThinkingAsText: isMistral,
175
182
  requiresMistralToolIds: isMistral,
176
- thinkingFormat: isZai
177
- ? "zai"
178
- : provider === "openrouter" || baseUrl.includes("openrouter.ai")
179
- ? "openrouter"
180
- : isAlibaba || isQwen
181
- ? "qwen"
182
- : "openai",
183
+ thinkingFormat:
184
+ isZai || isMoonshotKimi
185
+ ? "zai"
186
+ : provider === "openrouter" || baseUrl.includes("openrouter.ai")
187
+ ? "openrouter"
188
+ : isAlibaba || isQwen
189
+ ? "qwen"
190
+ : "openai",
183
191
  reasoningContentField: "reasoning_content",
184
192
  // Backends that 400 follow-up requests when prior assistant tool-call turns lack `reasoning_content`:
185
- // - Kimi: documented invariant on its native API and via OpenCode-Go.
193
+ // - Kimi: documented invariant on its native API.
186
194
  // - Any reasoning-capable model reached through OpenRouter: DeepSeek V4 Pro and similar enforce
187
195
  // this server-side whenever the request is in thinking mode. We can't translate Anthropic's
188
196
  // redacted/encrypted reasoning into DeepSeek's plaintext form, so cross-provider continuations
189
197
  // rely on a placeholder — see `convertMessages` for the placeholder injection.
198
+ // - OpenCode-Go and OpenCode-Zen handle reasoning content internally and reject
199
+ // `reasoning_content` in client-sent messages — exclude them even for Kimi models.
190
200
  requiresReasoningContentForToolCalls:
191
- isKimiModel ||
201
+ (isKimiModel && !isOpenCodeProvider) ||
192
202
  (isDeepseekFamily && Boolean(model.reasoning)) ||
193
203
  ((provider === "openrouter" || baseUrl.includes("openrouter.ai")) && Boolean(model.reasoning)),
194
204
  // DeepSeek V4 rejects synthetic reasoning_content placeholders (".") on tool-call turns.
@@ -16,6 +16,7 @@ import { getEnvApiKey } from "../stream";
16
16
  import {
17
17
  type AssistantMessage,
18
18
  type Context,
19
+ type FetchImpl,
19
20
  getPriorityPremiumRequests,
20
21
  type Message,
21
22
  type MessageAttribution,
@@ -362,6 +363,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
362
363
  options?.headers,
363
364
  options?.initiatorOverride,
364
365
  options?.onSseEvent,
366
+ options?.fetch,
365
367
  );
366
368
  const priorityPremiumRequests = getPriorityPremiumRequests(options?.serviceTier, model.provider);
367
369
  const premiumRequestsTotal =
@@ -778,6 +780,7 @@ async function createClient(
778
780
  extraHeaders?: Record<string, string>,
779
781
  initiatorOverride?: MessageAttribution,
780
782
  onSseEvent?: OpenAICompletionsOptions["onSseEvent"],
783
+ fetchOverride?: FetchImpl,
781
784
  ): Promise<{
782
785
  client: OpenAI;
783
786
  copilotPremiumRequests: number | undefined;
@@ -847,9 +850,10 @@ async function createClient(
847
850
  azureDefaultQuery = { "api-version": apiVersion };
848
851
  }
849
852
  let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
853
+ const baseFetch = fetchOverride ?? fetch;
850
854
  const wrappedFetch = Object.assign(
851
855
  async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
852
- const response = await fetch(input, init);
856
+ const response = await baseFetch(input, init);
853
857
  if (response.ok) {
854
858
  capturedErrorResponse = undefined;
855
859
  return response;
@@ -872,7 +876,7 @@ async function createClient(
872
876
  };
873
877
  return response;
874
878
  },
875
- { preconnect: fetch.preconnect },
879
+ baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {},
876
880
  );
877
881
  const debugFetch = onSseEvent ? wrapFetchForSseDebug(wrappedFetch, event => onSseEvent(event, model)) : wrappedFetch;
878
882
  return {
@@ -1019,12 +1023,14 @@ function buildParams(
1019
1023
  }
1020
1024
 
1021
1025
  if (compat.disableReasoningOnForcedToolChoice && isForcedToolChoice(params.tool_choice)) {
1022
- // Mirrors anthropic.ts:disableThinkingIfToolChoiceForced backends like
1023
- // Kimi 400 with `tool_choice 'specified' is incompatible with thinking
1024
- // enabled`. Drop reasoning for this turn instead of dropping tool_choice;
1025
- // the agent still gets the forced tool call, just without thinking.
1026
+ // Backends like Kimi 400 with `tool_choice 'specified' is incompatible
1027
+ // with thinking enabled`. Suppress thinking for this single forced-tool
1028
+ // turn while keeping the tool-selection contract intact.
1026
1029
  delete params.reasoning_effort;
1027
1030
  delete params.reasoning;
1031
+ if (compat.thinkingFormat === "zai") {
1032
+ params.thinking = { type: "disabled" };
1033
+ }
1028
1034
  }
1029
1035
 
1030
1036
  // OpenRouter provider routing preferences
@@ -1362,7 +1368,9 @@ export function convertMessages(
1362
1368
  const canUseSyntheticReasoningContent =
1363
1369
  compat.requiresReasoningContentForToolCalls &&
1364
1370
  compat.allowsSyntheticReasoningContentForToolCalls &&
1365
- (compat.thinkingFormat === "openai" || compat.thinkingFormat === "openrouter");
1371
+ (compat.thinkingFormat === "openai" ||
1372
+ compat.thinkingFormat === "openrouter" ||
1373
+ compat.thinkingFormat === "zai");
1366
1374
  // DeepSeek reasoning models require reasoning_content on ALL assistant turns,
1367
1375
  // not just tool-call turns. Other providers (Kimi, OpenRouter) only require it
1368
1376
  // on tool-call turns.
@@ -10,6 +10,7 @@ import {
10
10
  type AssistantMessage,
11
11
  type CacheRetention,
12
12
  type Context,
13
+ type FetchImpl,
13
14
  getPriorityPremiumRequests,
14
15
  type MessageAttribution,
15
16
  type Model,
@@ -210,6 +211,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
210
211
  options?.initiatorOverride,
211
212
  cacheSessionId,
212
213
  options?.onSseEvent,
214
+ options?.fetch,
213
215
  );
214
216
  const priorityPremiumRequests = getPriorityPremiumRequests(options?.serviceTier, model.provider);
215
217
  const premiumRequestsTotal =
@@ -312,6 +314,7 @@ function createClient(
312
314
  initiatorOverride?: MessageAttribution,
313
315
  sessionId?: string,
314
316
  onSseEvent?: OpenAIResponsesOptions["onSseEvent"],
317
+ fetchOverride?: FetchImpl,
315
318
  ): {
316
319
  client: OpenAI;
317
320
  copilotPremiumRequests: number | undefined;
@@ -349,6 +352,7 @@ function createClient(
349
352
  headers.session_id ??= sessionId;
350
353
  headers["x-client-request-id"] ??= sessionId;
351
354
  }
355
+ const baseFetch = fetchOverride ?? fetch;
352
356
  return {
353
357
  client: new OpenAI({
354
358
  apiKey,
@@ -356,7 +360,7 @@ function createClient(
356
360
  dangerouslyAllowBrowser: true,
357
361
  maxRetries: 5,
358
362
  defaultHeaders: headers,
359
- fetch: onSseEvent ? wrapFetchForSseDebug(fetch, event => onSseEvent(event, model)) : fetch,
363
+ fetch: onSseEvent ? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model)) : baseFetch,
360
364
  }),
361
365
  copilotPremiumRequests,
362
366
  baseUrl,
@@ -19,7 +19,9 @@ import type {
19
19
  Model,
20
20
  OptionsForApi,
21
21
  } from "../types";
22
+ import { type AbortSourceTracker, createAbortSourceTracker } from "../utils/abort";
22
23
  import { AssistantMessageEventStream as EventStreamImpl } from "../utils/event-stream";
24
+ import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTimeout } from "../utils/idle-iterator";
23
25
  import type { BedrockOptions } from "./amazon-bedrock";
24
26
  import type { AnthropicOptions } from "./anthropic";
25
27
  import type { AzureOpenAIResponsesOptions } from "./azure-openai-responses";
@@ -155,6 +157,9 @@ export function setBedrockProviderModule(module: BedrockProviderModule): void {
155
157
  // Stream forwarding / error helpers
156
158
  // ---------------------------------------------------------------------------
157
159
 
160
+ const LAZY_STREAM_IDLE_TIMEOUT_ERROR = "Provider stream stalled while waiting for the next event";
161
+ const LAZY_STREAM_FIRST_EVENT_TIMEOUT_ERROR = "Provider stream timed out while waiting for the first event";
162
+
158
163
  function hasFinalResult(
159
164
  source: AsyncIterable<AssistantMessageEvent>,
160
165
  ): source is AsyncIterable<AssistantMessageEvent> & { result(): Promise<AssistantMessage> } {
@@ -165,10 +170,23 @@ function forwardStream<TApi extends Api>(
165
170
  target: EventStreamImpl,
166
171
  source: AsyncIterable<AssistantMessageEvent>,
167
172
  model: Model<TApi>,
173
+ options: OptionsForApi<TApi>,
174
+ abortTracker: AbortSourceTracker,
168
175
  ): void {
169
176
  (async () => {
170
177
  try {
171
- for await (const event of source) {
178
+ const idleTimeoutMs = options.streamIdleTimeoutMs ?? getStreamIdleTimeoutMs();
179
+ const watchedSource = iterateWithIdleTimeout(source, {
180
+ idleTimeoutMs,
181
+ firstItemTimeoutMs: options.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(idleTimeoutMs),
182
+ errorMessage: LAZY_STREAM_IDLE_TIMEOUT_ERROR,
183
+ firstItemErrorMessage: LAZY_STREAM_FIRST_EVENT_TIMEOUT_ERROR,
184
+ onIdle: () => abortTracker.abortLocally(new Error(LAZY_STREAM_IDLE_TIMEOUT_ERROR)),
185
+ onFirstItemTimeout: () => abortTracker.abortLocally(new Error(LAZY_STREAM_FIRST_EVENT_TIMEOUT_ERROR)),
186
+ abortSignal: options.signal,
187
+ });
188
+
189
+ for await (const event of watchedSource) {
172
190
  target.push(event);
173
191
  }
174
192
  if (hasFinalResult(source)) {
@@ -177,14 +195,19 @@ function forwardStream<TApi extends Api>(
177
195
  target.end();
178
196
  }
179
197
  } catch (error) {
180
- const message = createLazyLoadErrorMessage(model, error);
181
- target.push({ type: "error", reason: "error", error: message });
198
+ const stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
199
+ const message = createLazyLoadErrorMessage(model, error, stopReason);
200
+ target.push({ type: "error", reason: stopReason, error: message });
182
201
  target.end(message);
183
202
  }
184
203
  })();
185
204
  }
186
205
 
187
- function createLazyLoadErrorMessage<TApi extends Api>(model: Model<TApi>, error: unknown): AssistantMessage {
206
+ function createLazyLoadErrorMessage<TApi extends Api>(
207
+ model: Model<TApi>,
208
+ error: unknown,
209
+ stopReason: Extract<AssistantMessage["stopReason"], "aborted" | "error"> = "error",
210
+ ): AssistantMessage {
188
211
  return {
189
212
  role: "assistant",
190
213
  content: [],
@@ -199,8 +222,9 @@ function createLazyLoadErrorMessage<TApi extends Api>(model: Model<TApi>, error:
199
222
  totalTokens: 0,
200
223
  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
201
224
  },
202
- stopReason: "error",
203
- errorMessage: error instanceof Error ? error.message : String(error),
225
+ stopReason,
226
+ errorMessage:
227
+ stopReason === "aborted" ? "Request was aborted" : error instanceof Error ? error.message : String(error),
204
228
  timestamp: Date.now(),
205
229
  };
206
230
  }
@@ -214,11 +238,14 @@ function createLazyStream<TApi extends Api>(
214
238
  ): (model: Model<TApi>, context: Context, options: OptionsForApi<TApi>) => EventStreamImpl {
215
239
  return (model, context, options) => {
216
240
  const outer = new EventStreamImpl();
241
+ const streamOptions = (options ?? {}) as OptionsForApi<TApi>;
217
242
 
218
243
  loadModule()
219
244
  .then(module => {
220
- const inner = module.stream(model, context, options);
221
- forwardStream(outer, inner, model);
245
+ const abortTracker = createAbortSourceTracker(streamOptions.signal);
246
+ const providerOptions = { ...streamOptions, signal: abortTracker.requestSignal } as OptionsForApi<TApi>;
247
+ const inner = module.stream(model, context, providerOptions);
248
+ forwardStream(outer, inner, model, streamOptions, abortTracker);
222
249
  })
223
250
  .catch(error => {
224
251
  const message = createLazyLoadErrorMessage(model, error);
package/src/types.ts CHANGED
@@ -204,6 +204,15 @@ export interface RawSseEvent {
204
204
  raw: string[];
205
205
  }
206
206
 
207
+ /**
208
+ * `fetch`-compatible function. Accepts any callable matching the standard
209
+ * fetch signature; `preconnect` is optional because non-Bun runtimes (browsers,
210
+ * test mocks) won't expose it.
211
+ */
212
+ export type FetchImpl = ((input: string | URL | Request, init?: RequestInit) => Promise<Response>) & {
213
+ preconnect?: typeof globalThis.fetch.preconnect;
214
+ };
215
+
207
216
  export interface StreamOptions {
208
217
  temperature?: number;
209
218
  topP?: number;
@@ -275,6 +284,14 @@ export interface StreamOptions {
275
284
  * Set to 0 to disable the inter-event idle watchdog for this request.
276
285
  */
277
286
  streamIdleTimeoutMs?: number;
287
+ /**
288
+ * Optional `fetch` implementation override. Providers route every HTTP
289
+ * request — direct calls, SDK clients, and retry helpers — through this
290
+ * implementation when set. Defaults to `globalThis.fetch`. Providers that
291
+ * do not use `fetch` (Bedrock's AWS SDK transport, Cursor's HTTP/2
292
+ * channel) silently ignore the override.
293
+ */
294
+ fetch?: FetchImpl;
278
295
  /** Cursor exec/MCP tool handlers (cursor-agent only). */
279
296
  execHandlers?: CursorExecHandlers;
280
297
  }
@@ -613,7 +630,7 @@ export interface OpenAICompat {
613
630
  requiresThinkingAsText?: boolean;
614
631
  /** Whether tool call IDs must be normalized to Mistral format (exactly 9 alphanumeric chars). Default: auto-detected from URL. */
615
632
  requiresMistralToolIds?: boolean;
616
- /** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "openrouter" uses reasoning: { effort }, "zai" uses thinking: { type: "enabled" }, "qwen" uses top-level enable_thinking, and "qwen-chat-template" uses chat_template_kwargs.enable_thinking. Default: "openai". */
633
+ /** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "openrouter" uses reasoning: { effort }, "zai" uses thinking: { type: "enabled" | "disabled" } (also used by Moonshot Kimi), "qwen" uses top-level enable_thinking, and "qwen-chat-template" uses chat_template_kwargs.enable_thinking. Default: "openai". */
617
634
  thinkingFormat?: "openai" | "openrouter" | "zai" | "qwen" | "qwen-chat-template";
618
635
  /** Which reasoning content field to emit on assistant messages. Default: auto-detected. */
619
636
  reasoningContentField?: "reasoning_content" | "reasoning" | "reasoning_text";
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * Patch `globalThis.fetch` to advertise HTTP/2 in TLS ALPN, with transparent
3
- * HTTP/1.1 fallback when the server doesn't select `h2`.
3
+ * HTTP/1.1 fallback when the server doesn't negotiate `h2`.
4
4
  *
5
5
  * Bun's HTTP/2 client is gated on `BUN_FEATURE_FLAG_EXPERIMENTAL_HTTP2_CLIENT`,
6
6
  * read by the native runtime before any JS executes; assigning to
@@ -8,6 +8,12 @@
8
8
  * activates h2 over TLS ALPN and rejects with `error.code === "HTTP2Unsupported"`
9
9
  * if the server picks anything else, so we catch and retry without the hint.
10
10
  *
11
+ * Some HTTPS endpoints (e.g. corporate API gateways behind reverse proxies)
12
+ * advertise h2 via ALPN but then refuse or reset the connection at the HTTP/2
13
+ * framing layer. Bun surfaces these as `ConnectionRefused`, `ConnectionReset`,
14
+ * or `ConnectionClosed` rather than `HTTP2Unsupported`, so we treat those
15
+ * codes as h2-fallback triggers as well.
16
+ *
11
17
  * Bun negotiates h2 via ALPN over TLS only (no h2c), so plain `http://` URLs
12
18
  * skip the attempt entirely — avoids the throw/retry round-trip for localhost.
13
19
  *
@@ -24,12 +30,19 @@ export function installH2Fetch(): void {
24
30
  const original = globalThis.fetch as typeof fetch & PatchedFetch;
25
31
  if (original[installed]) return;
26
32
 
33
+ /** Error codes that indicate h2 negotiation/transport failure (not an application error). */
34
+ const h2FallbackCodes: ReadonlySet<string> = new Set([
35
+ "HTTP2Unsupported", // Server selected h1 in ALPN
36
+ "ConnectionRefused", // Server refused the h2 connection
37
+ "ConnectionReset", // Server reset during h2 handshake
38
+ "ConnectionClosed", // Server closed before h2 response
39
+ ]);
27
40
  const wrapper = async function h2fetch(input: string | URL | Request, init?: RequestInit): Promise<Response> {
28
41
  if (!isHttps(input)) return original(input, init);
29
42
  try {
30
43
  return await original(input, { ...init, protocol: "http2" });
31
44
  } catch (err) {
32
- if ((err as { code?: unknown }).code !== "HTTP2Unsupported") throw err;
45
+ if (!h2FallbackCodes.has((err as { code?: string }).code ?? "")) throw err;
33
46
  return original(input, init);
34
47
  }
35
48
  } as typeof fetch & PatchedFetch;
@@ -1,6 +1,6 @@
1
1
  import { $env } from "@oh-my-pi/pi-utils";
2
2
 
3
- const DEFAULT_STREAM_IDLE_TIMEOUT_MS = 120_000;
3
+ const DEFAULT_STREAM_IDLE_TIMEOUT_MS = 30_000;
4
4
  const DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS = 100_000;
5
5
 
6
6
  function normalizeIdleTimeoutMs(value: string | undefined, fallback: number): number | undefined {