@oh-my-pi/pi-ai 15.0.1 → 15.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/package.json +4 -4
- package/src/providers/anthropic.ts +9 -1
- package/src/providers/azure-openai-responses.ts +4 -1
- package/src/providers/gitlab-duo.ts +10 -4
- package/src/providers/google-gemini-cli.ts +2 -1
- package/src/providers/google-vertex.ts +28 -10
- package/src/providers/google.ts +12 -4
- package/src/providers/ollama.ts +1 -0
- package/src/providers/openai-anthropic-shim.ts +2 -0
- package/src/providers/openai-codex-responses.ts +4 -0
- package/src/providers/openai-completions-compat.ts +19 -9
- package/src/providers/openai-completions.ts +15 -7
- package/src/providers/openai-responses.ts +5 -1
- package/src/providers/register-builtins.ts +35 -8
- package/src/types.ts +18 -1
- package/src/utils/h2-fetch.ts +15 -2
- package/src/utils/idle-iterator.ts +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.0.2] - 2026-05-15
|
|
6
|
+
### Fixed
|
|
7
|
+
|
|
8
|
+
- Fixed `StreamOptions.fetch` typing to accept fetch-compatible override functions that do not expose `preconnect`, allowing custom fetch implementations to be used without type errors across runtimes.
|
|
9
|
+
- Fixed Moonshot Kimi K2.6 forced tool calls to send `thinking: { type: "disabled" }`, avoiding `tool_choice 'specified' is incompatible with thinking enabled` 400s while preserving the requested named tool ([#1077](https://github.com/can1357/oh-my-pi/issues/1077)).
|
|
10
|
+
|
|
5
11
|
## [15.0.1] - 2026-05-14
|
|
6
12
|
### Breaking Changes
|
|
7
13
|
|
|
@@ -22,6 +28,11 @@
|
|
|
22
28
|
|
|
23
29
|
- Fixed OAuth credentials being silently disabled when two omp processes (or any two `AuthStorage` instances sharing an `agent.db`) race on token refresh. Anthropic rotates refresh tokens on every use, so the loser's `invalid_grant` response previously soft-deleted the row that the winner just rotated, forcing the user to `/login` again. `#tryOAuthCredential` now re-reads the row from disk before declaring a definitive failure: if the persisted `refresh` differs from the snapshot it tried, the peer-rotated credential is reloaded and the request retries against the fresh token instead of disabling the live row.
|
|
24
30
|
- Closed a remaining race window in OAuth refresh-failure handling: between re-reading the credential row to check for peer rotation and the subsequent soft-delete, another process could still complete a refresh and rotate the row, leaving us to disable the freshly-rotated credential by `id`. The disable now runs as a single CAS update conditioned on the row's `data` still matching the snapshot we tried to refresh, and on `disabled_cause IS NULL`. If the CAS reports 0 rows changed (peer rotation, or row already disabled by a concurrent failure on the same snapshot), we reload from disk and retry instead of mutating the wrong row or emitting a spurious `credential_disabled` event.
|
|
31
|
+
### Changed
|
|
32
|
+
- Lowered the default steady-state stream idle timeout from 120s to 30s while preserving the existing environment overrides.
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
- Lazy built-in provider streams now enforce the shared idle watchdog and abort stalled provider requests, so session auto-retry can continue after transient network drops instead of remaining stuck. Caller aborts still terminate as aborted.
|
|
25
36
|
|
|
26
37
|
## [14.9.3] - 2026-05-10
|
|
27
38
|
|
package/package.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "15.0.
|
|
4
|
+
"version": "15.0.2",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
|
-
"homepage": "https://
|
|
6
|
+
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
8
8
|
"contributors": [
|
|
9
9
|
"Mario Zechner"
|
|
@@ -46,8 +46,8 @@
|
|
|
46
46
|
"@aws-sdk/credential-provider-node": "^3.972.39",
|
|
47
47
|
"@bufbuild/protobuf": "^2.12.0",
|
|
48
48
|
"@google/genai": "^1.52.0",
|
|
49
|
-
"@oh-my-pi/pi-natives": "15.0.
|
|
50
|
-
"@oh-my-pi/pi-utils": "15.0.
|
|
49
|
+
"@oh-my-pi/pi-natives": "15.0.2",
|
|
50
|
+
"@oh-my-pi/pi-utils": "15.0.2",
|
|
51
51
|
"@sinclair/typebox": "^0.34.49",
|
|
52
52
|
"@smithy/node-http-handler": "^4.6.1",
|
|
53
53
|
"ajv": "^8.20.0",
|
|
@@ -25,6 +25,7 @@ import type {
|
|
|
25
25
|
AssistantMessage,
|
|
26
26
|
CacheRetention,
|
|
27
27
|
Context,
|
|
28
|
+
FetchImpl,
|
|
28
29
|
ImageContent,
|
|
29
30
|
Message,
|
|
30
31
|
Model,
|
|
@@ -541,6 +542,7 @@ export type AnthropicClientOptionsArgs = {
|
|
|
541
542
|
isOAuth?: boolean;
|
|
542
543
|
hasTools?: boolean;
|
|
543
544
|
onSseEvent?: AnthropicOptions["onSseEvent"];
|
|
545
|
+
fetch?: FetchImpl;
|
|
544
546
|
};
|
|
545
547
|
|
|
546
548
|
export type AnthropicClientOptionsResult = {
|
|
@@ -965,6 +967,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
965
967
|
isOAuth: options?.isOAuth,
|
|
966
968
|
hasTools: !!context.tools?.length,
|
|
967
969
|
onSseEvent: options?.onSseEvent,
|
|
970
|
+
fetch: options?.fetch,
|
|
968
971
|
});
|
|
969
972
|
client = created.client;
|
|
970
973
|
isOAuthToken = created.isOAuthToken;
|
|
@@ -1405,7 +1408,12 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1405
1408
|
const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
|
|
1406
1409
|
const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
|
|
1407
1410
|
const tlsFetchOptions = buildClaudeCodeTlsFetchOptions(model, baseUrl);
|
|
1408
|
-
const
|
|
1411
|
+
const baseFetch = args.fetch ?? fetch;
|
|
1412
|
+
const debugFetch = onSseEvent
|
|
1413
|
+
? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model))
|
|
1414
|
+
: args.fetch
|
|
1415
|
+
? baseFetch
|
|
1416
|
+
: undefined;
|
|
1409
1417
|
if (model.provider === "github-copilot") {
|
|
1410
1418
|
const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
|
|
1411
1419
|
const betaFeatures = [...extraBetas];
|
|
@@ -241,6 +241,7 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
|
|
|
241
241
|
|
|
242
242
|
const { baseUrl, apiVersion } = resolveAzureConfig(model, options);
|
|
243
243
|
|
|
244
|
+
const baseFetch = options?.fetch ?? fetch;
|
|
244
245
|
return new AzureOpenAI({
|
|
245
246
|
apiKey,
|
|
246
247
|
apiVersion,
|
|
@@ -248,7 +249,9 @@ function createClient(model: Model<"azure-openai-responses">, apiKey: string, op
|
|
|
248
249
|
maxRetries: 5,
|
|
249
250
|
defaultHeaders: headers,
|
|
250
251
|
baseURL: baseUrl,
|
|
251
|
-
fetch: options?.onSseEvent
|
|
252
|
+
fetch: options?.onSseEvent
|
|
253
|
+
? wrapFetchForSseDebug(baseFetch, event => options.onSseEvent?.(event, model))
|
|
254
|
+
: baseFetch,
|
|
252
255
|
});
|
|
253
256
|
}
|
|
254
257
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { ANTHROPIC_THINKING, mapAnthropicToolChoice } from "../stream";
|
|
2
|
-
import type { Api, Context, Model, SimpleStreamOptions } from "../types";
|
|
2
|
+
import type { Api, Context, FetchImpl, Model, SimpleStreamOptions } from "../types";
|
|
3
3
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
4
4
|
import type { OpenAICompletionsOptions } from "./openai-completions";
|
|
5
5
|
import type { OpenAIResponsesOptions } from "./openai-responses";
|
|
@@ -172,13 +172,16 @@ interface DirectAccessToken {
|
|
|
172
172
|
|
|
173
173
|
const directAccessCache = new Map<string, DirectAccessToken>();
|
|
174
174
|
|
|
175
|
-
async function getDirectAccessToken(
|
|
175
|
+
async function getDirectAccessToken(
|
|
176
|
+
gitlabAccessToken: string,
|
|
177
|
+
fetchImpl: FetchImpl = fetch,
|
|
178
|
+
): Promise<DirectAccessToken> {
|
|
176
179
|
const cached = directAccessCache.get(gitlabAccessToken);
|
|
177
180
|
if (cached && cached.expiresAt > Date.now()) {
|
|
178
181
|
return cached;
|
|
179
182
|
}
|
|
180
183
|
|
|
181
|
-
const response = await
|
|
184
|
+
const response = await fetchImpl(`${GITLAB_COM_URL}/api/v4/ai/third_party_agents/direct_access`, {
|
|
182
185
|
method: "POST",
|
|
183
186
|
headers: {
|
|
184
187
|
Authorization: `Bearer ${gitlabAccessToken}`,
|
|
@@ -240,7 +243,7 @@ export function streamGitLabDuo(
|
|
|
240
243
|
throw new Error(`Unsupported GitLab Duo model: ${model.id}`);
|
|
241
244
|
}
|
|
242
245
|
|
|
243
|
-
const directAccess = await getDirectAccessToken(options.apiKey);
|
|
246
|
+
const directAccess = await getDirectAccessToken(options.apiKey, options.fetch);
|
|
244
247
|
const headers = {
|
|
245
248
|
...directAccess.headers,
|
|
246
249
|
...options.headers,
|
|
@@ -278,6 +281,7 @@ export function streamGitLabDuo(
|
|
|
278
281
|
onPayload: options.onPayload,
|
|
279
282
|
onResponse: options.onResponse,
|
|
280
283
|
onSseEvent: options.onSseEvent,
|
|
284
|
+
fetch: options.fetch,
|
|
281
285
|
thinkingEnabled: Boolean(reasoningEffort) && model.reasoning,
|
|
282
286
|
thinkingBudgetTokens: reasoningEffort
|
|
283
287
|
? (options.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
|
|
@@ -314,6 +318,7 @@ export function streamGitLabDuo(
|
|
|
314
318
|
onPayload: options.onPayload,
|
|
315
319
|
onResponse: options.onResponse,
|
|
316
320
|
onSseEvent: options.onSseEvent,
|
|
321
|
+
fetch: options.fetch,
|
|
317
322
|
reasoning: reasoningEffort,
|
|
318
323
|
toolChoice: options.toolChoice,
|
|
319
324
|
} satisfies OpenAIResponsesOptions,
|
|
@@ -345,6 +350,7 @@ export function streamGitLabDuo(
|
|
|
345
350
|
onPayload: options.onPayload,
|
|
346
351
|
onResponse: options.onResponse,
|
|
347
352
|
onSseEvent: options.onSseEvent,
|
|
353
|
+
fetch: options.fetch,
|
|
348
354
|
reasoning: reasoningEffort,
|
|
349
355
|
toolChoice: options.toolChoice,
|
|
350
356
|
} satisfies OpenAICompletionsOptions,
|
|
@@ -362,6 +362,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
362
362
|
maxAttempts: MAX_RETRIES + 1,
|
|
363
363
|
defaultDelayMs: attempt => BASE_DELAY_MS * 2 ** attempt,
|
|
364
364
|
maxDelayMs: options?.maxRetryDelayMs ?? RATE_LIMIT_BUDGET_MS,
|
|
365
|
+
fetch: options?.fetch,
|
|
365
366
|
},
|
|
366
367
|
);
|
|
367
368
|
if (!response.ok) {
|
|
@@ -545,7 +546,7 @@ export const streamGoogleGeminiCli: StreamFunction<"google-gemini-cli"> = (
|
|
|
545
546
|
throw new Error("Missing request URL");
|
|
546
547
|
}
|
|
547
548
|
|
|
548
|
-
currentResponse = await fetch(requestUrl, {
|
|
549
|
+
currentResponse = await (options?.fetch ?? fetch)(requestUrl, {
|
|
549
550
|
method: "POST",
|
|
550
551
|
headers: requestHeaders,
|
|
551
552
|
body: requestBodyJson,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { GoogleGenAI } from "@google/genai";
|
|
2
2
|
import { $env } from "@oh-my-pi/pi-utils";
|
|
3
|
-
import type { Context, Model, StreamFunction } from "../types";
|
|
3
|
+
import type { Context, FetchImpl, Model, StreamFunction } from "../types";
|
|
4
4
|
import type { AssistantMessageEventStream } from "../utils/event-stream";
|
|
5
5
|
import { buildGoogleGenerateContentParams, type GoogleSharedStreamOptions, streamGoogleGenAI } from "./google-shared";
|
|
6
6
|
|
|
@@ -25,7 +25,9 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
|
|
|
25
25
|
const apiKey = resolveApiKey(options);
|
|
26
26
|
const project = apiKey ? undefined : resolveProject(options);
|
|
27
27
|
const location = apiKey ? undefined : resolveLocation(options);
|
|
28
|
-
const client = apiKey
|
|
28
|
+
const client = apiKey
|
|
29
|
+
? createClientWithApiKey(model, apiKey, options?.fetch)
|
|
30
|
+
: createClient(model, project!, location!, options?.fetch);
|
|
29
31
|
const params = buildGoogleGenerateContentParams(model, context, options ?? {});
|
|
30
32
|
const url = apiKey
|
|
31
33
|
? `https://aiplatform.googleapis.com/${API_VERSION}/publishers/google/models/${model.id}:streamGenerateContent`
|
|
@@ -34,29 +36,45 @@ export const streamGoogleVertex: StreamFunction<"google-vertex"> = (
|
|
|
34
36
|
},
|
|
35
37
|
});
|
|
36
38
|
|
|
37
|
-
function buildHttpOptions(
|
|
38
|
-
|
|
39
|
-
|
|
39
|
+
function buildHttpOptions(
|
|
40
|
+
model: Model<"google-vertex">,
|
|
41
|
+
fetchOverride: FetchImpl | undefined,
|
|
42
|
+
): { headers?: Record<string, string>; fetch?: FetchImpl } | undefined {
|
|
43
|
+
const options: { headers?: Record<string, string>; fetch?: FetchImpl } = {};
|
|
44
|
+
if (model.headers) {
|
|
45
|
+
options.headers = { ...model.headers };
|
|
46
|
+
}
|
|
47
|
+
if (fetchOverride) {
|
|
48
|
+
options.fetch = fetchOverride;
|
|
40
49
|
}
|
|
41
|
-
return
|
|
50
|
+
return Object.keys(options).length > 0 ? options : undefined;
|
|
42
51
|
}
|
|
43
52
|
|
|
44
|
-
function createClient(
|
|
53
|
+
function createClient(
|
|
54
|
+
model: Model<"google-vertex">,
|
|
55
|
+
project: string,
|
|
56
|
+
location: string,
|
|
57
|
+
fetchOverride: FetchImpl | undefined,
|
|
58
|
+
): GoogleGenAI {
|
|
45
59
|
return new GoogleGenAI({
|
|
46
60
|
vertexai: true,
|
|
47
61
|
project,
|
|
48
62
|
location,
|
|
49
63
|
apiVersion: API_VERSION,
|
|
50
|
-
httpOptions: buildHttpOptions(model),
|
|
64
|
+
httpOptions: buildHttpOptions(model, fetchOverride),
|
|
51
65
|
});
|
|
52
66
|
}
|
|
53
67
|
|
|
54
|
-
function createClientWithApiKey(
|
|
68
|
+
function createClientWithApiKey(
|
|
69
|
+
model: Model<"google-vertex">,
|
|
70
|
+
apiKey: string,
|
|
71
|
+
fetchOverride: FetchImpl | undefined,
|
|
72
|
+
): GoogleGenAI {
|
|
55
73
|
return new GoogleGenAI({
|
|
56
74
|
vertexai: true,
|
|
57
75
|
apiKey,
|
|
58
76
|
apiVersion: API_VERSION,
|
|
59
|
-
httpOptions: buildHttpOptions(model),
|
|
77
|
+
httpOptions: buildHttpOptions(model, fetchOverride),
|
|
60
78
|
});
|
|
61
79
|
}
|
|
62
80
|
|
package/src/providers/google.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { GoogleGenAI } from "@google/genai";
|
|
2
2
|
import { getEnvApiKey } from "../stream";
|
|
3
|
-
import type { Context, Model, StreamFunction } from "../types";
|
|
3
|
+
import type { Context, FetchImpl, Model, StreamFunction } from "../types";
|
|
4
4
|
import type { AssistantMessageEventStream } from "../utils/event-stream";
|
|
5
5
|
import { buildGoogleGenerateContentParams, type GoogleSharedStreamOptions, streamGoogleGenAI } from "./google-shared";
|
|
6
6
|
|
|
@@ -17,15 +17,20 @@ export const streamGoogle: StreamFunction<"google-generative-ai"> = (
|
|
|
17
17
|
api: "google-generative-ai",
|
|
18
18
|
prepare: () => {
|
|
19
19
|
const apiKey = options?.apiKey || getEnvApiKey(model.provider);
|
|
20
|
-
const client = createClient(model, apiKey);
|
|
20
|
+
const client = createClient(model, apiKey, options?.fetch);
|
|
21
21
|
const params = buildGoogleGenerateContentParams(model, context, options ?? {});
|
|
22
22
|
const url = model.baseUrl ? `${model.baseUrl}/models/${model.id}:streamGenerateContent` : undefined;
|
|
23
23
|
return { client, params, url };
|
|
24
24
|
},
|
|
25
25
|
});
|
|
26
26
|
|
|
27
|
-
function createClient(model: Model<"google-generative-ai">, apiKey?: string): GoogleGenAI {
|
|
28
|
-
const httpOptions: {
|
|
27
|
+
function createClient(model: Model<"google-generative-ai">, apiKey?: string, fetchOverride?: FetchImpl): GoogleGenAI {
|
|
28
|
+
const httpOptions: {
|
|
29
|
+
baseUrl?: string;
|
|
30
|
+
apiVersion?: string;
|
|
31
|
+
headers?: Record<string, string>;
|
|
32
|
+
fetch?: FetchImpl;
|
|
33
|
+
} = {};
|
|
29
34
|
if (model.baseUrl) {
|
|
30
35
|
httpOptions.baseUrl = model.baseUrl;
|
|
31
36
|
httpOptions.apiVersion = ""; // baseUrl already includes version path, don't append
|
|
@@ -33,6 +38,9 @@ function createClient(model: Model<"google-generative-ai">, apiKey?: string): Go
|
|
|
33
38
|
if (model.headers) {
|
|
34
39
|
httpOptions.headers = model.headers;
|
|
35
40
|
}
|
|
41
|
+
if (fetchOverride) {
|
|
42
|
+
httpOptions.fetch = fetchOverride;
|
|
43
|
+
}
|
|
36
44
|
|
|
37
45
|
return new GoogleGenAI({
|
|
38
46
|
apiKey,
|
package/src/providers/ollama.ts
CHANGED
|
@@ -378,6 +378,7 @@ export const streamOllama: StreamFunction<"ollama-chat"> = (
|
|
|
378
378
|
body: JSON.stringify(body),
|
|
379
379
|
signal: options.signal,
|
|
380
380
|
defaultDelayMs: OLLAMA_RETRY_DELAYS_MS,
|
|
381
|
+
fetch: options.fetch,
|
|
381
382
|
});
|
|
382
383
|
if (!response.ok) {
|
|
383
384
|
throw new Error(`HTTP ${response.status} from ${baseUrl}/api/chat`);
|
|
@@ -88,6 +88,7 @@ export function streamOpenAIAnthropicShim(
|
|
|
88
88
|
onPayload: options?.onPayload,
|
|
89
89
|
onResponse: options?.onResponse,
|
|
90
90
|
onSseEvent: options?.onSseEvent,
|
|
91
|
+
fetch: options?.fetch,
|
|
91
92
|
thinkingEnabled,
|
|
92
93
|
thinkingBudgetTokens: thinkingBudget,
|
|
93
94
|
});
|
|
@@ -116,6 +117,7 @@ export function streamOpenAIAnthropicShim(
|
|
|
116
117
|
onPayload: options?.onPayload,
|
|
117
118
|
onResponse: options?.onResponse,
|
|
118
119
|
onSseEvent: options?.onSseEvent,
|
|
120
|
+
fetch: options?.fetch,
|
|
119
121
|
reasoning: reasoningEffort,
|
|
120
122
|
});
|
|
121
123
|
|
|
@@ -17,6 +17,7 @@ import {
|
|
|
17
17
|
type Api,
|
|
18
18
|
type AssistantMessage,
|
|
19
19
|
type Context,
|
|
20
|
+
type FetchImpl,
|
|
20
21
|
type Model,
|
|
21
22
|
type ProviderSessionState,
|
|
22
23
|
type ServiceTier,
|
|
@@ -735,6 +736,7 @@ async function openCodexSseTransport(
|
|
|
735
736
|
state,
|
|
736
737
|
requestSetup.requestSignal,
|
|
737
738
|
event => options?.onSseEvent?.(event, model),
|
|
739
|
+
options?.fetch,
|
|
738
740
|
),
|
|
739
741
|
);
|
|
740
742
|
return { eventStream, requestBodyForState: structuredCloneJSON(body), transport: "sse" };
|
|
@@ -2173,6 +2175,7 @@ async function openCodexSseEventStream(
|
|
|
2173
2175
|
state: CodexWebSocketSessionState | undefined,
|
|
2174
2176
|
signal?: AbortSignal,
|
|
2175
2177
|
onSseEvent?: OpenAICodexResponsesOptions["onSseEvent"],
|
|
2178
|
+
fetchOverride?: FetchImpl,
|
|
2176
2179
|
): Promise<AsyncGenerator<Record<string, unknown>>> {
|
|
2177
2180
|
const headers = createCodexHeaders(requestHeaders, accountId, apiKey, sessionId, "sse", state);
|
|
2178
2181
|
logCodexDebug("codex request", {
|
|
@@ -2190,6 +2193,7 @@ async function openCodexSseEventStream(
|
|
|
2190
2193
|
maxAttempts: CODEX_MAX_RETRIES + 1,
|
|
2191
2194
|
defaultDelayMs: attempt => CODEX_RETRY_DELAY_MS * (attempt + 1),
|
|
2192
2195
|
maxDelayMs: CODEX_RATE_LIMIT_BUDGET_MS,
|
|
2196
|
+
fetch: fetchOverride,
|
|
2193
2197
|
});
|
|
2194
2198
|
logCodexDebug("codex response", {
|
|
2195
2199
|
url: response.url,
|
|
@@ -53,6 +53,12 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
53
53
|
const isZai = provider === "zai" || baseUrl.includes("api.z.ai");
|
|
54
54
|
const isKilo = provider === "kilo" || baseUrl.includes("api.kilo.ai");
|
|
55
55
|
const isKimiModel = model.id.includes("moonshotai/kimi") || /^kimi[-.]/i.test(model.id);
|
|
56
|
+
const isMoonshotKimi =
|
|
57
|
+
isKimiModel &&
|
|
58
|
+
(provider === "moonshot" ||
|
|
59
|
+
provider === "kimi-code" ||
|
|
60
|
+
baseUrl.includes("api.moonshot.ai") ||
|
|
61
|
+
baseUrl.includes("api.kimi.com"));
|
|
56
62
|
const isAnthropicModel =
|
|
57
63
|
provider === "anthropic" ||
|
|
58
64
|
baseUrl.includes("api.anthropic.com") ||
|
|
@@ -90,6 +96,7 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
90
96
|
provider === "opencode-zen" ||
|
|
91
97
|
provider === "opencode-go" ||
|
|
92
98
|
baseUrl.includes("opencode.ai");
|
|
99
|
+
const isOpenCodeProvider = provider === "opencode-go" || provider === "opencode-zen";
|
|
93
100
|
|
|
94
101
|
const useMaxTokens =
|
|
95
102
|
provider === "mistral" ||
|
|
@@ -173,22 +180,25 @@ export function detectOpenAICompat(model: Model<"openai-completions">, resolvedB
|
|
|
173
180
|
requiresAssistantAfterToolResult: false,
|
|
174
181
|
requiresThinkingAsText: isMistral,
|
|
175
182
|
requiresMistralToolIds: isMistral,
|
|
176
|
-
thinkingFormat:
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
+
thinkingFormat:
|
|
184
|
+
isZai || isMoonshotKimi
|
|
185
|
+
? "zai"
|
|
186
|
+
: provider === "openrouter" || baseUrl.includes("openrouter.ai")
|
|
187
|
+
? "openrouter"
|
|
188
|
+
: isAlibaba || isQwen
|
|
189
|
+
? "qwen"
|
|
190
|
+
: "openai",
|
|
183
191
|
reasoningContentField: "reasoning_content",
|
|
184
192
|
// Backends that 400 follow-up requests when prior assistant tool-call turns lack `reasoning_content`:
|
|
185
|
-
// - Kimi: documented invariant on its native API
|
|
193
|
+
// - Kimi: documented invariant on its native API.
|
|
186
194
|
// - Any reasoning-capable model reached through OpenRouter: DeepSeek V4 Pro and similar enforce
|
|
187
195
|
// this server-side whenever the request is in thinking mode. We can't translate Anthropic's
|
|
188
196
|
// redacted/encrypted reasoning into DeepSeek's plaintext form, so cross-provider continuations
|
|
189
197
|
// rely on a placeholder — see `convertMessages` for the placeholder injection.
|
|
198
|
+
// - OpenCode-Go and OpenCode-Zen handle reasoning content internally and reject
|
|
199
|
+
// `reasoning_content` in client-sent messages — exclude them even for Kimi models.
|
|
190
200
|
requiresReasoningContentForToolCalls:
|
|
191
|
-
isKimiModel ||
|
|
201
|
+
(isKimiModel && !isOpenCodeProvider) ||
|
|
192
202
|
(isDeepseekFamily && Boolean(model.reasoning)) ||
|
|
193
203
|
((provider === "openrouter" || baseUrl.includes("openrouter.ai")) && Boolean(model.reasoning)),
|
|
194
204
|
// DeepSeek V4 rejects synthetic reasoning_content placeholders (".") on tool-call turns.
|
|
@@ -16,6 +16,7 @@ import { getEnvApiKey } from "../stream";
|
|
|
16
16
|
import {
|
|
17
17
|
type AssistantMessage,
|
|
18
18
|
type Context,
|
|
19
|
+
type FetchImpl,
|
|
19
20
|
getPriorityPremiumRequests,
|
|
20
21
|
type Message,
|
|
21
22
|
type MessageAttribution,
|
|
@@ -362,6 +363,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
362
363
|
options?.headers,
|
|
363
364
|
options?.initiatorOverride,
|
|
364
365
|
options?.onSseEvent,
|
|
366
|
+
options?.fetch,
|
|
365
367
|
);
|
|
366
368
|
const priorityPremiumRequests = getPriorityPremiumRequests(options?.serviceTier, model.provider);
|
|
367
369
|
const premiumRequestsTotal =
|
|
@@ -778,6 +780,7 @@ async function createClient(
|
|
|
778
780
|
extraHeaders?: Record<string, string>,
|
|
779
781
|
initiatorOverride?: MessageAttribution,
|
|
780
782
|
onSseEvent?: OpenAICompletionsOptions["onSseEvent"],
|
|
783
|
+
fetchOverride?: FetchImpl,
|
|
781
784
|
): Promise<{
|
|
782
785
|
client: OpenAI;
|
|
783
786
|
copilotPremiumRequests: number | undefined;
|
|
@@ -847,9 +850,10 @@ async function createClient(
|
|
|
847
850
|
azureDefaultQuery = { "api-version": apiVersion };
|
|
848
851
|
}
|
|
849
852
|
let capturedErrorResponse: CapturedHttpErrorResponse | undefined;
|
|
853
|
+
const baseFetch = fetchOverride ?? fetch;
|
|
850
854
|
const wrappedFetch = Object.assign(
|
|
851
855
|
async (input: string | URL | Request, init?: RequestInit): Promise<Response> => {
|
|
852
|
-
const response = await
|
|
856
|
+
const response = await baseFetch(input, init);
|
|
853
857
|
if (response.ok) {
|
|
854
858
|
capturedErrorResponse = undefined;
|
|
855
859
|
return response;
|
|
@@ -872,7 +876,7 @@ async function createClient(
|
|
|
872
876
|
};
|
|
873
877
|
return response;
|
|
874
878
|
},
|
|
875
|
-
{ preconnect:
|
|
879
|
+
baseFetch.preconnect ? { preconnect: baseFetch.preconnect } : {},
|
|
876
880
|
);
|
|
877
881
|
const debugFetch = onSseEvent ? wrapFetchForSseDebug(wrappedFetch, event => onSseEvent(event, model)) : wrappedFetch;
|
|
878
882
|
return {
|
|
@@ -1019,12 +1023,14 @@ function buildParams(
|
|
|
1019
1023
|
}
|
|
1020
1024
|
|
|
1021
1025
|
if (compat.disableReasoningOnForcedToolChoice && isForcedToolChoice(params.tool_choice)) {
|
|
1022
|
-
//
|
|
1023
|
-
//
|
|
1024
|
-
//
|
|
1025
|
-
// the agent still gets the forced tool call, just without thinking.
|
|
1026
|
+
// Backends like Kimi 400 with `tool_choice 'specified' is incompatible
|
|
1027
|
+
// with thinking enabled`. Suppress thinking for this single forced-tool
|
|
1028
|
+
// turn while keeping the tool-selection contract intact.
|
|
1026
1029
|
delete params.reasoning_effort;
|
|
1027
1030
|
delete params.reasoning;
|
|
1031
|
+
if (compat.thinkingFormat === "zai") {
|
|
1032
|
+
params.thinking = { type: "disabled" };
|
|
1033
|
+
}
|
|
1028
1034
|
}
|
|
1029
1035
|
|
|
1030
1036
|
// OpenRouter provider routing preferences
|
|
@@ -1362,7 +1368,9 @@ export function convertMessages(
|
|
|
1362
1368
|
const canUseSyntheticReasoningContent =
|
|
1363
1369
|
compat.requiresReasoningContentForToolCalls &&
|
|
1364
1370
|
compat.allowsSyntheticReasoningContentForToolCalls &&
|
|
1365
|
-
(compat.thinkingFormat === "openai" ||
|
|
1371
|
+
(compat.thinkingFormat === "openai" ||
|
|
1372
|
+
compat.thinkingFormat === "openrouter" ||
|
|
1373
|
+
compat.thinkingFormat === "zai");
|
|
1366
1374
|
// DeepSeek reasoning models require reasoning_content on ALL assistant turns,
|
|
1367
1375
|
// not just tool-call turns. Other providers (Kimi, OpenRouter) only require it
|
|
1368
1376
|
// on tool-call turns.
|
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
type AssistantMessage,
|
|
11
11
|
type CacheRetention,
|
|
12
12
|
type Context,
|
|
13
|
+
type FetchImpl,
|
|
13
14
|
getPriorityPremiumRequests,
|
|
14
15
|
type MessageAttribution,
|
|
15
16
|
type Model,
|
|
@@ -210,6 +211,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|
|
210
211
|
options?.initiatorOverride,
|
|
211
212
|
cacheSessionId,
|
|
212
213
|
options?.onSseEvent,
|
|
214
|
+
options?.fetch,
|
|
213
215
|
);
|
|
214
216
|
const priorityPremiumRequests = getPriorityPremiumRequests(options?.serviceTier, model.provider);
|
|
215
217
|
const premiumRequestsTotal =
|
|
@@ -312,6 +314,7 @@ function createClient(
|
|
|
312
314
|
initiatorOverride?: MessageAttribution,
|
|
313
315
|
sessionId?: string,
|
|
314
316
|
onSseEvent?: OpenAIResponsesOptions["onSseEvent"],
|
|
317
|
+
fetchOverride?: FetchImpl,
|
|
315
318
|
): {
|
|
316
319
|
client: OpenAI;
|
|
317
320
|
copilotPremiumRequests: number | undefined;
|
|
@@ -349,6 +352,7 @@ function createClient(
|
|
|
349
352
|
headers.session_id ??= sessionId;
|
|
350
353
|
headers["x-client-request-id"] ??= sessionId;
|
|
351
354
|
}
|
|
355
|
+
const baseFetch = fetchOverride ?? fetch;
|
|
352
356
|
return {
|
|
353
357
|
client: new OpenAI({
|
|
354
358
|
apiKey,
|
|
@@ -356,7 +360,7 @@ function createClient(
|
|
|
356
360
|
dangerouslyAllowBrowser: true,
|
|
357
361
|
maxRetries: 5,
|
|
358
362
|
defaultHeaders: headers,
|
|
359
|
-
fetch: onSseEvent ? wrapFetchForSseDebug(
|
|
363
|
+
fetch: onSseEvent ? wrapFetchForSseDebug(baseFetch, event => onSseEvent(event, model)) : baseFetch,
|
|
360
364
|
}),
|
|
361
365
|
copilotPremiumRequests,
|
|
362
366
|
baseUrl,
|
|
@@ -19,7 +19,9 @@ import type {
|
|
|
19
19
|
Model,
|
|
20
20
|
OptionsForApi,
|
|
21
21
|
} from "../types";
|
|
22
|
+
import { type AbortSourceTracker, createAbortSourceTracker } from "../utils/abort";
|
|
22
23
|
import { AssistantMessageEventStream as EventStreamImpl } from "../utils/event-stream";
|
|
24
|
+
import { getStreamFirstEventTimeoutMs, getStreamIdleTimeoutMs, iterateWithIdleTimeout } from "../utils/idle-iterator";
|
|
23
25
|
import type { BedrockOptions } from "./amazon-bedrock";
|
|
24
26
|
import type { AnthropicOptions } from "./anthropic";
|
|
25
27
|
import type { AzureOpenAIResponsesOptions } from "./azure-openai-responses";
|
|
@@ -155,6 +157,9 @@ export function setBedrockProviderModule(module: BedrockProviderModule): void {
|
|
|
155
157
|
// Stream forwarding / error helpers
|
|
156
158
|
// ---------------------------------------------------------------------------
|
|
157
159
|
|
|
160
|
+
const LAZY_STREAM_IDLE_TIMEOUT_ERROR = "Provider stream stalled while waiting for the next event";
|
|
161
|
+
const LAZY_STREAM_FIRST_EVENT_TIMEOUT_ERROR = "Provider stream timed out while waiting for the first event";
|
|
162
|
+
|
|
158
163
|
function hasFinalResult(
|
|
159
164
|
source: AsyncIterable<AssistantMessageEvent>,
|
|
160
165
|
): source is AsyncIterable<AssistantMessageEvent> & { result(): Promise<AssistantMessage> } {
|
|
@@ -165,10 +170,23 @@ function forwardStream<TApi extends Api>(
|
|
|
165
170
|
target: EventStreamImpl,
|
|
166
171
|
source: AsyncIterable<AssistantMessageEvent>,
|
|
167
172
|
model: Model<TApi>,
|
|
173
|
+
options: OptionsForApi<TApi>,
|
|
174
|
+
abortTracker: AbortSourceTracker,
|
|
168
175
|
): void {
|
|
169
176
|
(async () => {
|
|
170
177
|
try {
|
|
171
|
-
|
|
178
|
+
const idleTimeoutMs = options.streamIdleTimeoutMs ?? getStreamIdleTimeoutMs();
|
|
179
|
+
const watchedSource = iterateWithIdleTimeout(source, {
|
|
180
|
+
idleTimeoutMs,
|
|
181
|
+
firstItemTimeoutMs: options.streamFirstEventTimeoutMs ?? getStreamFirstEventTimeoutMs(idleTimeoutMs),
|
|
182
|
+
errorMessage: LAZY_STREAM_IDLE_TIMEOUT_ERROR,
|
|
183
|
+
firstItemErrorMessage: LAZY_STREAM_FIRST_EVENT_TIMEOUT_ERROR,
|
|
184
|
+
onIdle: () => abortTracker.abortLocally(new Error(LAZY_STREAM_IDLE_TIMEOUT_ERROR)),
|
|
185
|
+
onFirstItemTimeout: () => abortTracker.abortLocally(new Error(LAZY_STREAM_FIRST_EVENT_TIMEOUT_ERROR)),
|
|
186
|
+
abortSignal: options.signal,
|
|
187
|
+
});
|
|
188
|
+
|
|
189
|
+
for await (const event of watchedSource) {
|
|
172
190
|
target.push(event);
|
|
173
191
|
}
|
|
174
192
|
if (hasFinalResult(source)) {
|
|
@@ -177,14 +195,19 @@ function forwardStream<TApi extends Api>(
|
|
|
177
195
|
target.end();
|
|
178
196
|
}
|
|
179
197
|
} catch (error) {
|
|
180
|
-
const
|
|
181
|
-
|
|
198
|
+
const stopReason = abortTracker.wasCallerAbort() ? "aborted" : "error";
|
|
199
|
+
const message = createLazyLoadErrorMessage(model, error, stopReason);
|
|
200
|
+
target.push({ type: "error", reason: stopReason, error: message });
|
|
182
201
|
target.end(message);
|
|
183
202
|
}
|
|
184
203
|
})();
|
|
185
204
|
}
|
|
186
205
|
|
|
187
|
-
function createLazyLoadErrorMessage<TApi extends Api>(
|
|
206
|
+
function createLazyLoadErrorMessage<TApi extends Api>(
|
|
207
|
+
model: Model<TApi>,
|
|
208
|
+
error: unknown,
|
|
209
|
+
stopReason: Extract<AssistantMessage["stopReason"], "aborted" | "error"> = "error",
|
|
210
|
+
): AssistantMessage {
|
|
188
211
|
return {
|
|
189
212
|
role: "assistant",
|
|
190
213
|
content: [],
|
|
@@ -199,8 +222,9 @@ function createLazyLoadErrorMessage<TApi extends Api>(model: Model<TApi>, error:
|
|
|
199
222
|
totalTokens: 0,
|
|
200
223
|
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
|
|
201
224
|
},
|
|
202
|
-
stopReason
|
|
203
|
-
errorMessage:
|
|
225
|
+
stopReason,
|
|
226
|
+
errorMessage:
|
|
227
|
+
stopReason === "aborted" ? "Request was aborted" : error instanceof Error ? error.message : String(error),
|
|
204
228
|
timestamp: Date.now(),
|
|
205
229
|
};
|
|
206
230
|
}
|
|
@@ -214,11 +238,14 @@ function createLazyStream<TApi extends Api>(
|
|
|
214
238
|
): (model: Model<TApi>, context: Context, options: OptionsForApi<TApi>) => EventStreamImpl {
|
|
215
239
|
return (model, context, options) => {
|
|
216
240
|
const outer = new EventStreamImpl();
|
|
241
|
+
const streamOptions = (options ?? {}) as OptionsForApi<TApi>;
|
|
217
242
|
|
|
218
243
|
loadModule()
|
|
219
244
|
.then(module => {
|
|
220
|
-
const
|
|
221
|
-
|
|
245
|
+
const abortTracker = createAbortSourceTracker(streamOptions.signal);
|
|
246
|
+
const providerOptions = { ...streamOptions, signal: abortTracker.requestSignal } as OptionsForApi<TApi>;
|
|
247
|
+
const inner = module.stream(model, context, providerOptions);
|
|
248
|
+
forwardStream(outer, inner, model, streamOptions, abortTracker);
|
|
222
249
|
})
|
|
223
250
|
.catch(error => {
|
|
224
251
|
const message = createLazyLoadErrorMessage(model, error);
|
package/src/types.ts
CHANGED
|
@@ -204,6 +204,15 @@ export interface RawSseEvent {
|
|
|
204
204
|
raw: string[];
|
|
205
205
|
}
|
|
206
206
|
|
|
207
|
+
/**
|
|
208
|
+
* `fetch`-compatible function. Accepts any callable matching the standard
|
|
209
|
+
* fetch signature; `preconnect` is optional because non-Bun runtimes (browsers,
|
|
210
|
+
* test mocks) won't expose it.
|
|
211
|
+
*/
|
|
212
|
+
export type FetchImpl = ((input: string | URL | Request, init?: RequestInit) => Promise<Response>) & {
|
|
213
|
+
preconnect?: typeof globalThis.fetch.preconnect;
|
|
214
|
+
};
|
|
215
|
+
|
|
207
216
|
export interface StreamOptions {
|
|
208
217
|
temperature?: number;
|
|
209
218
|
topP?: number;
|
|
@@ -275,6 +284,14 @@ export interface StreamOptions {
|
|
|
275
284
|
* Set to 0 to disable the inter-event idle watchdog for this request.
|
|
276
285
|
*/
|
|
277
286
|
streamIdleTimeoutMs?: number;
|
|
287
|
+
/**
|
|
288
|
+
* Optional `fetch` implementation override. Providers route every HTTP
|
|
289
|
+
* request — direct calls, SDK clients, and retry helpers — through this
|
|
290
|
+
* implementation when set. Defaults to `globalThis.fetch`. Providers that
|
|
291
|
+
* do not use `fetch` (Bedrock's AWS SDK transport, Cursor's HTTP/2
|
|
292
|
+
* channel) silently ignore the override.
|
|
293
|
+
*/
|
|
294
|
+
fetch?: FetchImpl;
|
|
278
295
|
/** Cursor exec/MCP tool handlers (cursor-agent only). */
|
|
279
296
|
execHandlers?: CursorExecHandlers;
|
|
280
297
|
}
|
|
@@ -613,7 +630,7 @@ export interface OpenAICompat {
|
|
|
613
630
|
requiresThinkingAsText?: boolean;
|
|
614
631
|
/** Whether tool call IDs must be normalized to Mistral format (exactly 9 alphanumeric chars). Default: auto-detected from URL. */
|
|
615
632
|
requiresMistralToolIds?: boolean;
|
|
616
|
-
/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "openrouter" uses reasoning: { effort }, "zai" uses thinking: { type: "enabled" }, "qwen" uses top-level enable_thinking, and "qwen-chat-template" uses chat_template_kwargs.enable_thinking. Default: "openai". */
|
|
633
|
+
/** Format for reasoning/thinking parameter. "openai" uses reasoning_effort, "openrouter" uses reasoning: { effort }, "zai" uses thinking: { type: "enabled" | "disabled" } (also used by Moonshot Kimi), "qwen" uses top-level enable_thinking, and "qwen-chat-template" uses chat_template_kwargs.enable_thinking. Default: "openai". */
|
|
617
634
|
thinkingFormat?: "openai" | "openrouter" | "zai" | "qwen" | "qwen-chat-template";
|
|
618
635
|
/** Which reasoning content field to emit on assistant messages. Default: auto-detected. */
|
|
619
636
|
reasoningContentField?: "reasoning_content" | "reasoning" | "reasoning_text";
|
package/src/utils/h2-fetch.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Patch `globalThis.fetch` to advertise HTTP/2 in TLS ALPN, with transparent
|
|
3
|
-
* HTTP/1.1 fallback when the server doesn't
|
|
3
|
+
* HTTP/1.1 fallback when the server doesn't negotiate `h2`.
|
|
4
4
|
*
|
|
5
5
|
* Bun's HTTP/2 client is gated on `BUN_FEATURE_FLAG_EXPERIMENTAL_HTTP2_CLIENT`,
|
|
6
6
|
* read by the native runtime before any JS executes; assigning to
|
|
@@ -8,6 +8,12 @@
|
|
|
8
8
|
* activates h2 over TLS ALPN and rejects with `error.code === "HTTP2Unsupported"`
|
|
9
9
|
* if the server picks anything else, so we catch and retry without the hint.
|
|
10
10
|
*
|
|
11
|
+
* Some HTTPS endpoints (e.g. corporate API gateways behind reverse proxies)
|
|
12
|
+
* advertise h2 via ALPN but then refuse or reset the connection at the HTTP/2
|
|
13
|
+
* framing layer. Bun surfaces these as `ConnectionRefused`, `ConnectionReset`,
|
|
14
|
+
* or `ConnectionClosed` rather than `HTTP2Unsupported`, so we treat those
|
|
15
|
+
* codes as h2-fallback triggers as well.
|
|
16
|
+
*
|
|
11
17
|
* Bun negotiates h2 via ALPN over TLS only (no h2c), so plain `http://` URLs
|
|
12
18
|
* skip the attempt entirely — avoids the throw/retry round-trip for localhost.
|
|
13
19
|
*
|
|
@@ -24,12 +30,19 @@ export function installH2Fetch(): void {
|
|
|
24
30
|
const original = globalThis.fetch as typeof fetch & PatchedFetch;
|
|
25
31
|
if (original[installed]) return;
|
|
26
32
|
|
|
33
|
+
/** Error codes that indicate h2 negotiation/transport failure (not an application error). */
|
|
34
|
+
const h2FallbackCodes: ReadonlySet<string> = new Set([
|
|
35
|
+
"HTTP2Unsupported", // Server selected h1 in ALPN
|
|
36
|
+
"ConnectionRefused", // Server refused the h2 connection
|
|
37
|
+
"ConnectionReset", // Server reset during h2 handshake
|
|
38
|
+
"ConnectionClosed", // Server closed before h2 response
|
|
39
|
+
]);
|
|
27
40
|
const wrapper = async function h2fetch(input: string | URL | Request, init?: RequestInit): Promise<Response> {
|
|
28
41
|
if (!isHttps(input)) return original(input, init);
|
|
29
42
|
try {
|
|
30
43
|
return await original(input, { ...init, protocol: "http2" });
|
|
31
44
|
} catch (err) {
|
|
32
|
-
if ((err as { code?:
|
|
45
|
+
if (!h2FallbackCodes.has((err as { code?: string }).code ?? "")) throw err;
|
|
33
46
|
return original(input, init);
|
|
34
47
|
}
|
|
35
48
|
} as typeof fetch & PatchedFetch;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { $env } from "@oh-my-pi/pi-utils";
|
|
2
2
|
|
|
3
|
-
const DEFAULT_STREAM_IDLE_TIMEOUT_MS =
|
|
3
|
+
const DEFAULT_STREAM_IDLE_TIMEOUT_MS = 30_000;
|
|
4
4
|
const DEFAULT_STREAM_FIRST_EVENT_TIMEOUT_MS = 100_000;
|
|
5
5
|
|
|
6
6
|
function normalizeIdleTimeoutMs(value: string | undefined, fallback: number): number | undefined {
|