@oh-my-pi/pi-ai 3.20.1 → 3.35.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -12
- package/package.json +3 -10
- package/src/cli.ts +89 -89
- package/src/index.ts +2 -2
- package/src/models.generated.ts +949 -178
- package/src/models.ts +11 -17
- package/src/providers/anthropic.ts +94 -29
- package/src/providers/google-gemini-cli.ts +270 -134
- package/src/providers/google-shared.ts +48 -5
- package/src/providers/google-vertex.ts +15 -4
- package/src/providers/google.ts +15 -4
- package/src/providers/openai-codex/index.ts +7 -0
- package/src/providers/openai-codex/prompts/codex.ts +26 -59
- package/src/providers/openai-codex/prompts/pi-codex-bridge.ts +38 -31
- package/src/providers/openai-codex/prompts/system-prompt.ts +26 -0
- package/src/providers/openai-codex/request-transformer.ts +38 -203
- package/src/providers/openai-codex-responses.ts +96 -26
- package/src/providers/openai-completions.ts +35 -27
- package/src/providers/openai-responses.ts +3 -2
- package/src/providers/transorm-messages.ts +4 -3
- package/src/stream.ts +34 -25
- package/src/types.ts +21 -4
- package/src/utils/oauth/github-copilot.ts +38 -3
- package/src/utils/oauth/google-antigravity.ts +146 -55
- package/src/utils/oauth/google-gemini-cli.ts +146 -55
- package/src/utils/oauth/index.ts +5 -5
- package/src/utils/oauth/openai-codex.ts +129 -54
- package/src/utils/overflow.ts +1 -1
- package/src/utils/retry-after.ts +110 -0
- package/src/bun-imports.d.ts +0 -14
|
@@ -6,7 +6,7 @@ import type {
|
|
|
6
6
|
ResponseInputText,
|
|
7
7
|
ResponseOutputMessage,
|
|
8
8
|
ResponseReasoningItem,
|
|
9
|
-
} from "openai/resources/responses/responses
|
|
9
|
+
} from "openai/resources/responses/responses";
|
|
10
10
|
import { calculateCost } from "../models";
|
|
11
11
|
import { getEnvApiKey } from "../stream";
|
|
12
12
|
import type {
|
|
@@ -24,6 +24,7 @@ import type {
|
|
|
24
24
|
} from "../types";
|
|
25
25
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
26
26
|
import { parseStreamingJson } from "../utils/json-parse";
|
|
27
|
+
import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
|
|
27
28
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode";
|
|
28
29
|
import {
|
|
29
30
|
CODEX_BASE_URL,
|
|
@@ -33,12 +34,9 @@ import {
|
|
|
33
34
|
URL_PATHS,
|
|
34
35
|
} from "./openai-codex/constants";
|
|
35
36
|
import { getCodexInstructions } from "./openai-codex/prompts/codex";
|
|
36
|
-
import {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
type RequestBody,
|
|
40
|
-
transformRequestBody,
|
|
41
|
-
} from "./openai-codex/request-transformer";
|
|
37
|
+
import { buildCodexPiBridge } from "./openai-codex/prompts/pi-codex-bridge";
|
|
38
|
+
import { buildCodexSystemPrompt } from "./openai-codex/prompts/system-prompt";
|
|
39
|
+
import { type CodexRequestOptions, type RequestBody, transformRequestBody } from "./openai-codex/request-transformer";
|
|
42
40
|
import { parseCodexError, parseCodexSseStream } from "./openai-codex/response-handler";
|
|
43
41
|
import { transformMessages } from "./transorm-messages";
|
|
44
42
|
|
|
@@ -94,6 +92,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
94
92
|
model: model.id,
|
|
95
93
|
input: messages,
|
|
96
94
|
stream: true,
|
|
95
|
+
prompt_cache_key: options?.sessionId,
|
|
97
96
|
};
|
|
98
97
|
|
|
99
98
|
if (options?.maxTokens) {
|
|
@@ -108,8 +107,15 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
108
107
|
params.tools = convertTools(context.tools);
|
|
109
108
|
}
|
|
110
109
|
|
|
111
|
-
const
|
|
112
|
-
const
|
|
110
|
+
const codexInstructions = await getCodexInstructions(params.model);
|
|
111
|
+
const bridgeText = buildCodexPiBridge(context.tools);
|
|
112
|
+
const systemPrompt = buildCodexSystemPrompt({
|
|
113
|
+
codexInstructions,
|
|
114
|
+
bridgeText,
|
|
115
|
+
userSystemPrompt: context.systemPrompt,
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
params.instructions = systemPrompt.instructions;
|
|
113
119
|
|
|
114
120
|
const codexOptions: CodexRequestOptions = {
|
|
115
121
|
reasoningEffort: options?.reasoningEffort,
|
|
@@ -118,17 +124,14 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
118
124
|
include: options?.include,
|
|
119
125
|
};
|
|
120
126
|
|
|
121
|
-
const transformedBody = await transformRequestBody(
|
|
122
|
-
params,
|
|
123
|
-
codexInstructions,
|
|
124
|
-
codexOptions,
|
|
125
|
-
options?.codexMode ?? true,
|
|
126
|
-
);
|
|
127
|
+
const transformedBody = await transformRequestBody(params, codexOptions, systemPrompt);
|
|
127
128
|
|
|
128
|
-
const
|
|
129
|
+
const reasoningEffort = transformedBody.reasoning?.effort ?? null;
|
|
130
|
+
const headers = createCodexHeaders(model.headers, accountId, apiKey, options?.sessionId);
|
|
129
131
|
logCodexDebug("codex request", {
|
|
130
132
|
url,
|
|
131
133
|
model: params.model,
|
|
134
|
+
reasoningEffort,
|
|
132
135
|
headers: redactHeaders(headers),
|
|
133
136
|
});
|
|
134
137
|
|
|
@@ -149,7 +152,9 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
149
152
|
|
|
150
153
|
if (!response.ok) {
|
|
151
154
|
const info = await parseCodexError(response);
|
|
152
|
-
|
|
155
|
+
const error = new Error(info.friendlyMessage || info.message);
|
|
156
|
+
(error as { headers?: Headers }).headers = response.headers;
|
|
157
|
+
throw error;
|
|
153
158
|
}
|
|
154
159
|
|
|
155
160
|
if (!response.body) {
|
|
@@ -340,10 +345,10 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
340
345
|
}
|
|
341
346
|
} else if (eventType === "error") {
|
|
342
347
|
const code = (rawEvent as { code?: string }).code || "";
|
|
343
|
-
const message = (rawEvent as { message?: string }).message || "
|
|
344
|
-
throw new Error(
|
|
348
|
+
const message = (rawEvent as { message?: string }).message || "";
|
|
349
|
+
throw new Error(formatCodexErrorEvent(rawEvent, code, message));
|
|
345
350
|
} else if (eventType === "response.failed") {
|
|
346
|
-
throw new Error("
|
|
351
|
+
throw new Error(formatCodexFailure(rawEvent) ?? "Codex response failed");
|
|
347
352
|
}
|
|
348
353
|
}
|
|
349
354
|
|
|
@@ -352,7 +357,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
352
357
|
}
|
|
353
358
|
|
|
354
359
|
if (output.stopReason === "aborted" || output.stopReason === "error") {
|
|
355
|
-
throw new Error("
|
|
360
|
+
throw new Error("Codex response failed");
|
|
356
361
|
}
|
|
357
362
|
|
|
358
363
|
stream.push({ type: "done", reason: output.stopReason, message: output });
|
|
@@ -360,7 +365,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
360
365
|
} catch (error) {
|
|
361
366
|
for (const block of output.content) delete (block as { index?: number }).index;
|
|
362
367
|
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
|
|
363
|
-
output.errorMessage =
|
|
368
|
+
output.errorMessage = formatErrorMessageWithRetryAfter(error);
|
|
364
369
|
stream.push({ type: "error", reason: output.stopReason, error: output });
|
|
365
370
|
stream.end();
|
|
366
371
|
}
|
|
@@ -406,11 +411,11 @@ function logCodexDebug(message: string, details?: Record<string, unknown>): void
|
|
|
406
411
|
|
|
407
412
|
function redactHeaders(headers: Headers): Record<string, string> {
|
|
408
413
|
const redacted: Record<string, string> = {};
|
|
409
|
-
headers.
|
|
414
|
+
for (const [key, value] of headers.entries()) {
|
|
410
415
|
const lower = key.toLowerCase();
|
|
411
416
|
if (lower === "authorization") {
|
|
412
417
|
redacted[key] = "Bearer [redacted]";
|
|
413
|
-
|
|
418
|
+
continue;
|
|
414
419
|
}
|
|
415
420
|
if (
|
|
416
421
|
lower.includes("account") ||
|
|
@@ -419,10 +424,10 @@ function redactHeaders(headers: Headers): Record<string, string> {
|
|
|
419
424
|
lower === "cookie"
|
|
420
425
|
) {
|
|
421
426
|
redacted[key] = "[redacted]";
|
|
422
|
-
|
|
427
|
+
continue;
|
|
423
428
|
}
|
|
424
429
|
redacted[key] = value;
|
|
425
|
-
}
|
|
430
|
+
}
|
|
426
431
|
return redacted;
|
|
427
432
|
}
|
|
428
433
|
|
|
@@ -617,3 +622,68 @@ function mapStopReason(status: string | undefined): StopReason {
|
|
|
617
622
|
return "stop";
|
|
618
623
|
}
|
|
619
624
|
}
|
|
625
|
+
|
|
626
|
+
/**
 * Narrows an unknown value to a string-keyed record.
 *
 * @param value - Any value, e.g. a field read from a parsed event payload.
 * @returns `value` itself (not a copy) when it is a non-null, non-array object; otherwise `null`.
 */
function asRecord(value: unknown): Record<string, unknown> | null {
	// `typeof` reports "object" for both `null` and arrays. Neither is a usable record, and
	// returning an array here would stop callers that chain `asRecord(a) ?? asRecord(b)`
	// from ever reaching the fallback.
	if (typeof value !== "object" || value === null || Array.isArray(value)) {
		return null;
	}
	return value as Record<string, unknown>;
}
|
|
632
|
+
|
|
633
|
+
/**
 * Passes a value through only if it is a string.
 *
 * @param value - Any value.
 * @returns The same string (including the empty string) when `value` is a string, else `undefined`.
 */
function getString(value: unknown): string | undefined {
	if (typeof value !== "string") {
		return undefined;
	}
	return value;
}
|
|
636
|
+
|
|
637
|
+
/**
 * Shortens `text` to at most `limit` UTF-16 code units and appends a marker stating how
 * many code units were dropped.
 *
 * @param text - The string to shorten.
 * @param limit - Maximum number of code units to keep. Fractions are floored; negative
 *   values and `NaN` are treated as 0.
 * @returns `text` unchanged when it already fits, otherwise the kept prefix followed by
 *   `...[truncated N]`, where N is the number of code units removed.
 */
function truncate(text: string, limit: number): string {
	// Normalise the limit so `slice` and the reported count always agree.
	const cap = Number.isNaN(limit) ? 0 : Math.max(0, Math.floor(limit));
	if (text.length <= cap) return text;

	// Here `cap < text.length`, so both indices below are in range.
	let cut = cap;
	if (cut > 0) {
		const before = text.charCodeAt(cut - 1);
		const after = text.charCodeAt(cut);
		const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
		// Never leave a lone high surrogate at the end of the kept prefix.
		if (splitsPair) cut -= 1;
	}

	return `${text.slice(0, cut)}...[truncated ${text.length - cut}]`;
}
|
|
641
|
+
|
|
642
|
+
/**
 * Builds a human-readable description of a failed Codex event.
 *
 * Fields are looked up in this order:
 * - message: `error.message`, then `rawEvent.message`, then `response.message`
 * - code: `error.code`, then `error.type`, then `rawEvent.code`
 * - status: `response.status`, then `rawEvent.status`
 *
 * `error` is `rawEvent.error` when that is a record, otherwise `rawEvent.response.error`.
 * Only string values are considered. An empty string stops the lookup chain but is then
 * treated as absent when the text is assembled.
 *
 * @param rawEvent - The event payload as a loosely typed record.
 * @returns A string starting with "Codex response failed". Every path in this function
 *   returns a string; `null` is only present in the declared return type.
 */
function formatCodexFailure(rawEvent: Record<string, unknown>): string | null {
	const responseRecord = asRecord(rawEvent.response);
	const errorRecord = asRecord(rawEvent.error) ?? (responseRecord ? asRecord(responseRecord.error) : null);

	const failureMessage =
		getString(errorRecord?.message) ?? getString(rawEvent.message) ?? getString(responseRecord?.message);
	const failureCode = getString(errorRecord?.code) ?? getString(errorRecord?.type) ?? getString(rawEvent.code);
	const failureStatus = getString(responseRecord?.status) ?? getString(rawEvent.status);

	// "code=…, status=…" with absent/empty parts left out; empty string when neither is set.
	const details = [failureCode ? `code=${failureCode}` : "", failureStatus ? `status=${failureStatus}` : ""]
		.filter((part) => part !== "")
		.join(", ");

	if (failureMessage) {
		const suffix = details === "" ? "" : ` (${details})`;
		return `Codex response failed: ${failureMessage}${suffix}`;
	}

	if (details !== "") {
		return `Codex response failed (${details})`;
	}

	// Nothing recognisable was found: fall back to a bounded dump of the whole event.
	let serialized: string;
	try {
		serialized = truncate(JSON.stringify(rawEvent), 800);
	} catch {
		// Serialisation failed (or produced a non-string); report the bare failure.
		return "Codex response failed";
	}
	return `Codex response failed: ${serialized}`;
}
|
|
669
|
+
|
|
670
|
+
/**
 * Builds the error text for a Codex stream event of type "error".
 *
 * The text produced by `formatCodexFailure` is preferred, with its first occurrence of
 * "response failed" rewritten to "error event". The remaining branches only run when that
 * helper yields nothing (null or an empty string).
 *
 * @param rawEvent - The event payload as a loosely typed record.
 * @param code - Error code supplied by the caller; skipped when empty.
 * @param message - Error message supplied by the caller; skipped when empty.
 * @returns A string starting with "Codex error event" whenever the helper's text starts
 *   with "Codex response failed" or the fallback branches are used.
 */
function formatCodexErrorEvent(rawEvent: Record<string, unknown>, code: string, message: string): string {
	const failureText = formatCodexFailure(rawEvent);
	if (failureText) {
		// A string pattern makes `replace` touch only the first match.
		return failureText.replace("response failed", "error event");
	}

	const parts = [code ? `code=${code}` : "", message ? `message=${message}` : ""].filter((part) => part !== "");
	if (parts.length > 0) {
		return `Codex error event (${parts.join(", ")})`;
	}

	// Last resort: a bounded dump of the raw event.
	let serialized: string;
	try {
		serialized = truncate(JSON.stringify(rawEvent), 800);
	} catch {
		return "Codex error event";
	}
	return `Codex error event: ${serialized}`;
}
|
|
@@ -7,7 +7,7 @@ import type {
|
|
|
7
7
|
ChatCompletionContentPartText,
|
|
8
8
|
ChatCompletionMessageParam,
|
|
9
9
|
ChatCompletionToolMessageParam,
|
|
10
|
-
} from "openai/resources/chat/completions
|
|
10
|
+
} from "openai/resources/chat/completions";
|
|
11
11
|
import { calculateCost } from "../models";
|
|
12
12
|
import { getEnvApiKey } from "../stream";
|
|
13
13
|
import type {
|
|
@@ -26,6 +26,7 @@ import type {
|
|
|
26
26
|
} from "../types";
|
|
27
27
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
28
28
|
import { parseStreamingJson } from "../utils/json-parse";
|
|
29
|
+
import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
|
|
29
30
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode";
|
|
30
31
|
import { transformMessages } from "./transorm-messages";
|
|
31
32
|
|
|
@@ -196,34 +197,44 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
196
197
|
|
|
197
198
|
// Some endpoints return reasoning in reasoning_content (llama.cpp),
|
|
198
199
|
// or reasoning (other openai compatible endpoints)
|
|
200
|
+
// Use the first non-empty reasoning field to avoid duplication
|
|
201
|
+
// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
|
|
199
202
|
const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
|
|
203
|
+
let foundReasoningField: string | null = null;
|
|
200
204
|
for (const field of reasoningFields) {
|
|
201
205
|
if (
|
|
202
206
|
(choice.delta as any)[field] !== null &&
|
|
203
207
|
(choice.delta as any)[field] !== undefined &&
|
|
204
208
|
(choice.delta as any)[field].length > 0
|
|
205
209
|
) {
|
|
206
|
-
if (!
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
type: "thinking",
|
|
210
|
-
thinking: "",
|
|
211
|
-
thinkingSignature: field,
|
|
212
|
-
};
|
|
213
|
-
output.content.push(currentBlock);
|
|
214
|
-
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
|
210
|
+
if (!foundReasoningField) {
|
|
211
|
+
foundReasoningField = field;
|
|
212
|
+
break;
|
|
215
213
|
}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
216
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
}
|
|
217
|
+
if (foundReasoningField) {
|
|
218
|
+
if (!currentBlock || currentBlock.type !== "thinking") {
|
|
219
|
+
finishCurrentBlock(currentBlock);
|
|
220
|
+
currentBlock = {
|
|
221
|
+
type: "thinking",
|
|
222
|
+
thinking: "",
|
|
223
|
+
thinkingSignature: foundReasoningField,
|
|
224
|
+
};
|
|
225
|
+
output.content.push(currentBlock);
|
|
226
|
+
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
if (currentBlock.type === "thinking") {
|
|
230
|
+
const delta = (choice.delta as any)[foundReasoningField];
|
|
231
|
+
currentBlock.thinking += delta;
|
|
232
|
+
stream.push({
|
|
233
|
+
type: "thinking_delta",
|
|
234
|
+
contentIndex: blockIndex(),
|
|
235
|
+
delta,
|
|
236
|
+
partial: output,
|
|
237
|
+
});
|
|
227
238
|
}
|
|
228
239
|
}
|
|
229
240
|
|
|
@@ -296,7 +307,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
296
307
|
} catch (error) {
|
|
297
308
|
for (const block of output.content) delete (block as any).index;
|
|
298
309
|
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
|
|
299
|
-
output.errorMessage =
|
|
310
|
+
output.errorMessage = formatErrorMessageWithRetryAfter(error);
|
|
300
311
|
stream.push({ type: "error", reason: output.stopReason, error: output });
|
|
301
312
|
stream.end();
|
|
302
313
|
}
|
|
@@ -480,10 +491,8 @@ function convertMessages(
|
|
|
480
491
|
const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
|
|
481
492
|
if (nonEmptyThinkingBlocks.length > 0) {
|
|
482
493
|
if (compat.requiresThinkingAsText) {
|
|
483
|
-
// Convert thinking blocks to text
|
|
484
|
-
const thinkingText = nonEmptyThinkingBlocks
|
|
485
|
-
.map((b) => `<thinking>\n${b.thinking}\n</thinking>`)
|
|
486
|
-
.join("\n");
|
|
494
|
+
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
|
|
495
|
+
const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
|
|
487
496
|
const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
|
|
488
497
|
if (textContent) {
|
|
489
498
|
textContent.unshift({ type: "text", text: thinkingText });
|
|
@@ -633,8 +642,7 @@ function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
|
|
|
633
642
|
baseUrl.includes("cerebras.ai") ||
|
|
634
643
|
baseUrl.includes("api.x.ai") ||
|
|
635
644
|
baseUrl.includes("mistral.ai") ||
|
|
636
|
-
baseUrl.includes("chutes.ai")
|
|
637
|
-
baseUrl.includes("localhost");
|
|
645
|
+
baseUrl.includes("chutes.ai");
|
|
638
646
|
|
|
639
647
|
const useMaxTokens = baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai");
|
|
640
648
|
|
|
@@ -9,7 +9,7 @@ import type {
|
|
|
9
9
|
ResponseInputText,
|
|
10
10
|
ResponseOutputMessage,
|
|
11
11
|
ResponseReasoningItem,
|
|
12
|
-
} from "openai/resources/responses/responses
|
|
12
|
+
} from "openai/resources/responses/responses";
|
|
13
13
|
import { calculateCost } from "../models";
|
|
14
14
|
import { getEnvApiKey } from "../stream";
|
|
15
15
|
import type {
|
|
@@ -27,6 +27,7 @@ import type {
|
|
|
27
27
|
} from "../types";
|
|
28
28
|
import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
29
29
|
import { parseStreamingJson } from "../utils/json-parse";
|
|
30
|
+
import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
|
|
30
31
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode";
|
|
31
32
|
import { transformMessages } from "./transorm-messages";
|
|
32
33
|
|
|
@@ -303,7 +304,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
|
|
|
303
304
|
} catch (error) {
|
|
304
305
|
for (const block of output.content) delete (block as any).index;
|
|
305
306
|
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
|
|
306
|
-
output.errorMessage =
|
|
307
|
+
output.errorMessage = formatErrorMessageWithRetryAfter(error);
|
|
307
308
|
stream.push({ type: "error", reason: output.stopReason, error: output });
|
|
308
309
|
stream.end();
|
|
309
310
|
}
|
|
@@ -45,12 +45,13 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
|
|
|
45
45
|
assistantMsg.api !== model.api;
|
|
46
46
|
|
|
47
47
|
// Transform message from different provider/model
|
|
48
|
-
const transformedContent = assistantMsg.content.
|
|
48
|
+
const transformedContent = assistantMsg.content.flatMap((block) => {
|
|
49
49
|
if (block.type === "thinking") {
|
|
50
|
-
//
|
|
50
|
+
// Skip empty thinking blocks, convert others to plain text
|
|
51
|
+
if (!block.thinking || block.thinking.trim() === "") return [];
|
|
51
52
|
return {
|
|
52
53
|
type: "text" as const,
|
|
53
|
-
text:
|
|
54
|
+
text: block.thinking,
|
|
54
55
|
};
|
|
55
56
|
}
|
|
56
57
|
// Normalize tool call IDs for github-copilot cross-API switches
|
package/src/stream.ts
CHANGED
|
@@ -21,8 +21,9 @@ import type {
|
|
|
21
21
|
KnownProvider,
|
|
22
22
|
Model,
|
|
23
23
|
OptionsForApi,
|
|
24
|
-
ReasoningEffort,
|
|
25
24
|
SimpleStreamOptions,
|
|
25
|
+
ThinkingBudgets,
|
|
26
|
+
ThinkingLevel,
|
|
26
27
|
} from "./types";
|
|
27
28
|
|
|
28
29
|
const VERTEX_ADC_CREDENTIALS_PATH = join(homedir(), ".config", "gcloud", "application_default_credentials.json");
|
|
@@ -64,7 +65,6 @@ export function getEnvApiKey(provider: any): string | undefined {
|
|
|
64
65
|
if (hasCredentials && hasProject && hasLocation) {
|
|
65
66
|
return "<authenticated>";
|
|
66
67
|
}
|
|
67
|
-
return undefined;
|
|
68
68
|
}
|
|
69
69
|
|
|
70
70
|
const envMap: Record<string, string> = {
|
|
@@ -76,6 +76,7 @@ export function getEnvApiKey(provider: any): string | undefined {
|
|
|
76
76
|
openrouter: "OPENROUTER_API_KEY",
|
|
77
77
|
zai: "ZAI_API_KEY",
|
|
78
78
|
mistral: "MISTRAL_API_KEY",
|
|
79
|
+
opencode: "OPENCODE_API_KEY",
|
|
79
80
|
};
|
|
80
81
|
|
|
81
82
|
const envVar = envMap[provider];
|
|
@@ -178,10 +179,11 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
178
179
|
maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
|
|
179
180
|
signal: options?.signal,
|
|
180
181
|
apiKey: apiKey || options?.apiKey,
|
|
182
|
+
sessionId: options?.sessionId,
|
|
181
183
|
};
|
|
182
184
|
|
|
183
185
|
// Helper to clamp xhigh to high for providers that don't support it
|
|
184
|
-
const clampReasoning = (effort:
|
|
186
|
+
const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
|
|
185
187
|
|
|
186
188
|
switch (model.api) {
|
|
187
189
|
case "anthropic-messages": {
|
|
@@ -192,15 +194,17 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
192
194
|
|
|
193
195
|
// Claude requires max_tokens > thinking.budget_tokens
|
|
194
196
|
// So we need to ensure maxTokens accounts for both thinking and output
|
|
195
|
-
const
|
|
197
|
+
const defaultBudgets: ThinkingBudgets = {
|
|
196
198
|
minimal: 1024,
|
|
197
199
|
low: 2048,
|
|
198
200
|
medium: 8192,
|
|
199
201
|
high: 16384,
|
|
200
202
|
};
|
|
203
|
+
const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
|
|
201
204
|
|
|
202
205
|
const minOutputTokens = 1024;
|
|
203
|
-
|
|
206
|
+
const level = clampReasoning(options.reasoning)!;
|
|
207
|
+
let thinkingBudget = budgets[level]!;
|
|
204
208
|
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
|
205
209
|
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
|
206
210
|
|
|
@@ -261,7 +265,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
261
265
|
...base,
|
|
262
266
|
thinking: {
|
|
263
267
|
enabled: true,
|
|
264
|
-
budgetTokens: getGoogleBudget(googleModel, effort),
|
|
268
|
+
budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
|
|
265
269
|
},
|
|
266
270
|
} satisfies GoogleOptions;
|
|
267
271
|
}
|
|
@@ -287,15 +291,16 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
287
291
|
// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
|
|
288
292
|
// Claude requires max_tokens > thinking.budget_tokens
|
|
289
293
|
// So we need to ensure maxTokens accounts for both thinking and output
|
|
290
|
-
const
|
|
294
|
+
const defaultBudgets: ThinkingBudgets = {
|
|
291
295
|
minimal: 1024,
|
|
292
296
|
low: 2048,
|
|
293
297
|
medium: 8192,
|
|
294
298
|
high: 16384,
|
|
295
299
|
};
|
|
300
|
+
const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
|
|
296
301
|
|
|
297
302
|
const minOutputTokens = 1024;
|
|
298
|
-
let thinkingBudget = budgets[effort]
|
|
303
|
+
let thinkingBudget = budgets[effort]!;
|
|
299
304
|
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
|
300
305
|
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
|
301
306
|
|
|
@@ -316,22 +321,20 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
316
321
|
|
|
317
322
|
case "google-vertex": {
|
|
318
323
|
// Explicitly disable thinking when reasoning is not specified
|
|
319
|
-
// This is needed because Gemini has "dynamic thinking" enabled by default
|
|
320
324
|
if (!options?.reasoning) {
|
|
321
325
|
return { ...base, thinking: { enabled: false } } satisfies GoogleVertexOptions;
|
|
322
326
|
}
|
|
323
327
|
|
|
324
|
-
const
|
|
328
|
+
const vertexModel = model as Model<"google-vertex">;
|
|
325
329
|
const effort = clampReasoning(options.reasoning)!;
|
|
330
|
+
const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;
|
|
326
331
|
|
|
327
|
-
|
|
328
|
-
// https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
329
|
-
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
|
332
|
+
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
|
|
330
333
|
return {
|
|
331
334
|
...base,
|
|
332
335
|
thinking: {
|
|
333
336
|
enabled: true,
|
|
334
|
-
level: getGemini3ThinkingLevel(effort,
|
|
337
|
+
level: getGemini3ThinkingLevel(effort, geminiModel),
|
|
335
338
|
},
|
|
336
339
|
} satisfies GoogleVertexOptions;
|
|
337
340
|
}
|
|
@@ -340,7 +343,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
340
343
|
...base,
|
|
341
344
|
thinking: {
|
|
342
345
|
enabled: true,
|
|
343
|
-
budgetTokens: getGoogleBudget(
|
|
346
|
+
budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
|
|
344
347
|
},
|
|
345
348
|
} satisfies GoogleVertexOptions;
|
|
346
349
|
}
|
|
@@ -353,21 +356,21 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
353
356
|
}
|
|
354
357
|
}
|
|
355
358
|
|
|
356
|
-
type
|
|
359
|
+
/** A `ThinkingLevel` with "xhigh" removed; the effort type taken by the Gemini level/budget helpers. */
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
|
|
357
360
|
|
|
358
|
-
function isGemini3ProModel(model: Model<"google-generative-ai">
|
|
361
|
+
/**
 * Reports whether a model id identifies a Gemini 3 Pro variant.
 *
 * @param model - The model whose `id` is inspected.
 * @returns `true` when the id contains the substring "3-pro".
 */
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
	// Substring match so gemini-3-pro, gemini-3-pro-preview and possible future prefixed ids all qualify.
	const { id } = model;
	return id.indexOf("3-pro") !== -1;
}
|
|
362
365
|
|
|
363
|
-
function isGemini3FlashModel(model: Model<"google-generative-ai">
|
|
366
|
+
/**
 * Reports whether a model id identifies a Gemini 3 Flash variant.
 *
 * @param model - The model whose `id` is inspected.
 * @returns `true` when the id contains the substring "3-flash".
 */
function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
	// Substring match so gemini-3-flash, gemini-3-flash-preview and possible future prefixed ids all qualify.
	const { id } = model;
	return id.indexOf("3-flash") !== -1;
}
|
|
367
370
|
|
|
368
371
|
function getGemini3ThinkingLevel(
|
|
369
|
-
effort:
|
|
370
|
-
model: Model<"google-generative-ai"
|
|
372
|
+
effort: ClampedThinkingLevel,
|
|
373
|
+
model: Model<"google-generative-ai">,
|
|
371
374
|
): GoogleThinkingLevel {
|
|
372
375
|
if (isGemini3ProModel(model)) {
|
|
373
376
|
// Gemini 3 Pro only supports LOW/HIGH (for now)
|
|
@@ -393,7 +396,7 @@ function getGemini3ThinkingLevel(
|
|
|
393
396
|
}
|
|
394
397
|
}
|
|
395
398
|
|
|
396
|
-
function getGeminiCliThinkingLevel(effort:
|
|
399
|
+
function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {
|
|
397
400
|
if (modelId.includes("3-pro")) {
|
|
398
401
|
// Gemini 3 Pro only supports LOW/HIGH (for now)
|
|
399
402
|
switch (effort) {
|
|
@@ -419,12 +422,18 @@ function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: stri
|
|
|
419
422
|
}
|
|
420
423
|
|
|
421
424
|
function getGoogleBudget(
|
|
422
|
-
model: Model<"google-generative-ai"
|
|
423
|
-
effort:
|
|
425
|
+
model: Model<"google-generative-ai">,
|
|
426
|
+
effort: ClampedThinkingLevel,
|
|
427
|
+
customBudgets?: ThinkingBudgets,
|
|
424
428
|
): number {
|
|
429
|
+
// Custom budgets take precedence if provided for this level
|
|
430
|
+
if (customBudgets?.[effort] !== undefined) {
|
|
431
|
+
return customBudgets[effort]!;
|
|
432
|
+
}
|
|
433
|
+
|
|
425
434
|
// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
426
435
|
if (model.id.includes("2.5-pro")) {
|
|
427
|
-
const budgets: Record<
|
|
436
|
+
const budgets: Record<ClampedThinkingLevel, number> = {
|
|
428
437
|
minimal: 128,
|
|
429
438
|
low: 2048,
|
|
430
439
|
medium: 8192,
|
|
@@ -435,7 +444,7 @@ function getGoogleBudget(
|
|
|
435
444
|
|
|
436
445
|
if (model.id.includes("2.5-flash")) {
|
|
437
446
|
// Covers 2.5-flash-lite as well
|
|
438
|
-
const budgets: Record<
|
|
447
|
+
const budgets: Record<ClampedThinkingLevel, number> = {
|
|
439
448
|
minimal: 128,
|
|
440
449
|
low: 2048,
|
|
441
450
|
medium: 8192,
|
package/src/types.ts
CHANGED
|
@@ -54,10 +54,19 @@ export type KnownProvider =
|
|
|
54
54
|
| "cerebras"
|
|
55
55
|
| "openrouter"
|
|
56
56
|
| "zai"
|
|
57
|
-
| "mistral"
|
|
57
|
+
| "mistral"
|
|
58
|
+
| "opencode";
|
|
58
59
|
export type Provider = KnownProvider | string;
|
|
59
60
|
|
|
60
|
-
export type
|
|
61
|
+
/** Named reasoning levels, ordered from least to most effort; the type of `SimpleStreamOptions.reasoning`. */
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
62
|
+
|
|
63
|
+
/**
 * Token budgets for each thinking level (token-based providers only).
 *
 * Every entry is optional, so a caller can override a single level and leave the rest unset.
 * There is no "xhigh" entry.
 */
export interface ThinkingBudgets {
	/** Token budget for the "minimal" level. */
	minimal?: number;
	/** Token budget for the "low" level. */
	low?: number;
	/** Token budget for the "medium" level. */
	medium?: number;
	/** Token budget for the "high" level. */
	high?: number;
}
|
|
61
70
|
|
|
62
71
|
// Base options all providers share
|
|
63
72
|
export interface StreamOptions {
|
|
@@ -65,11 +74,19 @@ export interface StreamOptions {
|
|
|
65
74
|
maxTokens?: number;
|
|
66
75
|
signal?: AbortSignal;
|
|
67
76
|
apiKey?: string;
|
|
77
|
+
/**
|
|
78
|
+
* Optional session identifier for providers that support session-based caching.
|
|
79
|
+
* Providers can use this to enable prompt caching, request routing, or other
|
|
80
|
+
* session-aware features. Ignored by providers that don't support it.
|
|
81
|
+
*/
|
|
82
|
+
sessionId?: string;
|
|
68
83
|
}
|
|
69
84
|
|
|
70
85
|
// Unified options with reasoning passed to streamSimple() and completeSimple()
|
|
71
86
|
export interface SimpleStreamOptions extends StreamOptions {
|
|
72
|
-
reasoning?:
|
|
87
|
+
reasoning?: ThinkingLevel;
|
|
88
|
+
/** Custom token budgets for thinking levels (token-based providers only) */
|
|
89
|
+
thinkingBudgets?: ThinkingBudgets;
|
|
73
90
|
}
|
|
74
91
|
|
|
75
92
|
// Generic StreamFunction with typed options
|
|
@@ -146,7 +163,7 @@ export interface ToolResultMessage<TDetails = any> {
|
|
|
146
163
|
toolName: string;
|
|
147
164
|
content: (TextContent | ImageContent)[]; // Supports text and images
|
|
148
165
|
details?: TDetails;
|
|
149
|
-
isError
|
|
166
|
+
isError: boolean;
|
|
150
167
|
timestamp: number; // Unix timestamp in milliseconds
|
|
151
168
|
}
|
|
152
169
|
|