@oh-my-pi/pi-ai 3.20.1 → 3.34.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +69 -12
- package/package.json +3 -10
- package/src/cli.ts +89 -89
- package/src/index.ts +2 -2
- package/src/models.generated.ts +949 -178
- package/src/models.ts +11 -17
- package/src/providers/anthropic.ts +92 -28
- package/src/providers/google-gemini-cli.ts +268 -133
- package/src/providers/google-shared.ts +48 -5
- package/src/providers/google-vertex.ts +13 -3
- package/src/providers/google.ts +13 -3
- package/src/providers/openai-codex/index.ts +7 -0
- package/src/providers/openai-codex/prompts/codex.ts +26 -59
- package/src/providers/openai-codex/prompts/pi-codex-bridge.ts +38 -31
- package/src/providers/openai-codex/prompts/system-prompt.ts +26 -0
- package/src/providers/openai-codex/request-transformer.ts +38 -203
- package/src/providers/openai-codex-responses.ts +91 -24
- package/src/providers/openai-completions.ts +33 -26
- package/src/providers/openai-responses.ts +1 -1
- package/src/providers/transorm-messages.ts +4 -3
- package/src/stream.ts +34 -25
- package/src/types.ts +21 -4
- package/src/utils/oauth/github-copilot.ts +38 -3
- package/src/utils/oauth/google-antigravity.ts +146 -55
- package/src/utils/oauth/google-gemini-cli.ts +146 -55
- package/src/utils/oauth/index.ts +5 -5
- package/src/utils/oauth/openai-codex.ts +129 -54
- package/src/utils/overflow.ts +1 -1
- package/src/bun-imports.d.ts +0 -14
|
@@ -6,7 +6,7 @@ import type {
|
|
|
6
6
|
ResponseInputText,
|
|
7
7
|
ResponseOutputMessage,
|
|
8
8
|
ResponseReasoningItem,
|
|
9
|
-
} from "openai/resources/responses/responses
|
|
9
|
+
} from "openai/resources/responses/responses";
|
|
10
10
|
import { calculateCost } from "../models";
|
|
11
11
|
import { getEnvApiKey } from "../stream";
|
|
12
12
|
import type {
|
|
@@ -33,12 +33,9 @@ import {
|
|
|
33
33
|
URL_PATHS,
|
|
34
34
|
} from "./openai-codex/constants";
|
|
35
35
|
import { getCodexInstructions } from "./openai-codex/prompts/codex";
|
|
36
|
-
import {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
type RequestBody,
|
|
40
|
-
transformRequestBody,
|
|
41
|
-
} from "./openai-codex/request-transformer";
|
|
36
|
+
import { buildCodexPiBridge } from "./openai-codex/prompts/pi-codex-bridge";
|
|
37
|
+
import { buildCodexSystemPrompt } from "./openai-codex/prompts/system-prompt";
|
|
38
|
+
import { type CodexRequestOptions, type RequestBody, transformRequestBody } from "./openai-codex/request-transformer";
|
|
42
39
|
import { parseCodexError, parseCodexSseStream } from "./openai-codex/response-handler";
|
|
43
40
|
import { transformMessages } from "./transorm-messages";
|
|
44
41
|
|
|
@@ -94,6 +91,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
94
91
|
model: model.id,
|
|
95
92
|
input: messages,
|
|
96
93
|
stream: true,
|
|
94
|
+
prompt_cache_key: options?.sessionId,
|
|
97
95
|
};
|
|
98
96
|
|
|
99
97
|
if (options?.maxTokens) {
|
|
@@ -108,8 +106,15 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
108
106
|
params.tools = convertTools(context.tools);
|
|
109
107
|
}
|
|
110
108
|
|
|
111
|
-
const
|
|
112
|
-
const
|
|
109
|
+
const codexInstructions = await getCodexInstructions(params.model);
|
|
110
|
+
const bridgeText = buildCodexPiBridge(context.tools);
|
|
111
|
+
const systemPrompt = buildCodexSystemPrompt({
|
|
112
|
+
codexInstructions,
|
|
113
|
+
bridgeText,
|
|
114
|
+
userSystemPrompt: context.systemPrompt,
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
params.instructions = systemPrompt.instructions;
|
|
113
118
|
|
|
114
119
|
const codexOptions: CodexRequestOptions = {
|
|
115
120
|
reasoningEffort: options?.reasoningEffort,
|
|
@@ -118,17 +123,14 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
118
123
|
include: options?.include,
|
|
119
124
|
};
|
|
120
125
|
|
|
121
|
-
const transformedBody = await transformRequestBody(
|
|
122
|
-
params,
|
|
123
|
-
codexInstructions,
|
|
124
|
-
codexOptions,
|
|
125
|
-
options?.codexMode ?? true,
|
|
126
|
-
);
|
|
126
|
+
const transformedBody = await transformRequestBody(params, codexOptions, systemPrompt);
|
|
127
127
|
|
|
128
|
-
const
|
|
128
|
+
const reasoningEffort = transformedBody.reasoning?.effort ?? null;
|
|
129
|
+
const headers = createCodexHeaders(model.headers, accountId, apiKey, options?.sessionId);
|
|
129
130
|
logCodexDebug("codex request", {
|
|
130
131
|
url,
|
|
131
132
|
model: params.model,
|
|
133
|
+
reasoningEffort,
|
|
132
134
|
headers: redactHeaders(headers),
|
|
133
135
|
});
|
|
134
136
|
|
|
@@ -340,10 +342,10 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
340
342
|
}
|
|
341
343
|
} else if (eventType === "error") {
|
|
342
344
|
const code = (rawEvent as { code?: string }).code || "";
|
|
343
|
-
const message = (rawEvent as { message?: string }).message || "
|
|
344
|
-
throw new Error(
|
|
345
|
+
const message = (rawEvent as { message?: string }).message || "";
|
|
346
|
+
throw new Error(formatCodexErrorEvent(rawEvent, code, message));
|
|
345
347
|
} else if (eventType === "response.failed") {
|
|
346
|
-
throw new Error("
|
|
348
|
+
throw new Error(formatCodexFailure(rawEvent) ?? "Codex response failed");
|
|
347
349
|
}
|
|
348
350
|
}
|
|
349
351
|
|
|
@@ -352,7 +354,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
352
354
|
}
|
|
353
355
|
|
|
354
356
|
if (output.stopReason === "aborted" || output.stopReason === "error") {
|
|
355
|
-
throw new Error("
|
|
357
|
+
throw new Error("Codex response failed");
|
|
356
358
|
}
|
|
357
359
|
|
|
358
360
|
stream.push({ type: "done", reason: output.stopReason, message: output });
|
|
@@ -406,11 +408,11 @@ function logCodexDebug(message: string, details?: Record<string, unknown>): void
|
|
|
406
408
|
|
|
407
409
|
function redactHeaders(headers: Headers): Record<string, string> {
|
|
408
410
|
const redacted: Record<string, string> = {};
|
|
409
|
-
headers.
|
|
411
|
+
for (const [key, value] of headers.entries()) {
|
|
410
412
|
const lower = key.toLowerCase();
|
|
411
413
|
if (lower === "authorization") {
|
|
412
414
|
redacted[key] = "Bearer [redacted]";
|
|
413
|
-
|
|
415
|
+
continue;
|
|
414
416
|
}
|
|
415
417
|
if (
|
|
416
418
|
lower.includes("account") ||
|
|
@@ -419,10 +421,10 @@ function redactHeaders(headers: Headers): Record<string, string> {
|
|
|
419
421
|
lower === "cookie"
|
|
420
422
|
) {
|
|
421
423
|
redacted[key] = "[redacted]";
|
|
422
|
-
|
|
424
|
+
continue;
|
|
423
425
|
}
|
|
424
426
|
redacted[key] = value;
|
|
425
|
-
}
|
|
427
|
+
}
|
|
426
428
|
return redacted;
|
|
427
429
|
}
|
|
428
430
|
|
|
@@ -617,3 +619,68 @@ function mapStopReason(status: string | undefined): StopReason {
|
|
|
617
619
|
return "stop";
|
|
618
620
|
}
|
|
619
621
|
}
|
|
622
|
+
|
|
623
|
+
/**
 * Views an unknown value as a string-keyed record.
 *
 * @param value - Any value, typically a field pulled from a raw event.
 * @returns The same reference, typed as a record, when `value` is a non-null
 *   `object`; otherwise `null`. Arrays also satisfy the `typeof` check and are
 *   returned as-is.
 */
function asRecord(value: unknown): Record<string, unknown> | null {
	const isObjectLike = typeof value === "object" && value !== null;
	return isObjectLike ? (value as Record<string, unknown>) : null;
}
|
|
629
|
+
|
|
630
|
+
/**
 * Returns `value` when it is a primitive string, otherwise `undefined`.
 *
 * @param value - Any value.
 * @returns The string unchanged (including the empty string), or `undefined`.
 */
function getString(value: unknown): string | undefined {
	if (typeof value !== "string") {
		return undefined;
	}
	return value;
}
|
|
633
|
+
|
|
634
|
+
/**
 * Shortens `text` to at most `limit` UTF-16 code units, appending a marker
 * that records how many code units were dropped.
 *
 * @param text - The text to shorten.
 * @param limit - Maximum number of leading code units to keep.
 * @returns `text` unchanged when it already fits; otherwise the first `limit`
 *   code units followed by `...[truncated N]`, where N is the dropped count.
 */
function truncate(text: string, limit: number): string {
	const overflow = text.length - limit;
	if (overflow <= 0) {
		return text;
	}
	const head = text.slice(0, limit);
	return `${head}...[truncated ${overflow}]`;
}
|
|
638
|
+
|
|
639
|
+
/**
 * Builds a human-readable description of a failed Codex response event.
 *
 * Lookup order:
 * - error object: `rawEvent.error`, else `rawEvent.response.error`
 * - message: `error.message`, else `rawEvent.message`, else `rawEvent.response.message`
 * - code: `error.code`, else `error.type`, else `rawEvent.code`
 * - status: `rawEvent.response.status`, else `rawEvent.status`
 *
 * Only string-valued fields are used. When neither a message nor any metadata
 * is found, the whole event is serialized (capped at 800 characters) so the
 * caller still gets some context.
 *
 * @param rawEvent - The raw event payload.
 * @returns A message starting with "Codex response failed". The declared type
 *   allows `null`, but every path in this implementation returns a string.
 */
function formatCodexFailure(rawEvent: Record<string, unknown>): string | null {
	const response = asRecord(rawEvent.response);
	const error = asRecord(rawEvent.error) ?? (response ? asRecord(response.error) : null);

	const message = getString(error?.message) ?? getString(rawEvent.message) ?? getString(response?.message);
	const code = getString(error?.code) ?? getString(error?.type) ?? getString(rawEvent.code);
	const status = getString(response?.status) ?? getString(rawEvent.status);

	// Collect the non-empty metadata fields as "key=value" pairs, code first.
	const details: string[] = [];
	if (code) {
		details.push(`code=${code}`);
	}
	if (status) {
		details.push(`status=${status}`);
	}
	const hasDetails = details.length > 0;

	if (message) {
		const suffix = hasDetails ? ` (${details.join(", ")})` : "";
		return `Codex response failed: ${message}${suffix}`;
	}

	if (hasDetails) {
		return `Codex response failed (${details.join(", ")})`;
	}

	// Nothing recognizable: fall back to a bounded dump of the raw event.
	// Serialization can throw (e.g. circular references), hence the guard.
	try {
		const serialized = JSON.stringify(rawEvent);
		return `Codex response failed: ${truncate(serialized, 800)}`;
	} catch {
		return "Codex response failed";
	}
}
|
|
666
|
+
|
|
667
|
+
/**
 * Builds a human-readable description of a Codex `error` stream event.
 *
 * Delegates to `formatCodexFailure` and swaps the first "response failed" in
 * its output for "error event". If that helper yields nothing, the message is
 * assembled from the supplied `code` / `message`, and finally from a bounded
 * JSON dump of the event.
 *
 * @param rawEvent - The raw event payload.
 * @param code - Error code already extracted by the caller (may be empty).
 * @param message - Error message already extracted by the caller (may be empty).
 * @returns A message starting with "Codex error event".
 */
function formatCodexErrorEvent(rawEvent: Record<string, unknown>, code: string, message: string): string {
	const failureText = formatCodexFailure(rawEvent);
	if (failureText) {
		return failureText.replace("response failed", "error event");
	}

	// Fallback: describe the event using the caller-provided fields.
	const parts = [code ? `code=${code}` : null, message ? `message=${message}` : null].filter(
		(part): part is string => part !== null,
	);
	if (parts.length > 0) {
		return `Codex error event (${parts.join(", ")})`;
	}

	// Last resort: bounded dump of the raw event; serialization may throw.
	try {
		const serialized = JSON.stringify(rawEvent);
		return `Codex error event: ${truncate(serialized, 800)}`;
	} catch {
		return "Codex error event";
	}
}
|
|
@@ -7,7 +7,7 @@ import type {
|
|
|
7
7
|
ChatCompletionContentPartText,
|
|
8
8
|
ChatCompletionMessageParam,
|
|
9
9
|
ChatCompletionToolMessageParam,
|
|
10
|
-
} from "openai/resources/chat/completions
|
|
10
|
+
} from "openai/resources/chat/completions";
|
|
11
11
|
import { calculateCost } from "../models";
|
|
12
12
|
import { getEnvApiKey } from "../stream";
|
|
13
13
|
import type {
|
|
@@ -196,34 +196,44 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
|
|
|
196
196
|
|
|
197
197
|
// Some endpoints return reasoning in reasoning_content (llama.cpp),
|
|
198
198
|
// or reasoning (other openai compatible endpoints)
|
|
199
|
+
// Use the first non-empty reasoning field to avoid duplication
|
|
200
|
+
// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
|
|
199
201
|
const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
|
|
202
|
+
let foundReasoningField: string | null = null;
|
|
200
203
|
for (const field of reasoningFields) {
|
|
201
204
|
if (
|
|
202
205
|
(choice.delta as any)[field] !== null &&
|
|
203
206
|
(choice.delta as any)[field] !== undefined &&
|
|
204
207
|
(choice.delta as any)[field].length > 0
|
|
205
208
|
) {
|
|
206
|
-
if (!
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
type: "thinking",
|
|
210
|
-
thinking: "",
|
|
211
|
-
thinkingSignature: field,
|
|
212
|
-
};
|
|
213
|
-
output.content.push(currentBlock);
|
|
214
|
-
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
|
209
|
+
if (!foundReasoningField) {
|
|
210
|
+
foundReasoningField = field;
|
|
211
|
+
break;
|
|
215
212
|
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
216
215
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
}
|
|
216
|
+
if (foundReasoningField) {
|
|
217
|
+
if (!currentBlock || currentBlock.type !== "thinking") {
|
|
218
|
+
finishCurrentBlock(currentBlock);
|
|
219
|
+
currentBlock = {
|
|
220
|
+
type: "thinking",
|
|
221
|
+
thinking: "",
|
|
222
|
+
thinkingSignature: foundReasoningField,
|
|
223
|
+
};
|
|
224
|
+
output.content.push(currentBlock);
|
|
225
|
+
stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
if (currentBlock.type === "thinking") {
|
|
229
|
+
const delta = (choice.delta as any)[foundReasoningField];
|
|
230
|
+
currentBlock.thinking += delta;
|
|
231
|
+
stream.push({
|
|
232
|
+
type: "thinking_delta",
|
|
233
|
+
contentIndex: blockIndex(),
|
|
234
|
+
delta,
|
|
235
|
+
partial: output,
|
|
236
|
+
});
|
|
227
237
|
}
|
|
228
238
|
}
|
|
229
239
|
|
|
@@ -480,10 +490,8 @@ function convertMessages(
|
|
|
480
490
|
const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
|
|
481
491
|
if (nonEmptyThinkingBlocks.length > 0) {
|
|
482
492
|
if (compat.requiresThinkingAsText) {
|
|
483
|
-
// Convert thinking blocks to text
|
|
484
|
-
const thinkingText = nonEmptyThinkingBlocks
|
|
485
|
-
.map((b) => `<thinking>\n${b.thinking}\n</thinking>`)
|
|
486
|
-
.join("\n");
|
|
493
|
+
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
|
|
494
|
+
const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
|
|
487
495
|
const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
|
|
488
496
|
if (textContent) {
|
|
489
497
|
textContent.unshift({ type: "text", text: thinkingText });
|
|
@@ -633,8 +641,7 @@ function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
|
|
|
633
641
|
baseUrl.includes("cerebras.ai") ||
|
|
634
642
|
baseUrl.includes("api.x.ai") ||
|
|
635
643
|
baseUrl.includes("mistral.ai") ||
|
|
636
|
-
baseUrl.includes("chutes.ai")
|
|
637
|
-
baseUrl.includes("localhost");
|
|
644
|
+
baseUrl.includes("chutes.ai");
|
|
638
645
|
|
|
639
646
|
const useMaxTokens = baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai");
|
|
640
647
|
|
|
@@ -9,7 +9,7 @@ import type {
|
|
|
9
9
|
ResponseInputText,
|
|
10
10
|
ResponseOutputMessage,
|
|
11
11
|
ResponseReasoningItem,
|
|
12
|
-
} from "openai/resources/responses/responses
|
|
12
|
+
} from "openai/resources/responses/responses";
|
|
13
13
|
import { calculateCost } from "../models";
|
|
14
14
|
import { getEnvApiKey } from "../stream";
|
|
15
15
|
import type {
|
|
@@ -45,12 +45,13 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
|
|
|
45
45
|
assistantMsg.api !== model.api;
|
|
46
46
|
|
|
47
47
|
// Transform message from different provider/model
|
|
48
|
-
const transformedContent = assistantMsg.content.
|
|
48
|
+
const transformedContent = assistantMsg.content.flatMap((block) => {
|
|
49
49
|
if (block.type === "thinking") {
|
|
50
|
-
//
|
|
50
|
+
// Skip empty thinking blocks, convert others to plain text
|
|
51
|
+
if (!block.thinking || block.thinking.trim() === "") return [];
|
|
51
52
|
return {
|
|
52
53
|
type: "text" as const,
|
|
53
|
-
text:
|
|
54
|
+
text: block.thinking,
|
|
54
55
|
};
|
|
55
56
|
}
|
|
56
57
|
// Normalize tool call IDs for github-copilot cross-API switches
|
package/src/stream.ts
CHANGED
|
@@ -21,8 +21,9 @@ import type {
|
|
|
21
21
|
KnownProvider,
|
|
22
22
|
Model,
|
|
23
23
|
OptionsForApi,
|
|
24
|
-
ReasoningEffort,
|
|
25
24
|
SimpleStreamOptions,
|
|
25
|
+
ThinkingBudgets,
|
|
26
|
+
ThinkingLevel,
|
|
26
27
|
} from "./types";
|
|
27
28
|
|
|
28
29
|
const VERTEX_ADC_CREDENTIALS_PATH = join(homedir(), ".config", "gcloud", "application_default_credentials.json");
|
|
@@ -64,7 +65,6 @@ export function getEnvApiKey(provider: any): string | undefined {
|
|
|
64
65
|
if (hasCredentials && hasProject && hasLocation) {
|
|
65
66
|
return "<authenticated>";
|
|
66
67
|
}
|
|
67
|
-
return undefined;
|
|
68
68
|
}
|
|
69
69
|
|
|
70
70
|
const envMap: Record<string, string> = {
|
|
@@ -76,6 +76,7 @@ export function getEnvApiKey(provider: any): string | undefined {
|
|
|
76
76
|
openrouter: "OPENROUTER_API_KEY",
|
|
77
77
|
zai: "ZAI_API_KEY",
|
|
78
78
|
mistral: "MISTRAL_API_KEY",
|
|
79
|
+
opencode: "OPENCODE_API_KEY",
|
|
79
80
|
};
|
|
80
81
|
|
|
81
82
|
const envVar = envMap[provider];
|
|
@@ -178,10 +179,11 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
178
179
|
maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
|
|
179
180
|
signal: options?.signal,
|
|
180
181
|
apiKey: apiKey || options?.apiKey,
|
|
182
|
+
sessionId: options?.sessionId,
|
|
181
183
|
};
|
|
182
184
|
|
|
183
185
|
// Helper to clamp xhigh to high for providers that don't support it
|
|
184
|
-
const clampReasoning = (effort:
|
|
186
|
+
const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
|
|
185
187
|
|
|
186
188
|
switch (model.api) {
|
|
187
189
|
case "anthropic-messages": {
|
|
@@ -192,15 +194,17 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
192
194
|
|
|
193
195
|
// Claude requires max_tokens > thinking.budget_tokens
|
|
194
196
|
// So we need to ensure maxTokens accounts for both thinking and output
|
|
195
|
-
const
|
|
197
|
+
const defaultBudgets: ThinkingBudgets = {
|
|
196
198
|
minimal: 1024,
|
|
197
199
|
low: 2048,
|
|
198
200
|
medium: 8192,
|
|
199
201
|
high: 16384,
|
|
200
202
|
};
|
|
203
|
+
const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
|
|
201
204
|
|
|
202
205
|
const minOutputTokens = 1024;
|
|
203
|
-
|
|
206
|
+
const level = clampReasoning(options.reasoning)!;
|
|
207
|
+
let thinkingBudget = budgets[level]!;
|
|
204
208
|
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
|
205
209
|
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
|
206
210
|
|
|
@@ -261,7 +265,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
261
265
|
...base,
|
|
262
266
|
thinking: {
|
|
263
267
|
enabled: true,
|
|
264
|
-
budgetTokens: getGoogleBudget(googleModel, effort),
|
|
268
|
+
budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
|
|
265
269
|
},
|
|
266
270
|
} satisfies GoogleOptions;
|
|
267
271
|
}
|
|
@@ -287,15 +291,16 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
287
291
|
// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
|
|
288
292
|
// Claude requires max_tokens > thinking.budget_tokens
|
|
289
293
|
// So we need to ensure maxTokens accounts for both thinking and output
|
|
290
|
-
const
|
|
294
|
+
const defaultBudgets: ThinkingBudgets = {
|
|
291
295
|
minimal: 1024,
|
|
292
296
|
low: 2048,
|
|
293
297
|
medium: 8192,
|
|
294
298
|
high: 16384,
|
|
295
299
|
};
|
|
300
|
+
const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
|
|
296
301
|
|
|
297
302
|
const minOutputTokens = 1024;
|
|
298
|
-
let thinkingBudget = budgets[effort]
|
|
303
|
+
let thinkingBudget = budgets[effort]!;
|
|
299
304
|
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
|
300
305
|
const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
|
|
301
306
|
|
|
@@ -316,22 +321,20 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
316
321
|
|
|
317
322
|
case "google-vertex": {
|
|
318
323
|
// Explicitly disable thinking when reasoning is not specified
|
|
319
|
-
// This is needed because Gemini has "dynamic thinking" enabled by default
|
|
320
324
|
if (!options?.reasoning) {
|
|
321
325
|
return { ...base, thinking: { enabled: false } } satisfies GoogleVertexOptions;
|
|
322
326
|
}
|
|
323
327
|
|
|
324
|
-
const
|
|
328
|
+
const vertexModel = model as Model<"google-vertex">;
|
|
325
329
|
const effort = clampReasoning(options.reasoning)!;
|
|
330
|
+
const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;
|
|
326
331
|
|
|
327
|
-
|
|
328
|
-
// https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
329
|
-
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
|
332
|
+
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
|
|
330
333
|
return {
|
|
331
334
|
...base,
|
|
332
335
|
thinking: {
|
|
333
336
|
enabled: true,
|
|
334
|
-
level: getGemini3ThinkingLevel(effort,
|
|
337
|
+
level: getGemini3ThinkingLevel(effort, geminiModel),
|
|
335
338
|
},
|
|
336
339
|
} satisfies GoogleVertexOptions;
|
|
337
340
|
}
|
|
@@ -340,7 +343,7 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
340
343
|
...base,
|
|
341
344
|
thinking: {
|
|
342
345
|
enabled: true,
|
|
343
|
-
budgetTokens: getGoogleBudget(
|
|
346
|
+
budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
|
|
344
347
|
},
|
|
345
348
|
} satisfies GoogleVertexOptions;
|
|
346
349
|
}
|
|
@@ -353,21 +356,21 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
353
356
|
}
|
|
354
357
|
}
|
|
355
358
|
|
|
356
|
-
type
|
|
359
|
+
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
|
|
357
360
|
|
|
358
|
-
function isGemini3ProModel(model: Model<"google-generative-ai">
|
|
361
|
+
/**
 * Reports whether the model id contains the substring "3-pro".
 *
 * The substring match is intended to cover gemini-3-pro, gemini-3-pro-preview,
 * and other prefixed ids that may appear in the future.
 */
function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
	const { id } = model;
	return id.includes("3-pro");
}
|
|
362
365
|
|
|
363
|
-
function isGemini3FlashModel(model: Model<"google-generative-ai">
|
|
366
|
+
/**
 * Reports whether the model id contains the substring "3-flash".
 *
 * The substring match is intended to cover gemini-3-flash,
 * gemini-3-flash-preview, and other prefixed ids that may appear in the future.
 */
function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
	const { id } = model;
	return id.includes("3-flash");
}
|
|
367
370
|
|
|
368
371
|
function getGemini3ThinkingLevel(
|
|
369
|
-
effort:
|
|
370
|
-
model: Model<"google-generative-ai"
|
|
372
|
+
effort: ClampedThinkingLevel,
|
|
373
|
+
model: Model<"google-generative-ai">,
|
|
371
374
|
): GoogleThinkingLevel {
|
|
372
375
|
if (isGemini3ProModel(model)) {
|
|
373
376
|
// Gemini 3 Pro only supports LOW/HIGH (for now)
|
|
@@ -393,7 +396,7 @@ function getGemini3ThinkingLevel(
|
|
|
393
396
|
}
|
|
394
397
|
}
|
|
395
398
|
|
|
396
|
-
function getGeminiCliThinkingLevel(effort:
|
|
399
|
+
function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {
|
|
397
400
|
if (modelId.includes("3-pro")) {
|
|
398
401
|
// Gemini 3 Pro only supports LOW/HIGH (for now)
|
|
399
402
|
switch (effort) {
|
|
@@ -419,12 +422,18 @@ function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: stri
|
|
|
419
422
|
}
|
|
420
423
|
|
|
421
424
|
function getGoogleBudget(
|
|
422
|
-
model: Model<"google-generative-ai"
|
|
423
|
-
effort:
|
|
425
|
+
model: Model<"google-generative-ai">,
|
|
426
|
+
effort: ClampedThinkingLevel,
|
|
427
|
+
customBudgets?: ThinkingBudgets,
|
|
424
428
|
): number {
|
|
429
|
+
// Custom budgets take precedence if provided for this level
|
|
430
|
+
if (customBudgets?.[effort] !== undefined) {
|
|
431
|
+
return customBudgets[effort]!;
|
|
432
|
+
}
|
|
433
|
+
|
|
425
434
|
// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
426
435
|
if (model.id.includes("2.5-pro")) {
|
|
427
|
-
const budgets: Record<
|
|
436
|
+
const budgets: Record<ClampedThinkingLevel, number> = {
|
|
428
437
|
minimal: 128,
|
|
429
438
|
low: 2048,
|
|
430
439
|
medium: 8192,
|
|
@@ -435,7 +444,7 @@ function getGoogleBudget(
|
|
|
435
444
|
|
|
436
445
|
if (model.id.includes("2.5-flash")) {
|
|
437
446
|
// Covers 2.5-flash-lite as well
|
|
438
|
-
const budgets: Record<
|
|
447
|
+
const budgets: Record<ClampedThinkingLevel, number> = {
|
|
439
448
|
minimal: 128,
|
|
440
449
|
low: 2048,
|
|
441
450
|
medium: 8192,
|
package/src/types.ts
CHANGED
|
@@ -54,10 +54,19 @@ export type KnownProvider =
|
|
|
54
54
|
| "cerebras"
|
|
55
55
|
| "openrouter"
|
|
56
56
|
| "zai"
|
|
57
|
-
| "mistral"
|
|
57
|
+
| "mistral"
|
|
58
|
+
| "opencode";
|
|
58
59
|
export type Provider = KnownProvider | string;
|
|
59
60
|
|
|
60
|
-
export type
|
|
61
|
+
export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
62
|
+
|
|
63
|
+
/** Token budgets for each thinking level (token-based providers only) */
|
|
64
|
+
export interface ThinkingBudgets {
|
|
65
|
+
minimal?: number;
|
|
66
|
+
low?: number;
|
|
67
|
+
medium?: number;
|
|
68
|
+
high?: number;
|
|
69
|
+
}
|
|
61
70
|
|
|
62
71
|
// Base options all providers share
|
|
63
72
|
export interface StreamOptions {
|
|
@@ -65,11 +74,19 @@ export interface StreamOptions {
|
|
|
65
74
|
maxTokens?: number;
|
|
66
75
|
signal?: AbortSignal;
|
|
67
76
|
apiKey?: string;
|
|
77
|
+
/**
|
|
78
|
+
* Optional session identifier for providers that support session-based caching.
|
|
79
|
+
* Providers can use this to enable prompt caching, request routing, or other
|
|
80
|
+
* session-aware features. Ignored by providers that don't support it.
|
|
81
|
+
*/
|
|
82
|
+
sessionId?: string;
|
|
68
83
|
}
|
|
69
84
|
|
|
70
85
|
// Unified options with reasoning passed to streamSimple() and completeSimple()
|
|
71
86
|
export interface SimpleStreamOptions extends StreamOptions {
|
|
72
|
-
reasoning?:
|
|
87
|
+
reasoning?: ThinkingLevel;
|
|
88
|
+
/** Custom token budgets for thinking levels (token-based providers only) */
|
|
89
|
+
thinkingBudgets?: ThinkingBudgets;
|
|
73
90
|
}
|
|
74
91
|
|
|
75
92
|
// Generic StreamFunction with typed options
|
|
@@ -146,7 +163,7 @@ export interface ToolResultMessage<TDetails = any> {
|
|
|
146
163
|
toolName: string;
|
|
147
164
|
content: (TextContent | ImageContent)[]; // Supports text and images
|
|
148
165
|
details?: TDetails;
|
|
149
|
-
isError
|
|
166
|
+
isError: boolean;
|
|
150
167
|
timestamp: number; // Unix timestamp in milliseconds
|
|
151
168
|
}
|
|
152
169
|
|
|
@@ -136,17 +136,45 @@ async function startDeviceFlow(domain: string): Promise<DeviceCodeResponse> {
|
|
|
136
136
|
};
|
|
137
137
|
}
|
|
138
138
|
|
|
139
|
+
/**
 * Sleep that can be interrupted by an AbortSignal.
 *
 * Resolves after `ms` milliseconds. If `signal` is already aborted, or aborts
 * before the timer fires, the returned promise rejects with
 * `Error("Login cancelled")` and the pending timer is cleared.
 *
 * The abort listener is detached as soon as the timer fires, so repeated
 * sleeps against the same long-lived signal (for example from a polling loop)
 * do not pile up listeners on it.
 *
 * @param ms - Delay in milliseconds.
 * @param signal - Optional signal used to cancel the sleep early.
 * @returns A promise that resolves once the delay has elapsed.
 */
function abortableSleep(ms: number, signal?: AbortSignal): Promise<void> {
	return new Promise((resolve, reject) => {
		if (signal?.aborted) {
			reject(new Error("Login cancelled"));
			return;
		}

		// Only invoked after `timeout` is initialised, so referencing it here is safe.
		const onAbort = (): void => {
			clearTimeout(timeout);
			reject(new Error("Login cancelled"));
		};

		const timeout = setTimeout(() => {
			// Normal completion: drop the listener so it does not outlive this sleep.
			signal?.removeEventListener("abort", onAbort);
			resolve();
		}, ms);

		signal?.addEventListener("abort", onAbort, { once: true });
	});
}
|
|
161
|
+
|
|
139
162
|
async function pollForGitHubAccessToken(
|
|
140
163
|
domain: string,
|
|
141
164
|
deviceCode: string,
|
|
142
165
|
intervalSeconds: number,
|
|
143
166
|
expiresIn: number,
|
|
167
|
+
signal?: AbortSignal,
|
|
144
168
|
) {
|
|
145
169
|
const urls = getUrls(domain);
|
|
146
170
|
const deadline = Date.now() + expiresIn * 1000;
|
|
147
171
|
let intervalMs = Math.max(1000, Math.floor(intervalSeconds * 1000));
|
|
148
172
|
|
|
149
173
|
while (Date.now() < deadline) {
|
|
174
|
+
if (signal?.aborted) {
|
|
175
|
+
throw new Error("Login cancelled");
|
|
176
|
+
}
|
|
177
|
+
|
|
150
178
|
const raw = await fetchJson(urls.accessTokenUrl, {
|
|
151
179
|
method: "POST",
|
|
152
180
|
headers: {
|
|
@@ -168,20 +196,20 @@ async function pollForGitHubAccessToken(
|
|
|
168
196
|
if (raw && typeof raw === "object" && typeof (raw as DeviceTokenErrorResponse).error === "string") {
|
|
169
197
|
const err = (raw as DeviceTokenErrorResponse).error;
|
|
170
198
|
if (err === "authorization_pending") {
|
|
171
|
-
await
|
|
199
|
+
await abortableSleep(intervalMs, signal);
|
|
172
200
|
continue;
|
|
173
201
|
}
|
|
174
202
|
|
|
175
203
|
if (err === "slow_down") {
|
|
176
204
|
intervalMs += 5000;
|
|
177
|
-
await
|
|
205
|
+
await abortableSleep(intervalMs, signal);
|
|
178
206
|
continue;
|
|
179
207
|
}
|
|
180
208
|
|
|
181
209
|
throw new Error(`Device flow failed: ${err}`);
|
|
182
210
|
}
|
|
183
211
|
|
|
184
|
-
await
|
|
212
|
+
await abortableSleep(intervalMs, signal);
|
|
185
213
|
}
|
|
186
214
|
|
|
187
215
|
throw new Error("Device flow timed out");
|
|
@@ -274,11 +302,13 @@ async function enableAllGitHubCopilotModels(
|
|
|
274
302
|
* @param options.onAuth - Callback with URL and optional instructions (user code)
|
|
275
303
|
* @param options.onPrompt - Callback to prompt user for input
|
|
276
304
|
* @param options.onProgress - Optional progress callback
|
|
305
|
+
* @param options.signal - Optional AbortSignal for cancellation
|
|
277
306
|
*/
|
|
278
307
|
export async function loginGitHubCopilot(options: {
|
|
279
308
|
onAuth: (url: string, instructions?: string) => void;
|
|
280
309
|
onPrompt: (prompt: { message: string; placeholder?: string; allowEmpty?: boolean }) => Promise<string>;
|
|
281
310
|
onProgress?: (message: string) => void;
|
|
311
|
+
signal?: AbortSignal;
|
|
282
312
|
}): Promise<OAuthCredentials> {
|
|
283
313
|
const input = await options.onPrompt({
|
|
284
314
|
message: "GitHub Enterprise URL/domain (blank for github.com)",
|
|
@@ -286,6 +316,10 @@ export async function loginGitHubCopilot(options: {
|
|
|
286
316
|
allowEmpty: true,
|
|
287
317
|
});
|
|
288
318
|
|
|
319
|
+
if (options.signal?.aborted) {
|
|
320
|
+
throw new Error("Login cancelled");
|
|
321
|
+
}
|
|
322
|
+
|
|
289
323
|
const trimmed = input.trim();
|
|
290
324
|
const enterpriseDomain = normalizeDomain(input);
|
|
291
325
|
if (trimmed && !enterpriseDomain) {
|
|
@@ -301,6 +335,7 @@ export async function loginGitHubCopilot(options: {
|
|
|
301
335
|
device.device_code,
|
|
302
336
|
device.interval,
|
|
303
337
|
device.expires_in,
|
|
338
|
+
options.signal,
|
|
304
339
|
);
|
|
305
340
|
const credentials = await refreshGitHubCopilotToken(githubAccessToken, enterpriseDomain ?? undefined);
|
|
306
341
|
|