@oh-my-pi/pi-ai 3.20.1 → 3.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@ import type {
6
6
  ResponseInputText,
7
7
  ResponseOutputMessage,
8
8
  ResponseReasoningItem,
9
- } from "openai/resources/responses/responses.js";
9
+ } from "openai/resources/responses/responses";
10
10
  import { calculateCost } from "../models";
11
11
  import { getEnvApiKey } from "../stream";
12
12
  import type {
@@ -24,6 +24,7 @@ import type {
24
24
  } from "../types";
25
25
  import { AssistantMessageEventStream } from "../utils/event-stream";
26
26
  import { parseStreamingJson } from "../utils/json-parse";
27
+ import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
27
28
  import { sanitizeSurrogates } from "../utils/sanitize-unicode";
28
29
  import {
29
30
  CODEX_BASE_URL,
@@ -33,12 +34,9 @@ import {
33
34
  URL_PATHS,
34
35
  } from "./openai-codex/constants";
35
36
  import { getCodexInstructions } from "./openai-codex/prompts/codex";
36
- import {
37
- type CodexRequestOptions,
38
- normalizeModel,
39
- type RequestBody,
40
- transformRequestBody,
41
- } from "./openai-codex/request-transformer";
37
+ import { buildCodexPiBridge } from "./openai-codex/prompts/pi-codex-bridge";
38
+ import { buildCodexSystemPrompt } from "./openai-codex/prompts/system-prompt";
39
+ import { type CodexRequestOptions, type RequestBody, transformRequestBody } from "./openai-codex/request-transformer";
42
40
  import { parseCodexError, parseCodexSseStream } from "./openai-codex/response-handler";
43
41
  import { transformMessages } from "./transorm-messages";
44
42
 
@@ -94,6 +92,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
94
92
  model: model.id,
95
93
  input: messages,
96
94
  stream: true,
95
+ prompt_cache_key: options?.sessionId,
97
96
  };
98
97
 
99
98
  if (options?.maxTokens) {
@@ -108,8 +107,15 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
108
107
  params.tools = convertTools(context.tools);
109
108
  }
110
109
 
111
- const normalizedModel = normalizeModel(params.model);
112
- const codexInstructions = await getCodexInstructions(normalizedModel);
110
+ const codexInstructions = await getCodexInstructions(params.model);
111
+ const bridgeText = buildCodexPiBridge(context.tools);
112
+ const systemPrompt = buildCodexSystemPrompt({
113
+ codexInstructions,
114
+ bridgeText,
115
+ userSystemPrompt: context.systemPrompt,
116
+ });
117
+
118
+ params.instructions = systemPrompt.instructions;
113
119
 
114
120
  const codexOptions: CodexRequestOptions = {
115
121
  reasoningEffort: options?.reasoningEffort,
@@ -118,17 +124,14 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
118
124
  include: options?.include,
119
125
  };
120
126
 
121
- const transformedBody = await transformRequestBody(
122
- params,
123
- codexInstructions,
124
- codexOptions,
125
- options?.codexMode ?? true,
126
- );
127
+ const transformedBody = await transformRequestBody(params, codexOptions, systemPrompt);
127
128
 
128
- const headers = createCodexHeaders(model.headers, accountId, apiKey, transformedBody.prompt_cache_key);
129
+ const reasoningEffort = transformedBody.reasoning?.effort ?? null;
130
+ const headers = createCodexHeaders(model.headers, accountId, apiKey, options?.sessionId);
129
131
  logCodexDebug("codex request", {
130
132
  url,
131
133
  model: params.model,
134
+ reasoningEffort,
132
135
  headers: redactHeaders(headers),
133
136
  });
134
137
 
@@ -149,7 +152,9 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
149
152
 
150
153
  if (!response.ok) {
151
154
  const info = await parseCodexError(response);
152
- throw new Error(info.friendlyMessage || info.message);
155
+ const error = new Error(info.friendlyMessage || info.message);
156
+ (error as { headers?: Headers }).headers = response.headers;
157
+ throw error;
153
158
  }
154
159
 
155
160
  if (!response.body) {
@@ -340,10 +345,10 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
340
345
  }
341
346
  } else if (eventType === "error") {
342
347
  const code = (rawEvent as { code?: string }).code || "";
343
- const message = (rawEvent as { message?: string }).message || "Unknown error";
344
- throw new Error(code ? `Error Code ${code}: ${message}` : message);
348
+ const message = (rawEvent as { message?: string }).message || "";
349
+ throw new Error(formatCodexErrorEvent(rawEvent, code, message));
345
350
  } else if (eventType === "response.failed") {
346
- throw new Error("Unknown error");
351
+ throw new Error(formatCodexFailure(rawEvent) ?? "Codex response failed");
347
352
  }
348
353
  }
349
354
 
@@ -352,7 +357,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
352
357
  }
353
358
 
354
359
  if (output.stopReason === "aborted" || output.stopReason === "error") {
355
- throw new Error("An unknown error occurred");
360
+ throw new Error("Codex response failed");
356
361
  }
357
362
 
358
363
  stream.push({ type: "done", reason: output.stopReason, message: output });
@@ -360,7 +365,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
360
365
  } catch (error) {
361
366
  for (const block of output.content) delete (block as { index?: number }).index;
362
367
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
363
- output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
368
+ output.errorMessage = formatErrorMessageWithRetryAfter(error);
364
369
  stream.push({ type: "error", reason: output.stopReason, error: output });
365
370
  stream.end();
366
371
  }
@@ -406,11 +411,11 @@ function logCodexDebug(message: string, details?: Record<string, unknown>): void
406
411
 
407
412
  function redactHeaders(headers: Headers): Record<string, string> {
408
413
  const redacted: Record<string, string> = {};
409
- headers.forEach((value, key) => {
414
+ for (const [key, value] of headers.entries()) {
410
415
  const lower = key.toLowerCase();
411
416
  if (lower === "authorization") {
412
417
  redacted[key] = "Bearer [redacted]";
413
- return;
418
+ continue;
414
419
  }
415
420
  if (
416
421
  lower.includes("account") ||
@@ -419,10 +424,10 @@ function redactHeaders(headers: Headers): Record<string, string> {
419
424
  lower === "cookie"
420
425
  ) {
421
426
  redacted[key] = "[redacted]";
422
- return;
427
+ continue;
423
428
  }
424
429
  redacted[key] = value;
425
- });
430
+ }
426
431
  return redacted;
427
432
  }
428
433
 
@@ -617,3 +622,68 @@ function mapStopReason(status: string | undefined): StopReason {
617
622
  return "stop";
618
623
  }
619
624
  }
625
+
626
+ function asRecord(value: unknown): Record<string, unknown> | null {
627
+ if (value && typeof value === "object") {
628
+ return value as Record<string, unknown>;
629
+ }
630
+ return null;
631
+ }
632
+
633
+ function getString(value: unknown): string | undefined {
634
+ return typeof value === "string" ? value : undefined;
635
+ }
636
+
637
+ function truncate(text: string, limit: number): string {
638
+ if (text.length <= limit) return text;
639
+ return `${text.slice(0, limit)}...[truncated ${text.length - limit}]`;
640
+ }
641
+
642
+ function formatCodexFailure(rawEvent: Record<string, unknown>): string | null {
643
+ const response = asRecord(rawEvent.response);
644
+ const error = asRecord(rawEvent.error) ?? (response ? asRecord(response.error) : null);
645
+
646
+ const message = getString(error?.message) ?? getString(rawEvent.message) ?? getString(response?.message);
647
+ const code = getString(error?.code) ?? getString(error?.type) ?? getString(rawEvent.code);
648
+ const status = getString(response?.status) ?? getString(rawEvent.status);
649
+
650
+ const meta: string[] = [];
651
+ if (code) meta.push(`code=${code}`);
652
+ if (status) meta.push(`status=${status}`);
653
+
654
+ if (message) {
655
+ const metaText = meta.length ? ` (${meta.join(", ")})` : "";
656
+ return `Codex response failed: ${message}${metaText}`;
657
+ }
658
+
659
+ if (meta.length) {
660
+ return `Codex response failed (${meta.join(", ")})`;
661
+ }
662
+
663
+ try {
664
+ return `Codex response failed: ${truncate(JSON.stringify(rawEvent), 800)}`;
665
+ } catch {
666
+ return "Codex response failed";
667
+ }
668
+ }
669
+
670
+ function formatCodexErrorEvent(rawEvent: Record<string, unknown>, code: string, message: string): string {
671
+ const detail = formatCodexFailure(rawEvent);
672
+ if (detail) {
673
+ return detail.replace("response failed", "error event");
674
+ }
675
+
676
+ const meta: string[] = [];
677
+ if (code) meta.push(`code=${code}`);
678
+ if (message) meta.push(`message=${message}`);
679
+
680
+ if (meta.length > 0) {
681
+ return `Codex error event (${meta.join(", ")})`;
682
+ }
683
+
684
+ try {
685
+ return `Codex error event: ${truncate(JSON.stringify(rawEvent), 800)}`;
686
+ } catch {
687
+ return "Codex error event";
688
+ }
689
+ }
@@ -7,7 +7,7 @@ import type {
7
7
  ChatCompletionContentPartText,
8
8
  ChatCompletionMessageParam,
9
9
  ChatCompletionToolMessageParam,
10
- } from "openai/resources/chat/completions.js";
10
+ } from "openai/resources/chat/completions";
11
11
  import { calculateCost } from "../models";
12
12
  import { getEnvApiKey } from "../stream";
13
13
  import type {
@@ -26,6 +26,7 @@ import type {
26
26
  } from "../types";
27
27
  import { AssistantMessageEventStream } from "../utils/event-stream";
28
28
  import { parseStreamingJson } from "../utils/json-parse";
29
+ import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
29
30
  import { sanitizeSurrogates } from "../utils/sanitize-unicode";
30
31
  import { transformMessages } from "./transorm-messages";
31
32
 
@@ -196,34 +197,44 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
196
197
 
197
198
  // Some endpoints return reasoning in reasoning_content (llama.cpp),
198
199
  // or reasoning (other openai compatible endpoints)
200
+ // Use the first non-empty reasoning field to avoid duplication
201
+ // (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
199
202
  const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
203
+ let foundReasoningField: string | null = null;
200
204
  for (const field of reasoningFields) {
201
205
  if (
202
206
  (choice.delta as any)[field] !== null &&
203
207
  (choice.delta as any)[field] !== undefined &&
204
208
  (choice.delta as any)[field].length > 0
205
209
  ) {
206
- if (!currentBlock || currentBlock.type !== "thinking") {
207
- finishCurrentBlock(currentBlock);
208
- currentBlock = {
209
- type: "thinking",
210
- thinking: "",
211
- thinkingSignature: field,
212
- };
213
- output.content.push(currentBlock);
214
- stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
210
+ if (!foundReasoningField) {
211
+ foundReasoningField = field;
212
+ break;
215
213
  }
214
+ }
215
+ }
216
216
 
217
- if (currentBlock.type === "thinking") {
218
- const delta = (choice.delta as any)[field];
219
- currentBlock.thinking += delta;
220
- stream.push({
221
- type: "thinking_delta",
222
- contentIndex: blockIndex(),
223
- delta,
224
- partial: output,
225
- });
226
- }
217
+ if (foundReasoningField) {
218
+ if (!currentBlock || currentBlock.type !== "thinking") {
219
+ finishCurrentBlock(currentBlock);
220
+ currentBlock = {
221
+ type: "thinking",
222
+ thinking: "",
223
+ thinkingSignature: foundReasoningField,
224
+ };
225
+ output.content.push(currentBlock);
226
+ stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
227
+ }
228
+
229
+ if (currentBlock.type === "thinking") {
230
+ const delta = (choice.delta as any)[foundReasoningField];
231
+ currentBlock.thinking += delta;
232
+ stream.push({
233
+ type: "thinking_delta",
234
+ contentIndex: blockIndex(),
235
+ delta,
236
+ partial: output,
237
+ });
227
238
  }
228
239
  }
229
240
 
@@ -296,7 +307,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
296
307
  } catch (error) {
297
308
  for (const block of output.content) delete (block as any).index;
298
309
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
299
- output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
310
+ output.errorMessage = formatErrorMessageWithRetryAfter(error);
300
311
  stream.push({ type: "error", reason: output.stopReason, error: output });
301
312
  stream.end();
302
313
  }
@@ -480,10 +491,8 @@ function convertMessages(
480
491
  const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
481
492
  if (nonEmptyThinkingBlocks.length > 0) {
482
493
  if (compat.requiresThinkingAsText) {
483
- // Convert thinking blocks to text with <thinking> delimiters
484
- const thinkingText = nonEmptyThinkingBlocks
485
- .map((b) => `<thinking>\n${b.thinking}\n</thinking>`)
486
- .join("\n");
494
+ // Convert thinking blocks to plain text (no tags to avoid model mimicking them)
495
+ const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
487
496
  const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
488
497
  if (textContent) {
489
498
  textContent.unshift({ type: "text", text: thinkingText });
@@ -633,8 +642,7 @@ function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
633
642
  baseUrl.includes("cerebras.ai") ||
634
643
  baseUrl.includes("api.x.ai") ||
635
644
  baseUrl.includes("mistral.ai") ||
636
- baseUrl.includes("chutes.ai") ||
637
- baseUrl.includes("localhost");
645
+ baseUrl.includes("chutes.ai");
638
646
 
639
647
  const useMaxTokens = baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai");
640
648
 
@@ -9,7 +9,7 @@ import type {
9
9
  ResponseInputText,
10
10
  ResponseOutputMessage,
11
11
  ResponseReasoningItem,
12
- } from "openai/resources/responses/responses.js";
12
+ } from "openai/resources/responses/responses";
13
13
  import { calculateCost } from "../models";
14
14
  import { getEnvApiKey } from "../stream";
15
15
  import type {
@@ -27,6 +27,7 @@ import type {
27
27
  } from "../types";
28
28
  import { AssistantMessageEventStream } from "../utils/event-stream";
29
29
  import { parseStreamingJson } from "../utils/json-parse";
30
+ import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
30
31
  import { sanitizeSurrogates } from "../utils/sanitize-unicode";
31
32
  import { transformMessages } from "./transorm-messages";
32
33
 
@@ -303,7 +304,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
303
304
  } catch (error) {
304
305
  for (const block of output.content) delete (block as any).index;
305
306
  output.stopReason = options?.signal?.aborted ? "aborted" : "error";
306
- output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
307
+ output.errorMessage = formatErrorMessageWithRetryAfter(error);
307
308
  stream.push({ type: "error", reason: output.stopReason, error: output });
308
309
  stream.end();
309
310
  }
@@ -45,12 +45,13 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
45
45
  assistantMsg.api !== model.api;
46
46
 
47
47
  // Transform message from different provider/model
48
- const transformedContent = assistantMsg.content.map((block) => {
48
+ const transformedContent = assistantMsg.content.flatMap((block) => {
49
49
  if (block.type === "thinking") {
50
- // Convert thinking block to text block with <thinking> tags
50
+ // Skip empty thinking blocks, convert others to plain text
51
+ if (!block.thinking || block.thinking.trim() === "") return [];
51
52
  return {
52
53
  type: "text" as const,
53
- text: `<thinking>\n${block.thinking}\n</thinking>`,
54
+ text: block.thinking,
54
55
  };
55
56
  }
56
57
  // Normalize tool call IDs for github-copilot cross-API switches
package/src/stream.ts CHANGED
@@ -21,8 +21,9 @@ import type {
21
21
  KnownProvider,
22
22
  Model,
23
23
  OptionsForApi,
24
- ReasoningEffort,
25
24
  SimpleStreamOptions,
25
+ ThinkingBudgets,
26
+ ThinkingLevel,
26
27
  } from "./types";
27
28
 
28
29
  const VERTEX_ADC_CREDENTIALS_PATH = join(homedir(), ".config", "gcloud", "application_default_credentials.json");
@@ -64,7 +65,6 @@ export function getEnvApiKey(provider: any): string | undefined {
64
65
  if (hasCredentials && hasProject && hasLocation) {
65
66
  return "<authenticated>";
66
67
  }
67
- return undefined;
68
68
  }
69
69
 
70
70
  const envMap: Record<string, string> = {
@@ -76,6 +76,7 @@ export function getEnvApiKey(provider: any): string | undefined {
76
76
  openrouter: "OPENROUTER_API_KEY",
77
77
  zai: "ZAI_API_KEY",
78
78
  mistral: "MISTRAL_API_KEY",
79
+ opencode: "OPENCODE_API_KEY",
79
80
  };
80
81
 
81
82
  const envVar = envMap[provider];
@@ -178,10 +179,11 @@ function mapOptionsForApi<TApi extends Api>(
178
179
  maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
179
180
  signal: options?.signal,
180
181
  apiKey: apiKey || options?.apiKey,
182
+ sessionId: options?.sessionId,
181
183
  };
182
184
 
183
185
  // Helper to clamp xhigh to high for providers that don't support it
184
- const clampReasoning = (effort: ReasoningEffort | undefined) => (effort === "xhigh" ? "high" : effort);
186
+ const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
185
187
 
186
188
  switch (model.api) {
187
189
  case "anthropic-messages": {
@@ -192,15 +194,17 @@ function mapOptionsForApi<TApi extends Api>(
192
194
 
193
195
  // Claude requires max_tokens > thinking.budget_tokens
194
196
  // So we need to ensure maxTokens accounts for both thinking and output
195
- const anthropicBudgets = {
197
+ const defaultBudgets: ThinkingBudgets = {
196
198
  minimal: 1024,
197
199
  low: 2048,
198
200
  medium: 8192,
199
201
  high: 16384,
200
202
  };
203
+ const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
201
204
 
202
205
  const minOutputTokens = 1024;
203
- let thinkingBudget = anthropicBudgets[clampReasoning(options.reasoning)!];
206
+ const level = clampReasoning(options.reasoning)!;
207
+ let thinkingBudget = budgets[level]!;
204
208
  // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
205
209
  const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
206
210
 
@@ -261,7 +265,7 @@ function mapOptionsForApi<TApi extends Api>(
261
265
  ...base,
262
266
  thinking: {
263
267
  enabled: true,
264
- budgetTokens: getGoogleBudget(googleModel, effort),
268
+ budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
265
269
  },
266
270
  } satisfies GoogleOptions;
267
271
  }
@@ -287,15 +291,16 @@ function mapOptionsForApi<TApi extends Api>(
287
291
  // Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
288
292
  // Claude requires max_tokens > thinking.budget_tokens
289
293
  // So we need to ensure maxTokens accounts for both thinking and output
290
- const budgets: Record<ClampedReasoningEffort, number> = {
294
+ const defaultBudgets: ThinkingBudgets = {
291
295
  minimal: 1024,
292
296
  low: 2048,
293
297
  medium: 8192,
294
298
  high: 16384,
295
299
  };
300
+ const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
296
301
 
297
302
  const minOutputTokens = 1024;
298
- let thinkingBudget = budgets[effort];
303
+ let thinkingBudget = budgets[effort]!;
299
304
  // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
300
305
  const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
301
306
 
@@ -316,22 +321,20 @@ function mapOptionsForApi<TApi extends Api>(
316
321
 
317
322
  case "google-vertex": {
318
323
  // Explicitly disable thinking when reasoning is not specified
319
- // This is needed because Gemini has "dynamic thinking" enabled by default
320
324
  if (!options?.reasoning) {
321
325
  return { ...base, thinking: { enabled: false } } satisfies GoogleVertexOptions;
322
326
  }
323
327
 
324
- const googleModel = model as Model<"google-vertex">;
328
+ const vertexModel = model as Model<"google-vertex">;
325
329
  const effort = clampReasoning(options.reasoning)!;
330
+ const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;
326
331
 
327
- // Gemini 3 models use thinkingLevel exclusively instead of thinkingBudget.
328
- // https://ai.google.dev/gemini-api/docs/thinking#set-budget
329
- if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
332
+ if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
330
333
  return {
331
334
  ...base,
332
335
  thinking: {
333
336
  enabled: true,
334
- level: getGemini3ThinkingLevel(effort, googleModel),
337
+ level: getGemini3ThinkingLevel(effort, geminiModel),
335
338
  },
336
339
  } satisfies GoogleVertexOptions;
337
340
  }
@@ -340,7 +343,7 @@ function mapOptionsForApi<TApi extends Api>(
340
343
  ...base,
341
344
  thinking: {
342
345
  enabled: true,
343
- budgetTokens: getGoogleBudget(googleModel, effort),
346
+ budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
344
347
  },
345
348
  } satisfies GoogleVertexOptions;
346
349
  }
@@ -353,21 +356,21 @@ function mapOptionsForApi<TApi extends Api>(
353
356
  }
354
357
  }
355
358
 
356
- type ClampedReasoningEffort = Exclude<ReasoningEffort, "xhigh">;
359
+ type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
357
360
 
358
- function isGemini3ProModel(model: Model<"google-generative-ai"> | Model<"google-vertex">): boolean {
361
+ function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
359
362
  // Covers gemini-3-pro, gemini-3-pro-preview, and possible other prefixed ids in the future
360
363
  return model.id.includes("3-pro");
361
364
  }
362
365
 
363
- function isGemini3FlashModel(model: Model<"google-generative-ai"> | Model<"google-vertex">): boolean {
366
+ function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
364
367
  // Covers gemini-3-flash, gemini-3-flash-preview, and possible other prefixed ids in the future
365
368
  return model.id.includes("3-flash");
366
369
  }
367
370
 
368
371
  function getGemini3ThinkingLevel(
369
- effort: ClampedReasoningEffort,
370
- model: Model<"google-generative-ai"> | Model<"google-vertex">,
372
+ effort: ClampedThinkingLevel,
373
+ model: Model<"google-generative-ai">,
371
374
  ): GoogleThinkingLevel {
372
375
  if (isGemini3ProModel(model)) {
373
376
  // Gemini 3 Pro only supports LOW/HIGH (for now)
@@ -393,7 +396,7 @@ function getGemini3ThinkingLevel(
393
396
  }
394
397
  }
395
398
 
396
- function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: string): GoogleThinkingLevel {
399
+ function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {
397
400
  if (modelId.includes("3-pro")) {
398
401
  // Gemini 3 Pro only supports LOW/HIGH (for now)
399
402
  switch (effort) {
@@ -419,12 +422,18 @@ function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: stri
419
422
  }
420
423
 
421
424
  function getGoogleBudget(
422
- model: Model<"google-generative-ai"> | Model<"google-vertex">,
423
- effort: ClampedReasoningEffort,
425
+ model: Model<"google-generative-ai">,
426
+ effort: ClampedThinkingLevel,
427
+ customBudgets?: ThinkingBudgets,
424
428
  ): number {
429
+ // Custom budgets take precedence if provided for this level
430
+ if (customBudgets?.[effort] !== undefined) {
431
+ return customBudgets[effort]!;
432
+ }
433
+
425
434
  // See https://ai.google.dev/gemini-api/docs/thinking#set-budget
426
435
  if (model.id.includes("2.5-pro")) {
427
- const budgets: Record<ClampedReasoningEffort, number> = {
436
+ const budgets: Record<ClampedThinkingLevel, number> = {
428
437
  minimal: 128,
429
438
  low: 2048,
430
439
  medium: 8192,
@@ -435,7 +444,7 @@ function getGoogleBudget(
435
444
 
436
445
  if (model.id.includes("2.5-flash")) {
437
446
  // Covers 2.5-flash-lite as well
438
- const budgets: Record<ClampedReasoningEffort, number> = {
447
+ const budgets: Record<ClampedThinkingLevel, number> = {
439
448
  minimal: 128,
440
449
  low: 2048,
441
450
  medium: 8192,
package/src/types.ts CHANGED
@@ -54,10 +54,19 @@ export type KnownProvider =
54
54
  | "cerebras"
55
55
  | "openrouter"
56
56
  | "zai"
57
- | "mistral";
57
+ | "mistral"
58
+ | "opencode";
58
59
  export type Provider = KnownProvider | string;
59
60
 
60
- export type ReasoningEffort = "minimal" | "low" | "medium" | "high" | "xhigh";
61
+ export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
62
+
63
+ /** Token budgets for each thinking level (token-based providers only) */
64
+ export interface ThinkingBudgets {
65
+ minimal?: number;
66
+ low?: number;
67
+ medium?: number;
68
+ high?: number;
69
+ }
61
70
 
62
71
  // Base options all providers share
63
72
  export interface StreamOptions {
@@ -65,11 +74,19 @@ export interface StreamOptions {
65
74
  maxTokens?: number;
66
75
  signal?: AbortSignal;
67
76
  apiKey?: string;
77
+ /**
78
+ * Optional session identifier for providers that support session-based caching.
79
+ * Providers can use this to enable prompt caching, request routing, or other
80
+ * session-aware features. Ignored by providers that don't support it.
81
+ */
82
+ sessionId?: string;
68
83
  }
69
84
 
70
85
  // Unified options with reasoning passed to streamSimple() and completeSimple()
71
86
  export interface SimpleStreamOptions extends StreamOptions {
72
- reasoning?: ReasoningEffort;
87
+ reasoning?: ThinkingLevel;
88
+ /** Custom token budgets for thinking levels (token-based providers only) */
89
+ thinkingBudgets?: ThinkingBudgets;
73
90
  }
74
91
 
75
92
  // Generic StreamFunction with typed options
@@ -146,7 +163,7 @@ export interface ToolResultMessage<TDetails = any> {
146
163
  toolName: string;
147
164
  content: (TextContent | ImageContent)[]; // Supports text and images
148
165
  details?: TDetails;
149
- isError?: boolean;
166
+ isError: boolean;
150
167
  timestamp: number; // Unix timestamp in milliseconds
151
168
  }
152
169