npm - @oh-my-pi/pi-ai - Versions diffs - 3.20.1 → 3.35.0 - Mend

@oh-my-pi/pi-ai 3.20.1 → 3.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/README.md +69 -12
package/package.json +3 -10
package/src/cli.ts +89 -89
package/src/index.ts +2 -2
package/src/models.generated.ts +949 -178
package/src/models.ts +11 -17
package/src/providers/anthropic.ts +94 -29
package/src/providers/google-gemini-cli.ts +270 -134
package/src/providers/google-shared.ts +48 -5
package/src/providers/google-vertex.ts +15 -4
package/src/providers/google.ts +15 -4
package/src/providers/openai-codex/index.ts +7 -0
package/src/providers/openai-codex/prompts/codex.ts +26 -59
package/src/providers/openai-codex/prompts/pi-codex-bridge.ts +38 -31
package/src/providers/openai-codex/prompts/system-prompt.ts +26 -0
package/src/providers/openai-codex/request-transformer.ts +38 -203
package/src/providers/openai-codex-responses.ts +96 -26
package/src/providers/openai-completions.ts +35 -27
package/src/providers/openai-responses.ts +3 -2
package/src/providers/transorm-messages.ts +4 -3
package/src/stream.ts +34 -25
package/src/types.ts +21 -4
package/src/utils/oauth/github-copilot.ts +38 -3
package/src/utils/oauth/google-antigravity.ts +146 -55
package/src/utils/oauth/google-gemini-cli.ts +146 -55
package/src/utils/oauth/index.ts +5 -5
package/src/utils/oauth/openai-codex.ts +129 -54
package/src/utils/overflow.ts +1 -1
package/src/utils/retry-after.ts +110 -0
package/src/bun-imports.d.ts +0 -14

package/src/providers/openai-codex-responses.ts CHANGED

@@ -6,7 +6,7 @@ import type {
 	ResponseInputText,
 	ResponseOutputMessage,
 	ResponseReasoningItem,
-} from "openai/resources/responses/responses.js";
+} from "openai/resources/responses/responses";
 import { calculateCost } from "../models";
 import { getEnvApiKey } from "../stream";
 import type {
@@ -24,6 +24,7 @@ import type {
 } from "../types";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { parseStreamingJson } from "../utils/json-parse";
+import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
 import { sanitizeSurrogates } from "../utils/sanitize-unicode";
 import {
 	CODEX_BASE_URL,
@@ -33,12 +34,9 @@ import {
 	URL_PATHS,
 } from "./openai-codex/constants";
 import { getCodexInstructions } from "./openai-codex/prompts/codex";
-import {
-	type CodexRequestOptions,
-	normalizeModel,
-	type RequestBody,
-	transformRequestBody,
-} from "./openai-codex/request-transformer";
+import { buildCodexPiBridge } from "./openai-codex/prompts/pi-codex-bridge";
+import { buildCodexSystemPrompt } from "./openai-codex/prompts/system-prompt";
+import { type CodexRequestOptions, type RequestBody, transformRequestBody } from "./openai-codex/request-transformer";
 import { parseCodexError, parseCodexSseStream } from "./openai-codex/response-handler";
 import { transformMessages } from "./transorm-messages";
@@ -94,6 +92,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
 				model: model.id,
 				input: messages,
 				stream: true,
+				prompt_cache_key: options?.sessionId,
 			};
 			if (options?.maxTokens) {
@@ -108,8 +107,15 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
 				params.tools = convertTools(context.tools);
 			}
-			const normalizedModel = normalizeModel(params.model);
-			const codexInstructions = await getCodexInstructions(normalizedModel);
+			const codexInstructions = await getCodexInstructions(params.model);
+			const bridgeText = buildCodexPiBridge(context.tools);
+			const systemPrompt = buildCodexSystemPrompt({
+				codexInstructions,
+				bridgeText,
+				userSystemPrompt: context.systemPrompt,
+			});
+			params.instructions = systemPrompt.instructions;
 			const codexOptions: CodexRequestOptions = {
 				reasoningEffort: options?.reasoningEffort,
@@ -118,17 +124,14 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
 				include: options?.include,
 			};
-			const transformedBody = await transformRequestBody(
-				params,
-				codexInstructions,
-				codexOptions,
-				options?.codexMode ?? true,
-			);
+			const transformedBody = await transformRequestBody(params, codexOptions, systemPrompt);
-			const headers = createCodexHeaders(model.headers, accountId, apiKey, transformedBody.prompt_cache_key);
+			const reasoningEffort = transformedBody.reasoning?.effort ?? null;
+			const headers = createCodexHeaders(model.headers, accountId, apiKey, options?.sessionId);
 			logCodexDebug("codex request", {
 				url,
 				model: params.model,
+				reasoningEffort,
 				headers: redactHeaders(headers),
 			});
@@ -149,7 +152,9 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
 			if (!response.ok) {
 				const info = await parseCodexError(response);
-				throw new Error(info.friendlyMessage || info.message);
+				const error = new Error(info.friendlyMessage || info.message);
+				(error as { headers?: Headers }).headers = response.headers;
+				throw error;
 			}
 			if (!response.body) {
@@ -340,10 +345,10 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
 					}
 				} else if (eventType === "error") {
 					const code = (rawEvent as { code?: string }).code || "";
-					const message = (rawEvent as { message?: string }).message || "Unknown error";
-					throw new Error(code ? `Error Code ${code}: ${message}` : message);
+					const message = (rawEvent as { message?: string }).message || "";
+					throw new Error(formatCodexErrorEvent(rawEvent, code, message));
 				} else if (eventType === "response.failed") {
-					throw new Error("Unknown error");
+					throw new Error(formatCodexFailure(rawEvent) ?? "Codex response failed");
 				}
 			}
@@ -352,7 +357,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
 			}
 			if (output.stopReason === "aborted" || output.stopReason === "error") {
-				throw new Error("An unknown error occurred");
+				throw new Error("Codex response failed");
 			}
 			stream.push({ type: "done", reason: output.stopReason, message: output });
@@ -360,7 +365,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
 		} catch (error) {
 			for (const block of output.content) delete (block as { index?: number }).index;
 			output.stopReason = options?.signal?.aborted ? "aborted" : "error";
-			output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
+			output.errorMessage = formatErrorMessageWithRetryAfter(error);
 			stream.push({ type: "error", reason: output.stopReason, error: output });
 			stream.end();
 		}
@@ -406,11 +411,11 @@ function logCodexDebug(message: string, details?: Record<string, unknown>): void
 function redactHeaders(headers: Headers): Record<string, string> {
 	const redacted: Record<string, string> = {};
-	headers.forEach((value, key) => {
+	for (const [key, value] of headers.entries()) {
 		const lower = key.toLowerCase();
 		if (lower === "authorization") {
 			redacted[key] = "Bearer [redacted]";
-			return;
+			continue;
 		}
 		if (
 			lower.includes("account") ||
@@ -419,10 +424,10 @@ function redactHeaders(headers: Headers): Record<string, string> {
 			lower === "cookie"
 		) {
 			redacted[key] = "[redacted]";
-			return;
+			continue;
 		}
 		redacted[key] = value;
-	});
+	}
 	return redacted;
 }
@@ -617,3 +622,68 @@ function mapStopReason(status: string | undefined): StopReason {
 			return "stop";
 	}
 }
+function asRecord(value: unknown): Record<string, unknown> | null {
+	if (value && typeof value === "object") {
+		return value as Record<string, unknown>;
+	}
+	return null;
+}
+function getString(value: unknown): string | undefined {
+	return typeof value === "string" ? value : undefined;
+}
+function truncate(text: string, limit: number): string {
+	if (text.length <= limit) return text;
+	return `${text.slice(0, limit)}...[truncated ${text.length - limit}]`;
+}
+function formatCodexFailure(rawEvent: Record<string, unknown>): string | null {
+	const response = asRecord(rawEvent.response);
+	const error = asRecord(rawEvent.error) ?? (response ? asRecord(response.error) : null);
+	const message = getString(error?.message) ?? getString(rawEvent.message) ?? getString(response?.message);
+	const code = getString(error?.code) ?? getString(error?.type) ?? getString(rawEvent.code);
+	const status = getString(response?.status) ?? getString(rawEvent.status);
+	const meta: string[] = [];
+	if (code) meta.push(`code=${code}`);
+	if (status) meta.push(`status=${status}`);
+	if (message) {
+		const metaText = meta.length ? ` (${meta.join(", ")})` : "";
+		return `Codex response failed: ${message}${metaText}`;
+	}
+	if (meta.length) {
+		return `Codex response failed (${meta.join(", ")})`;
+	}
+	try {
+		return `Codex response failed: ${truncate(JSON.stringify(rawEvent), 800)}`;
+	} catch {
+		return "Codex response failed";
+	}
+}
+function formatCodexErrorEvent(rawEvent: Record<string, unknown>, code: string, message: string): string {
+	const detail = formatCodexFailure(rawEvent);
+	if (detail) {
+		return detail.replace("response failed", "error event");
+	}
+	const meta: string[] = [];
+	if (code) meta.push(`code=${code}`);
+	if (message) meta.push(`message=${message}`);
+	if (meta.length > 0) {
+		return `Codex error event (${meta.join(", ")})`;
+	}
+	try {
+		return `Codex error event: ${truncate(JSON.stringify(rawEvent), 800)}`;
+	} catch {
+		return "Codex error event";
+	}
+}

package/src/providers/openai-completions.ts CHANGED

@@ -7,7 +7,7 @@ import type {
 	ChatCompletionContentPartText,
 	ChatCompletionMessageParam,
 	ChatCompletionToolMessageParam,
-} from "openai/resources/chat/completions.js";
+} from "openai/resources/chat/completions";
 import { calculateCost } from "../models";
 import { getEnvApiKey } from "../stream";
 import type {
@@ -26,6 +26,7 @@ import type {
 } from "../types";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { parseStreamingJson } from "../utils/json-parse";
+import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
 import { sanitizeSurrogates } from "../utils/sanitize-unicode";
 import { transformMessages } from "./transorm-messages";
@@ -196,34 +197,44 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 					// Some endpoints return reasoning in reasoning_content (llama.cpp),
 					// or reasoning (other openai compatible endpoints)
+					// Use the first non-empty reasoning field to avoid duplication
+					// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)
 					const reasoningFields = ["reasoning_content", "reasoning", "reasoning_text"];
+					let foundReasoningField: string | null = null;
 					for (const field of reasoningFields) {
 						if (
 							(choice.delta as any)[field] !== null &&
 							(choice.delta as any)[field] !== undefined &&
 							(choice.delta as any)[field].length > 0
 						) {
-							if (!currentBlock || currentBlock.type !== "thinking") {
-								finishCurrentBlock(currentBlock);
-								currentBlock = {
-									type: "thinking",
-									thinking: "",
-									thinkingSignature: field,
-								};
-								output.content.push(currentBlock);
-								stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
+							if (!foundReasoningField) {
+								foundReasoningField = field;
+								break;
 							}
+						}
+					}
-							if (currentBlock.type === "thinking") {
-								const delta = (choice.delta as any)[field];
-								currentBlock.thinking += delta;
-								stream.push({
-									type: "thinking_delta",
-									contentIndex: blockIndex(),
-									delta,
-									partial: output,
-								});
-							}
+					if (foundReasoningField) {
+						if (!currentBlock || currentBlock.type !== "thinking") {
+							finishCurrentBlock(currentBlock);
+							currentBlock = {
+								type: "thinking",
+								thinking: "",
+								thinkingSignature: foundReasoningField,
+							};
+							output.content.push(currentBlock);
+							stream.push({ type: "thinking_start", contentIndex: blockIndex(), partial: output });
+						}
+						if (currentBlock.type === "thinking") {
+							const delta = (choice.delta as any)[foundReasoningField];
+							currentBlock.thinking += delta;
+							stream.push({
+								type: "thinking_delta",
+								contentIndex: blockIndex(),
+								delta,
+								partial: output,
+							});
 						}
 					}
@@ -296,7 +307,7 @@ export const streamOpenAICompletions: StreamFunction<"openai-completions"> = (
 		} catch (error) {
 			for (const block of output.content) delete (block as any).index;
 			output.stopReason = options?.signal?.aborted ? "aborted" : "error";
-			output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
+			output.errorMessage = formatErrorMessageWithRetryAfter(error);
 			stream.push({ type: "error", reason: output.stopReason, error: output });
 			stream.end();
 		}
@@ -480,10 +491,8 @@ function convertMessages(
 			const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
 			if (nonEmptyThinkingBlocks.length > 0) {
 				if (compat.requiresThinkingAsText) {
-					// Convert thinking blocks to text with <thinking> delimiters
-					const thinkingText = nonEmptyThinkingBlocks
-						.map((b) => `<thinking>\n${b.thinking}\n</thinking>`)
-						.join("\n");
+					// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
+					const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
 					const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
 					if (textContent) {
 						textContent.unshift({ type: "text", text: thinkingText });
@@ -633,8 +642,7 @@ function detectCompatFromUrl(baseUrl: string): Required<OpenAICompat> {
 		baseUrl.includes("cerebras.ai") ||
 		baseUrl.includes("api.x.ai") ||
 		baseUrl.includes("mistral.ai") ||
-		baseUrl.includes("chutes.ai") ||
-		baseUrl.includes("localhost");
+		baseUrl.includes("chutes.ai");
 	const useMaxTokens = baseUrl.includes("mistral.ai") || baseUrl.includes("chutes.ai");

package/src/providers/openai-responses.ts CHANGED

@@ -9,7 +9,7 @@ import type {
 	ResponseInputText,
 	ResponseOutputMessage,
 	ResponseReasoningItem,
-} from "openai/resources/responses/responses.js";
+} from "openai/resources/responses/responses";
 import { calculateCost } from "../models";
 import { getEnvApiKey } from "../stream";
 import type {
@@ -27,6 +27,7 @@ import type {
 } from "../types";
 import { AssistantMessageEventStream } from "../utils/event-stream";
 import { parseStreamingJson } from "../utils/json-parse";
+import { formatErrorMessageWithRetryAfter } from "../utils/retry-after";
 import { sanitizeSurrogates } from "../utils/sanitize-unicode";
 import { transformMessages } from "./transorm-messages";
@@ -303,7 +304,7 @@ export const streamOpenAIResponses: StreamFunction<"openai-responses"> = (
 		} catch (error) {
 			for (const block of output.content) delete (block as any).index;
 			output.stopReason = options?.signal?.aborted ? "aborted" : "error";
-			output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
+			output.errorMessage = formatErrorMessageWithRetryAfter(error);
 			stream.push({ type: "error", reason: output.stopReason, error: output });
 			stream.end();
 		}

package/src/providers/transorm-messages.ts CHANGED

@@ -45,12 +45,13 @@ export function transformMessages<TApi extends Api>(messages: Message[], model:
 				assistantMsg.api !== model.api;
 			// Transform message from different provider/model
-			const transformedContent = assistantMsg.content.map((block) => {
+			const transformedContent = assistantMsg.content.flatMap((block) => {
 				if (block.type === "thinking") {
-					// Convert thinking block to text block with <thinking> tags
+					// Skip empty thinking blocks, convert others to plain text
+					if (!block.thinking || block.thinking.trim() === "") return [];
 					return {
 						type: "text" as const,
-						text: `<thinking>\n${block.thinking}\n</thinking>`,
+						text: block.thinking,
 					};
 				}
 				// Normalize tool call IDs for github-copilot cross-API switches

package/src/stream.ts CHANGED

@@ -21,8 +21,9 @@ import type {
 	KnownProvider,
 	Model,
 	OptionsForApi,
-	ReasoningEffort,
 	SimpleStreamOptions,
+	ThinkingBudgets,
+	ThinkingLevel,
 } from "./types";
 const VERTEX_ADC_CREDENTIALS_PATH = join(homedir(), ".config", "gcloud", "application_default_credentials.json");
@@ -64,7 +65,6 @@ export function getEnvApiKey(provider: any): string | undefined {
 		if (hasCredentials && hasProject && hasLocation) {
 			return "<authenticated>";
 		}
-		return undefined;
 	}
 	const envMap: Record<string, string> = {
@@ -76,6 +76,7 @@ export function getEnvApiKey(provider: any): string | undefined {
 		openrouter: "OPENROUTER_API_KEY",
 		zai: "ZAI_API_KEY",
 		mistral: "MISTRAL_API_KEY",
+		opencode: "OPENCODE_API_KEY",
 	};
 	const envVar = envMap[provider];
@@ -178,10 +179,11 @@ function mapOptionsForApi<TApi extends Api>(
 		maxTokens: options?.maxTokens || Math.min(model.maxTokens, 32000),
 		signal: options?.signal,
 		apiKey: apiKey || options?.apiKey,
+		sessionId: options?.sessionId,
 	};
 	// Helper to clamp xhigh to high for providers that don't support it
-	const clampReasoning = (effort: ReasoningEffort | undefined) => (effort === "xhigh" ? "high" : effort);
+	const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
 	switch (model.api) {
 		case "anthropic-messages": {
@@ -192,15 +194,17 @@ function mapOptionsForApi<TApi extends Api>(
 			// Claude requires max_tokens > thinking.budget_tokens
 			// So we need to ensure maxTokens accounts for both thinking and output
-			const anthropicBudgets = {
+			const defaultBudgets: ThinkingBudgets = {
 				minimal: 1024,
 				low: 2048,
 				medium: 8192,
 				high: 16384,
 			};
+			const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
 			const minOutputTokens = 1024;
-			let thinkingBudget = anthropicBudgets[clampReasoning(options.reasoning)!];
+			const level = clampReasoning(options.reasoning)!;
+			let thinkingBudget = budgets[level]!;
 			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
 			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
@@ -261,7 +265,7 @@ function mapOptionsForApi<TApi extends Api>(
 				...base,
 				thinking: {
 					enabled: true,
-					budgetTokens: getGoogleBudget(googleModel, effort),
+					budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
 				},
 			} satisfies GoogleOptions;
 		}
@@ -287,15 +291,16 @@ function mapOptionsForApi<TApi extends Api>(
 			// Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
 			// Claude requires max_tokens > thinking.budget_tokens
 			// So we need to ensure maxTokens accounts for both thinking and output
-			const budgets: Record<ClampedReasoningEffort, number> = {
+			const defaultBudgets: ThinkingBudgets = {
 				minimal: 1024,
 				low: 2048,
 				medium: 8192,
 				high: 16384,
 			};
+			const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
 			const minOutputTokens = 1024;
-			let thinkingBudget = budgets[effort];
+			let thinkingBudget = budgets[effort]!;
 			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
 			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
@@ -316,22 +321,20 @@ function mapOptionsForApi<TApi extends Api>(
 		case "google-vertex": {
 			// Explicitly disable thinking when reasoning is not specified
-			// This is needed because Gemini has "dynamic thinking" enabled by default
 			if (!options?.reasoning) {
 				return { ...base, thinking: { enabled: false } } satisfies GoogleVertexOptions;
 			}
-			const googleModel = model as Model<"google-vertex">;
+			const vertexModel = model as Model<"google-vertex">;
 			const effort = clampReasoning(options.reasoning)!;
+			const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;
-			// Gemini 3 models use thinkingLevel exclusively instead of thinkingBudget.
-			// https://ai.google.dev/gemini-api/docs/thinking#set-budget
-			if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
+			if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
 				return {
 					...base,
 					thinking: {
 						enabled: true,
-						level: getGemini3ThinkingLevel(effort, googleModel),
+						level: getGemini3ThinkingLevel(effort, geminiModel),
 					},
 				} satisfies GoogleVertexOptions;
 			}
@@ -340,7 +343,7 @@ function mapOptionsForApi<TApi extends Api>(
 				...base,
 				thinking: {
 					enabled: true,
-					budgetTokens: getGoogleBudget(googleModel, effort),
+					budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
 				},
 			} satisfies GoogleVertexOptions;
 		}
@@ -353,21 +356,21 @@ function mapOptionsForApi<TApi extends Api>(
 	}
 }
-type ClampedReasoningEffort = Exclude<ReasoningEffort, "xhigh">;
+type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
-function isGemini3ProModel(model: Model<"google-generative-ai"> | Model<"google-vertex">): boolean {
+function isGemini3ProModel(model: Model<"google-generative-ai">): boolean {
 	// Covers gemini-3-pro, gemini-3-pro-preview, and possible other prefixed ids in the future
 	return model.id.includes("3-pro");
 }
-function isGemini3FlashModel(model: Model<"google-generative-ai"> | Model<"google-vertex">): boolean {
+function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
 	// Covers gemini-3-flash, gemini-3-flash-preview, and possible other prefixed ids in the future
 	return model.id.includes("3-flash");
 }
 function getGemini3ThinkingLevel(
-	effort: ClampedReasoningEffort,
-	model: Model<"google-generative-ai"> | Model<"google-vertex">,
+	effort: ClampedThinkingLevel,
+	model: Model<"google-generative-ai">,
 ): GoogleThinkingLevel {
 	if (isGemini3ProModel(model)) {
 		// Gemini 3 Pro only supports LOW/HIGH (for now)
@@ -393,7 +396,7 @@ function getGemini3ThinkingLevel(
 	}
 }
-function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: string): GoogleThinkingLevel {
+function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {
 	if (modelId.includes("3-pro")) {
 		// Gemini 3 Pro only supports LOW/HIGH (for now)
 		switch (effort) {
@@ -419,12 +422,18 @@ function getGeminiCliThinkingLevel(effort: ClampedReasoningEffort, modelId: stri
 }
 function getGoogleBudget(
-	model: Model<"google-generative-ai"> | Model<"google-vertex">,
-	effort: ClampedReasoningEffort,
+	model: Model<"google-generative-ai">,
+	effort: ClampedThinkingLevel,
+	customBudgets?: ThinkingBudgets,
 ): number {
+	// Custom budgets take precedence if provided for this level
+	if (customBudgets?.[effort] !== undefined) {
+		return customBudgets[effort]!;
+	}
 	// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
 	if (model.id.includes("2.5-pro")) {
-		const budgets: Record<ClampedReasoningEffort, number> = {
+		const budgets: Record<ClampedThinkingLevel, number> = {
 			minimal: 128,
 			low: 2048,
 			medium: 8192,
@@ -435,7 +444,7 @@ function getGoogleBudget(
 	if (model.id.includes("2.5-flash")) {
 		// Covers 2.5-flash-lite as well
-		const budgets: Record<ClampedReasoningEffort, number> = {
+		const budgets: Record<ClampedThinkingLevel, number> = {
 			minimal: 128,
 			low: 2048,
 			medium: 8192,

package/src/types.ts CHANGED

@@ -54,10 +54,19 @@ export type KnownProvider =
 	| "cerebras"
 	| "openrouter"
 	| "zai"
-	| "mistral";
+	| "mistral"
+	| "opencode";
 export type Provider = KnownProvider | string;
-export type ReasoningEffort = "minimal" | "low" | "medium" | "high" | "xhigh";
+export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
+/** Token budgets for each thinking level (token-based providers only) */
+export interface ThinkingBudgets {
+	minimal?: number;
+	low?: number;
+	medium?: number;
+	high?: number;
+}
 // Base options all providers share
 export interface StreamOptions {
@@ -65,11 +74,19 @@ export interface StreamOptions {
 	maxTokens?: number;
 	signal?: AbortSignal;
 	apiKey?: string;
+	/**
+	 * Optional session identifier for providers that support session-based caching.
+	 * Providers can use this to enable prompt caching, request routing, or other
+	 * session-aware features. Ignored by providers that don't support it.
+	 */
+	sessionId?: string;
 }
 // Unified options with reasoning passed to streamSimple() and completeSimple()
 export interface SimpleStreamOptions extends StreamOptions {
-	reasoning?: ReasoningEffort;
+	reasoning?: ThinkingLevel;
+	/** Custom token budgets for thinking levels (token-based providers only) */
+	thinkingBudgets?: ThinkingBudgets;
 }
 // Generic StreamFunction with typed options
@@ -146,7 +163,7 @@ export interface ToolResultMessage<TDetails = any> {
 	toolName: string;
 	content: (TextContent | ImageContent)[]; // Supports text and images
 	details?: TDetails;
-	isError?: boolean;
+	isError: boolean;
 	timestamp: number; // Unix timestamp in milliseconds
 }