@oh-my-pi/pi-ai 4.8.3 → 5.0.0

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@oh-my-pi/pi-ai",
-  "version": "4.8.3",
+  "version": "5.0.0",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "type": "module",
   "main": "./src/index.ts",
package/src/models.ts CHANGED
@@ -53,7 +53,7 @@ const XHIGH_MODELS = new Set(["gpt-5.1-codex-max", "gpt-5.2", "gpt-5.2-codex"]);
  * Currently only certain OpenAI Codex models support this.
  */
 export function supportsXhigh<TApi extends Api>(model: Model<TApi>): boolean {
-  return XHIGH_MODELS.has(model.id);
+  return XHIGH_MODELS.has(model.id) || model.api === "anthropic-messages";
 }
 
 /**
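
For reference, a minimal sketch of the new behavior (not from the package; the Model shape is reduced to the two fields the function reads, and the model ids below are illustrative):

// Sketch: supportsXhigh now accepts any model on the Anthropic Messages API,
// in addition to the allow-listed OpenAI Codex ids.
type MiniModel = { id: string; api: string };
const XHIGH = new Set(["gpt-5.1-codex-max", "gpt-5.2", "gpt-5.2-codex"]);
const supportsXhighSketch = (m: MiniModel): boolean =>
  XHIGH.has(m.id) || m.api === "anthropic-messages";

supportsXhighSketch({ id: "gpt-5.2-codex", api: "openai-responses" });   // true: allow-listed id
supportsXhighSketch({ id: "claude-sonnet", api: "anthropic-messages" }); // true: new in 5.0.0
supportsXhighSketch({ id: "gemini-2.5-pro", api: "google-gemini-cli" }); // false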
@@ -5,7 +5,7 @@ import type {
   MessageParam,
 } from "@anthropic-ai/sdk/resources/messages";
 import { calculateCost } from "../models";
-import { getEnvApiKey } from "../stream";
+import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
 import type {
   Api,
   AssistantMessage,
@@ -479,10 +479,9 @@ function ensureMaxTokensForThinking(params: MessageCreateParamsStreaming, model:
   if (budgetTokens <= 0) return;
 
   const maxTokens = params.max_tokens ?? 0;
-  const fallbackBuffer = 4000;
-  const requiredMaxTokens = model.maxTokens > 0 ? model.maxTokens : budgetTokens + fallbackBuffer;
+  const requiredMaxTokens = model.maxTokens > 0 ? model.maxTokens : budgetTokens + OUTPUT_FALLBACK_BUFFER;
   if (maxTokens < requiredMaxTokens) {
-    params.max_tokens = requiredMaxTokens;
+    params.max_tokens = Math.min(requiredMaxTokens, model.maxTokens);
   }
 }
 
@@ -535,7 +534,10 @@ function buildParams(
   }
 
   disableThinkingIfToolChoiceForced(params);
-  ensureMaxTokensForThinking(params, model);
+
+  if (!options?.interleavedThinking) {
+    ensureMaxTokensForThinking(params, model);
+  }
 
   return params;
 }
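
Why the gate: with interleaved thinking the thinking budget is allowed to exceed max_tokens (per Anthropic's interleaved-thinking behavior), so the max_tokens bump is only needed on the non-interleaved path. A reduced sketch, not the package's code:

// Sketch (not from the package): the effect of the new gate in buildParams.
// With Anthropic's interleaved thinking, budget_tokens may exceed max_tokens,
// so max_tokens no longer needs to be raised to cover the thinking budget.
function sketchMaxTokens(interleaved: boolean, requested: number, required: number): number {
  return interleaved ? requested : Math.max(requested, required);
}
sketchMaxTokens(true, 8192, 16384);  // 8192: caller's max_tokens kept as-is
sketchMaxTokens(false, 8192, 16384); // 16384: raised to cover the budget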
package/src/stream.ts CHANGED
@@ -179,6 +179,26 @@ export async function completeSimple<TApi extends Api>(
   return s.result();
 }
 
+const MIN_OUTPUT_TOKENS = 1024;
+export const OUTPUT_FALLBACK_BUFFER = 4000;
+const ANTHROPIC_USE_INTERLEAVED_THINKING = true;
+
+const ANTHROPIC_THINKING: Record<ThinkingLevel, number> = {
+  minimal: 3072,
+  low: 6144,
+  medium: 12288,
+  high: 24576,
+  xhigh: 49152,
+};
+
+const GOOGLE_THINKING: Record<ThinkingLevel, number> = {
+  minimal: 1024,
+  low: 4096,
+  medium: 8192,
+  high: 16384,
+  xhigh: 24575,
+};
+
 function mapOptionsForApi<TApi extends Api>(
   model: Model<TApi>,
   options?: SimpleStreamOptions,
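
A sketch of how these tables are consulted (the override value is illustrative; the lookup mirrors the hunks below):

// Sketch: caller-supplied budgets win; the per-provider table is the fallback.
type Level = "minimal" | "low" | "medium" | "high" | "xhigh";
const TABLE: Record<Level, number> = { minimal: 3072, low: 6144, medium: 12288, high: 24576, xhigh: 49152 };
const overrides: Partial<Record<Level, number>> = { high: 32768 };

overrides.high ?? TABLE.high;     // 32768: override wins
overrides.medium ?? TABLE.medium; // 12288: table fallback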
@@ -199,37 +219,43 @@ function mapOptionsForApi<TApi extends Api>(
   switch (model.api) {
     case "anthropic-messages": {
       // Explicitly disable thinking when reasoning is not specified
-      if (!options?.reasoning) {
+      const reasoning = options?.reasoning;
+      if (!reasoning) {
         return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
       }
 
-      // Claude requires max_tokens > thinking.budget_tokens
-      // So we need to ensure maxTokens accounts for both thinking and output
-      const defaultBudgets: ThinkingBudgets = {
-        minimal: 1024,
-        low: 2048,
-        medium: 8192,
-        high: 16384,
-      };
-      const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
-
-      const minOutputTokens = 1024;
-      const level = clampReasoning(options.reasoning)!;
-      let thinkingBudget = budgets[level]!;
+      let thinkingBudget = options.thinkingBudgets?.[reasoning] ?? ANTHROPIC_THINKING[reasoning];
+      if (thinkingBudget <= 0) {
+        return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
+      }
+
+      if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
+        return {
+          ...base,
+          thinkingEnabled: true,
+          thinkingBudgetTokens: thinkingBudget,
+        } satisfies AnthropicOptions;
+      }
+
       // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
       const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
 
       // If not enough room for thinking + output, reduce thinking budget
       if (maxTokens <= thinkingBudget) {
-        thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
+        thinkingBudget = maxTokens - MIN_OUTPUT_TOKENS;
       }
 
-      return {
-        ...base,
-        maxTokens,
-        thinkingEnabled: true,
-        thinkingBudgetTokens: thinkingBudget,
-      } satisfies AnthropicOptions;
+      // If thinking budget is too low, disable thinking
+      if (thinkingBudget <= 0) {
+        return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
+      } else {
+        return {
+          ...base,
+          maxTokens,
+          thinkingEnabled: true,
+          thinkingBudgetTokens: thinkingBudget,
+        } satisfies AnthropicOptions;
+      }
     }
 
     case "openai-completions":
@@ -299,35 +325,26 @@ function mapOptionsForApi<TApi extends Api>(
         } satisfies GoogleGeminiCliOptions;
       }
 
-      // Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
-      // Claude requires max_tokens > thinking.budget_tokens
-      // So we need to ensure maxTokens accounts for both thinking and output
-      const defaultBudgets: ThinkingBudgets = {
-        minimal: 1024,
-        low: 2048,
-        medium: 8192,
-        high: 16384,
-      };
-      const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
-
-      const minOutputTokens = 1024;
-      let thinkingBudget = budgets[effort]!;
+      let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
+
       // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
       const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
 
       // If not enough room for thinking + output, reduce thinking budget
       if (maxTokens <= thinkingBudget) {
-        thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
+        thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS) ?? 0;
       }
 
-      return {
-        ...base,
-        maxTokens,
-        thinking: {
-          enabled: true,
-          budgetTokens: thinkingBudget,
-        },
-      } satisfies GoogleGeminiCliOptions;
+      // If thinking budget is too low, disable thinking
+      if (thinkingBudget <= 0) {
+        return { ...base, thinking: { enabled: false } } satisfies GoogleGeminiCliOptions;
+      } else {
+        return {
+          ...base,
+          maxTokens,
+          thinking: { enabled: true, budgetTokens: thinkingBudget },
+        } satisfies GoogleGeminiCliOptions;
+      }
     }
 
     case "google-vertex": {
package/src/types.ts CHANGED
@@ -82,12 +82,7 @@ export type Provider = KnownProvider | string;
 export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
 
 /** Token budgets for each thinking level (token-based providers only) */
-export interface ThinkingBudgets {
-  minimal?: number;
-  low?: number;
-  medium?: number;
-  high?: number;
-}
+export type ThinkingBudgets = { [key in ThinkingLevel]?: number };
 
 // Base options all providers share
 export interface StreamOptions {
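
Because the keys are now mapped from ThinkingLevel, the budgets type stays in lockstep with the level union; a sketch:

// Sketch: xhigh is now a legal budget key; under the old interface it was a type error.
type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
type ThinkingBudgets = { [key in ThinkingLevel]?: number };

const budgets: ThinkingBudgets = { high: 20000, xhigh: 40000 };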