@oh-my-pi/pi-ai 4.8.3 → 5.0.0

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@oh-my-pi/pi-ai",
-  "version": "4.8.3",
+  "version": "5.0.0",
   "description": "Unified LLM API with automatic model discovery and provider configuration",
   "type": "module",
   "main": "./src/index.ts",
package/src/models.ts CHANGED
@@ -53,7 +53,7 @@ const XHIGH_MODELS = new Set(["gpt-5.1-codex-max", "gpt-5.2", "gpt-5.2-codex"]);
  * Currently only certain OpenAI Codex models support this.
  */
 export function supportsXhigh<TApi extends Api>(model: Model<TApi>): boolean {
-  return XHIGH_MODELS.has(model.id);
+  return XHIGH_MODELS.has(model.id) || model.api === "anthropic-messages";
 }
 
 /**
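
For reference, a minimal sketch of the new behavior (not from the package; the Model shape is reduced to the two fields the function reads, and the model ids below are illustrative):

// Sketch: supportsXhigh now accepts any model on the Anthropic Messages API,
// in addition to the allow-listed OpenAI Codex ids.
type MiniModel = { id: string; api: string };
const XHIGH = new Set(["gpt-5.1-codex-max", "gpt-5.2", "gpt-5.2-codex"]);
const supportsXhighSketch = (m: MiniModel): boolean =>
  XHIGH.has(m.id) || m.api === "anthropic-messages";

supportsXhighSketch({ id: "gpt-5.2-codex", api: "openai-responses" });   // true: allow-listed id
supportsXhighSketch({ id: "claude-sonnet", api: "anthropic-messages" }); // true: new in 5.0.0
supportsXhighSketch({ id: "gemini-2.5-pro", api: "google-gemini-cli" }); // false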
@@ -5,7 +5,7 @@ import type {
   MessageParam,
 } from "@anthropic-ai/sdk/resources/messages";
 import { calculateCost } from "../models";
-import { getEnvApiKey } from "../stream";
+import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
 import type {
   Api,
   AssistantMessage,
@@ -479,10 +479,9 @@ function ensureMaxTokensForThinking(params: MessageCreateParamsStreaming, model:
   if (budgetTokens <= 0) return;
 
   const maxTokens = params.max_tokens ?? 0;
-  const fallbackBuffer = 4000;
-  const requiredMaxTokens = model.maxTokens > 0 ? model.maxTokens : budgetTokens + fallbackBuffer;
+  const requiredMaxTokens = model.maxTokens > 0 ? model.maxTokens : budgetTokens + OUTPUT_FALLBACK_BUFFER;
   if (maxTokens < requiredMaxTokens) {
-    params.max_tokens = requiredMaxTokens;
+    params.max_tokens = Math.min(requiredMaxTokens, model.maxTokens);
   }
 }
 
@@ -535,7 +534,10 @@ function buildParams(
   }
 
   disableThinkingIfToolChoiceForced(params);
-  ensureMaxTokensForThinking(params, model);
+
+  if (!options?.interleavedThinking) {
+    ensureMaxTokensForThinking(params, model);
+  }
 
   return params;
 }
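
Why the gate: with interleaved thinking the thinking budget is allowed to exceed max_tokens (per Anthropic's interleaved-thinking behavior), so the max_tokens bump is only needed on the non-interleaved path. A reduced sketch, not the package's code:

// Sketch (not from the package): the effect of the new gate in buildParams.
// With Anthropic's interleaved thinking, budget_tokens may exceed max_tokens,
// so max_tokens no longer needs to be raised to cover the thinking budget.
function sketchMaxTokens(interleaved: boolean, requested: number, required: number): number {
  return interleaved ? requested : Math.max(requested, required);
}
sketchMaxTokens(true, 8192, 16384);  // 8192: caller's max_tokens kept as-is
sketchMaxTokens(false, 8192, 16384); // 16384: raised to cover the budget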
package/src/stream.ts CHANGED
@@ -179,6 +179,26 @@ export async function completeSimple<TApi extends Api>(
   return s.result();
 }
 
+const MIN_OUTPUT_TOKENS = 1024;
+export const OUTPUT_FALLBACK_BUFFER = 4000;
+const ANTHROPIC_USE_INTERLEAVED_THINKING = true;
+
+const ANTHROPIC_THINKING: Record<ThinkingLevel, number> = {
+  minimal: 3072,
+  low: 6144,
+  medium: 12288,
+  high: 24576,
+  xhigh: 49152,
+};
+
+const GOOGLE_THINKING: Record<ThinkingLevel, number> = {
+  minimal: 1024,
+  low: 4096,
+  medium: 8192,
+  high: 16384,
+  xhigh: 24575,
+};
+
 function mapOptionsForApi<TApi extends Api>(
   model: Model<TApi>,
   options?: SimpleStreamOptions,
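
A sketch of how these tables are consulted (the override value is illustrative; the lookup mirrors the hunks below):

// Sketch: caller-supplied budgets win; the per-provider table is the fallback.
type Level = "minimal" | "low" | "medium" | "high" | "xhigh";
const TABLE: Record<Level, number> = { minimal: 3072, low: 6144, medium: 12288, high: 24576, xhigh: 49152 };
const overrides: Partial<Record<Level, number>> = { high: 32768 };

overrides.high ?? TABLE.high;     // 32768: override wins
overrides.medium ?? TABLE.medium; // 12288: table fallback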
@@ -199,37 +219,43 @@ function mapOptionsForApi<TApi extends Api>(
   switch (model.api) {
     case "anthropic-messages": {
       // Explicitly disable thinking when reasoning is not specified
-      if (!options?.reasoning) {
+      const reasoning = options?.reasoning;
+      if (!reasoning) {
         return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
       }
 
-      // Claude requires max_tokens > thinking.budget_tokens
-      // So we need to ensure maxTokens accounts for both thinking and output
-      const defaultBudgets: ThinkingBudgets = {
-        minimal: 1024,
-        low: 2048,
-        medium: 8192,
-        high: 16384,
-      };
-      const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
-
-      const minOutputTokens = 1024;
-      const level = clampReasoning(options.reasoning)!;
-      let thinkingBudget = budgets[level]!;
+      let thinkingBudget = options.thinkingBudgets?.[reasoning] ?? ANTHROPIC_THINKING[reasoning];
+      if (thinkingBudget <= 0) {
+        return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
+      }
+
+      if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
+        return {
+          ...base,
+          thinkingEnabled: true,
+          thinkingBudgetTokens: thinkingBudget,
+        } satisfies AnthropicOptions;
+      }
+
       // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
       const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
 
       // If not enough room for thinking + output, reduce thinking budget
       if (maxTokens <= thinkingBudget) {
-        thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
+        thinkingBudget = maxTokens - MIN_OUTPUT_TOKENS;
       }
 
-      return {
-        ...base,
-        maxTokens,
-        thinkingEnabled: true,
-        thinkingBudgetTokens: thinkingBudget,
-      } satisfies AnthropicOptions;
+      // If thinking budget is too low, disable thinking
+      if (thinkingBudget <= 0) {
+        return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
+      } else {
+        return {
+          ...base,
+          maxTokens,
+          thinkingEnabled: true,
+          thinkingBudgetTokens: thinkingBudget,
+        } satisfies AnthropicOptions;
+      }
     }
 
     case "openai-completions":
@@ -299,35 +325,26 @@ function mapOptionsForApi<TApi extends Api>(
         } satisfies GoogleGeminiCliOptions;
       }
 
-      // Models using thinkingBudget (Gemini 2.x, Claude via Antigravity)
-      // Claude requires max_tokens > thinking.budget_tokens
-      // So we need to ensure maxTokens accounts for both thinking and output
-      const defaultBudgets: ThinkingBudgets = {
-        minimal: 1024,
-        low: 2048,
-        medium: 8192,
-        high: 16384,
-      };
-      const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
-
-      const minOutputTokens = 1024;
-      let thinkingBudget = budgets[effort]!;
+      let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
+
       // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
       const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
 
       // If not enough room for thinking + output, reduce thinking budget
       if (maxTokens <= thinkingBudget) {
-        thinkingBudget = Math.max(0, maxTokens - minOutputTokens);
+        thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS) ?? 0;
       }
 
-      return {
-        ...base,
-        maxTokens,
-        thinking: {
-          enabled: true,
-          budgetTokens: thinkingBudget,
-        },
-      } satisfies GoogleGeminiCliOptions;
+      // If thinking budget is too low, disable thinking
+      if (thinkingBudget <= 0) {
+        return { ...base, thinking: { enabled: false } } satisfies GoogleGeminiCliOptions;
+      } else {
+        return {
+          ...base,
+          maxTokens,
+          thinking: { enabled: true, budgetTokens: thinkingBudget },
+        } satisfies GoogleGeminiCliOptions;
+      }
     }
 
     case "google-vertex": {
package/src/types.ts CHANGED
@@ -82,12 +82,7 @@ export type Provider = KnownProvider | string;
 export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
 
 /** Token budgets for each thinking level (token-based providers only) */
-export interface ThinkingBudgets {
-  minimal?: number;
-  low?: number;
-  medium?: number;
-  high?: number;
-}
+export type ThinkingBudgets = { [key in ThinkingLevel]?: number };
 
 // Base options all providers share
 export interface StreamOptions {
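
Because the keys are now mapped from ThinkingLevel, the budgets type stays in lockstep with the level union; a sketch:

// Sketch: xhigh is now a legal budget key; under the old interface it was a type error.
type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
type ThinkingBudgets = { [key in ThinkingLevel]?: number };

const budgets: ThinkingBudgets = { high: 20000, xhigh: 40000 };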