@oh-my-pi/pi-ai 4.9.0 → 5.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/models.ts +1 -1
- package/src/providers/anthropic.ts +7 -5
- package/src/stream.ts +60 -43
- package/src/types.ts +1 -6
package/package.json
CHANGED
package/src/models.ts
CHANGED
@@ -53,7 +53,7 @@ const XHIGH_MODELS = new Set(["gpt-5.1-codex-max", "gpt-5.2", "gpt-5.2-codex"]);
  * Currently only certain OpenAI Codex models support this.
  */
 export function supportsXhigh<TApi extends Api>(model: Model<TApi>): boolean {
-	return XHIGH_MODELS.has(model.id);
+	return XHIGH_MODELS.has(model.id) || model.api === "anthropic-messages";
 }
 
 /**
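
In 5.0, supportsXhigh no longer keys solely off the hard-coded model list: any model whose api is "anthropic-messages" now reports xhigh support as well. A minimal sketch of the new behaviour; the model literals below are hypothetical stubs that only carry the two fields supportsXhigh reads:

// Hypothetical model stubs; only `id` and `api` matter to supportsXhigh.
const codex = { id: "gpt-5.2-codex", api: "openai-completions" } as Model<"openai-completions">;
const claude = { id: "claude-example", api: "anthropic-messages" } as Model<"anthropic-messages">;

supportsXhigh(codex);  // true: listed in XHIGH_MODELS
supportsXhigh(claude); // true: new in 5.0, every anthropic-messages model qualifies
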
package/src/providers/anthropic.ts
CHANGED
@@ -5,7 +5,7 @@ import type {
 	MessageParam,
 } from "@anthropic-ai/sdk/resources/messages";
 import { calculateCost } from "../models";
-import { getEnvApiKey } from "../stream";
+import { getEnvApiKey, OUTPUT_FALLBACK_BUFFER } from "../stream";
 import type {
 	Api,
 	AssistantMessage,
@@ -479,10 +479,9 @@ function ensureMaxTokensForThinking(params: MessageCreateParamsStreaming, model:
 	if (budgetTokens <= 0) return;
 
 	const maxTokens = params.max_tokens ?? 0;
-	const
-	const requiredMaxTokens = model.maxTokens > 0 ? model.maxTokens : budgetTokens + fallbackBuffer;
+	const requiredMaxTokens = model.maxTokens > 0 ? model.maxTokens : budgetTokens + OUTPUT_FALLBACK_BUFFER;
 	if (maxTokens < requiredMaxTokens) {
-		params.max_tokens = requiredMaxTokens;
+		params.max_tokens = Math.min(requiredMaxTokens, model.maxTokens);
 	}
 }
 
@@ -535,7 +534,10 @@ function buildParams(
 	}
 
 	disableThinkingIfToolChoiceForced(params);
-
+
+	if (!options?.interleavedThinking) {
+		ensureMaxTokensForThinking(params, model);
+	}
 
 	return params;
 }
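
Two behavioural changes fall out of this file: ensureMaxTokensForThinking now clamps max_tokens to model.maxTokens instead of raising it past the limit, and buildParams only runs that adjustment when interleaved thinking is off. A rough sketch of the clamp arithmetic with hypothetical numbers (the real values come from the model catalog and the caller):

// Hypothetical inputs for illustration only.
const budgetTokens = 24576;   // thinking budget already set on params
const modelMax = 32000;       // model.maxTokens (assumed catalog value)
let maxTokens = 8192;         // params.max_tokens before adjustment

// OUTPUT_FALLBACK_BUFFER (4000) is only used when the catalog has no limit.
const required = modelMax > 0 ? modelMax : budgetTokens + 4000;

// 4.x assigned `required` directly; 5.x clamps to the model limit.
if (maxTokens < required) maxTokens = Math.min(required, modelMax); // => 32000
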
package/src/stream.ts
CHANGED
@@ -179,6 +179,26 @@ export async function completeSimple<TApi extends Api>(
 	return s.result();
 }
 
+const MIN_OUTPUT_TOKENS = 1024;
+export const OUTPUT_FALLBACK_BUFFER = 4000;
+const ANTHROPIC_USE_INTERLEAVED_THINKING = true;
+
+const ANTHROPIC_THINKING: Record<ThinkingLevel, number> = {
+	minimal: 3072,
+	low: 6144,
+	medium: 12288,
+	high: 24576,
+	xhigh: 49152,
+};
+
+const GOOGLE_THINKING: Record<ThinkingLevel, number> = {
+	minimal: 1024,
+	low: 4096,
+	medium: 8192,
+	high: 16384,
+	xhigh: 24575,
+};
+
 function mapOptionsForApi<TApi extends Api>(
 	model: Model<TApi>,
 	options?: SimpleStreamOptions,
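
The tables above are the default per-level thinking budgets; a caller-supplied options.thinkingBudgets entry still wins for its level. A small sketch of the lookup, with a hypothetical options object:

// Hypothetical caller options; unspecified levels fall back to the tables above.
const options: { reasoning: ThinkingLevel; thinkingBudgets?: ThinkingBudgets } = {
	reasoning: "high",
	thinkingBudgets: { high: 20000 },
};

const budget = options.thinkingBudgets?.[options.reasoning] ?? ANTHROPIC_THINKING[options.reasoning];
// => 20000 with the override; 24576 (ANTHROPIC_THINKING.high) without it
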
@@ -199,37 +219,43 @@ function mapOptionsForApi<TApi extends Api>(
 	switch (model.api) {
 		case "anthropic-messages": {
 			// Explicitly disable thinking when reasoning is not specified
-
+			const reasoning = options?.reasoning;
+			if (!reasoning) {
 				return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
 			}
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+			let thinkingBudget = options.thinkingBudgets?.[reasoning] ?? ANTHROPIC_THINKING[reasoning];
+			if (thinkingBudget <= 0) {
+				return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
+			}
+
+			if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
+				return {
+					...base,
+					thinkingEnabled: true,
+					thinkingBudgetTokens: thinkingBudget,
+				} satisfies AnthropicOptions;
+			}
+
 			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
 			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
 
 			// If not enough room for thinking + output, reduce thinking budget
 			if (maxTokens <= thinkingBudget) {
-				thinkingBudget =
+				thinkingBudget = maxTokens - MIN_OUTPUT_TOKENS;
 			}
 
-
-
-
-
-
-
+			// If thinking budget is too low, disable thinking
+			if (thinkingBudget <= 0) {
+				return { ...base, thinkingEnabled: false } satisfies AnthropicOptions;
+			} else {
+				return {
+					...base,
+					maxTokens,
+					thinkingEnabled: true,
+					thinkingBudgetTokens: thinkingBudget,
+				} satisfies AnthropicOptions;
+			}
 		}
 
 		case "openai-completions":
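
Because ANTHROPIC_USE_INTERLEAVED_THINKING is hard-coded to true, the early return above is the path that actually runs, passing the budget through without adjusting maxTokens. The budget/output arithmetic below it only applies if that flag is ever turned off; a worked example of that fallback, with hypothetical numbers:

// Hypothetical values for the non-interleaved fallback path.
const desiredOutput = 2000;   // base.maxTokens from the caller (assumed)
const modelLimit = 16384;     // model.maxTokens (assumed)
let thinkingBudget = 24576;   // ANTHROPIC_THINKING.high

const maxTokens = Math.min(desiredOutput + thinkingBudget, modelLimit); // 16384
if (maxTokens <= thinkingBudget) {
	thinkingBudget = maxTokens - 1024; // MIN_OUTPUT_TOKENS, => 15360
}
// 15360 > 0, so thinking stays enabled with the reduced budget and maxTokens of 16384.
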
@@ -299,35 +325,26 @@ function mapOptionsForApi<TApi extends Api>(
 			} satisfies GoogleGeminiCliOptions;
 		}
 
-
-
-			// So we need to ensure maxTokens accounts for both thinking and output
-			const defaultBudgets: ThinkingBudgets = {
-				minimal: 1024,
-				low: 2048,
-				medium: 8192,
-				high: 16384,
-			};
-			const budgets = { ...defaultBudgets, ...options?.thinkingBudgets };
-
-			const minOutputTokens = 1024;
-			let thinkingBudget = budgets[effort]!;
+			let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
+
 			// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
 			const maxTokens = Math.min((base.maxTokens || 0) + thinkingBudget, model.maxTokens);
 
 			// If not enough room for thinking + output, reduce thinking budget
 			if (maxTokens <= thinkingBudget) {
-				thinkingBudget = Math.max(0, maxTokens -
+				thinkingBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS) ?? 0;
 			}
 
-
-
-
-
-
-
-
-
+			// If thinking budget is too low, disable thinking
+			if (thinkingBudget <= 0) {
+				return { ...base, thinking: { enabled: false } } satisfies GoogleGeminiCliOptions;
+			} else {
+				return {
+					...base,
+					maxTokens,
+					thinking: { enabled: true, budgetTokens: thinkingBudget },
+				} satisfies GoogleGeminiCliOptions;
+			}
 		}
 
 		case "google-vertex": {
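
The Gemini CLI branch follows the same arithmetic but emits the thinking configuration in Gemini's nested shape rather than Anthropic's flat fields. A sketch of the mapped result for a medium-effort request, again with hypothetical base values:

// Hypothetical inputs.
const desiredOutput = 4096;   // base.maxTokens from the caller (assumed)
const modelLimit = 65536;     // model.maxTokens (assumed)
const thinkingBudget = 8192;  // GOOGLE_THINKING.medium

// min(4096 + 8192, 65536) = 12288, which exceeds the budget, so it is left untouched.
const mapped = {
	maxTokens: Math.min(desiredOutput + thinkingBudget, modelLimit),
	thinking: { enabled: true, budgetTokens: thinkingBudget },
};
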
package/src/types.ts
CHANGED
@@ -82,12 +82,7 @@ export type Provider = KnownProvider | string;
 export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
 
 /** Token budgets for each thinking level (token-based providers only) */
-export
-	minimal?: number;
-	low?: number;
-	medium?: number;
-	high?: number;
-}
+export type ThinkingBudgets = { [key in ThinkingLevel]?: number };
 
 // Base options all providers share
 export interface StreamOptions {
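
Rewriting ThinkingBudgets as a mapped type over ThinkingLevel means the new "xhigh" level is accepted automatically, and any future level added to ThinkingLevel will be picked up without touching this type again. A small usage sketch:

// Every key is optional and must be a ThinkingLevel.
const budgets: ThinkingBudgets = { low: 2048, high: 20000, xhigh: 40000 };

// A key outside ThinkingLevel is rejected at compile time:
// const bad: ThinkingBudgets = { ultra: 1 }; // error: 'ultra' is not in ThinkingLevel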