@oh-my-pi/pi-ai 13.8.0 → 13.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -0
- package/package.json +2 -2
- package/src/index.ts +1 -0
- package/src/models.json +1 -1
- package/src/prompts/turn-aborted-guidance.md +4 -0
- package/src/providers/amazon-bedrock.ts +7 -6
- package/src/providers/anthropic.ts +40 -1
- package/src/providers/azure-openai-responses.ts +3 -3
- package/src/providers/gitlab-duo.ts +11 -6
- package/src/providers/kimi.ts +6 -4
- package/src/providers/openai-codex-responses.ts +2 -2
- package/src/providers/openai-completions.ts +5 -5
- package/src/providers/openai-responses.ts +3 -3
- package/src/providers/synthetic.ts +6 -4
- package/src/providers/transform-messages.ts +16 -8
- package/src/stream.ts +99 -102
- package/src/thinking.ts +85 -0
- package/src/types.ts +8 -3
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,51 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [13.9.2] - 2026-03-05
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Support for redacted thinking blocks in Anthropic messages, enabling secure handling of encrypted reasoning content
|
|
10
|
+
- Preservation of latest Anthropic thinking blocks and redacted thinking content during message transformation, even when switching between Anthropic models
|
|
11
|
+
|
|
12
|
+
### Changed
|
|
13
|
+
|
|
14
|
+
- Assistant message content now includes `RedactedThinkingContent` type alongside existing text, thinking, and tool call blocks
|
|
15
|
+
- Message transformation logic now preserves signed thinking blocks and redacted thinking for the latest assistant message in Anthropic conversations
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
|
|
19
|
+
- Fixed Unicode normalization to consistently apply `toWellFormed()` to all text content, including thinking blocks, ensuring proper handling of malformed UTF-16 sequences
|
|
20
|
+
|
|
21
|
+
## [13.9.1] - 2026-03-05
|
|
22
|
+
### Breaking Changes
|
|
23
|
+
|
|
24
|
+
- Removed `THINKING_LEVELS`, `ALL_THINKING_LEVELS`, `ALL_THINKING_MODES`, `THINKING_MODE_DESCRIPTIONS`, and `THINKING_MODE_LABELS` exports
|
|
25
|
+
- Renamed `formatThinking()` to `getThinkingMetadata()` with changed return type from string to `ThinkingMetadata` object
|
|
26
|
+
- Renamed `getAvailableThinkingLevel()` to `getAvailableThinkingLevels()` and added default parameter
|
|
27
|
+
- Renamed `getAvailableThinkingEffort()` to `getAvailableThinkingEfforts()` and added default parameter
|
|
28
|
+
|
|
29
|
+
### Added
|
|
30
|
+
|
|
31
|
+
- Added `ThinkingMetadata` type to provide structured access to thinking mode information (value, label, description)
|
|
32
|
+
|
|
33
|
+
## [13.9.0] - 2026-03-05
|
|
34
|
+
### Added
|
|
35
|
+
|
|
36
|
+
- Exported new thinking module with `ThinkingEffort`, `ThinkingLevel`, and `ThinkingMode` types for managing reasoning effort levels
|
|
37
|
+
- Added `getAvailableThinkingEffort()` function to determine supported thinking effort levels based on model capabilities
|
|
38
|
+
- Added `parseThinkingEffort()`, `parseThinkingLevel()`, and `parseThinkingMode()` functions for parsing thinking configuration strings
|
|
39
|
+
- Added `THINKING_LEVELS`, `ALL_THINKING_LEVELS`, and `ALL_THINKING_MODES` constants for iterating over available thinking options
|
|
40
|
+
- Added `THINKING_MODE_DESCRIPTIONS` and `THINKING_MODE_LABELS` for displaying thinking modes in user interfaces
|
|
41
|
+
- Added `formatThinking()` function to format thinking modes as compact display labels
|
|
42
|
+
|
|
43
|
+
### Changed
|
|
44
|
+
|
|
45
|
+
- Refactored thinking level handling to distinguish between `ThinkingEffort` (provider-level, no "off") and `ThinkingLevel` (user-facing, includes "off")
|
|
46
|
+
- Updated `ThinkingBudgets` type to use `ThinkingEffort` instead of `ThinkingLevel` for more precise token budget configuration
|
|
47
|
+
- Improved reasoning option handling to explicitly support "off" value for disabling reasoning across all providers
|
|
48
|
+
- Simplified thinking effort mapping logic by centralizing provider-specific clamping behavior
|
|
49
|
+
|
|
5
50
|
## [13.7.8] - 2026-03-04
|
|
6
51
|
|
|
7
52
|
### Added
|
|
@@ -9,6 +54,7 @@
|
|
|
9
54
|
- Added ZenMux provider support with mixed API routing: Anthropic-owned models discovered from `https://zenmux.ai/api/v1/models` now use the Anthropic transport (`https://zenmux.ai/api/anthropic`), while other ZenMux models use the OpenAI-compatible transport.
|
|
10
55
|
|
|
11
56
|
## [13.7.7] - 2026-03-04
|
|
57
|
+
|
|
12
58
|
### Changed
|
|
13
59
|
|
|
14
60
|
- Modified response ID normalization to preserve existing item ID prefixes when truncating oversized IDs
|
|
@@ -19,6 +65,7 @@
|
|
|
19
65
|
- Fixed handling of reasoning item IDs to remain untouched during response normalization while function call IDs are properly normalized
|
|
20
66
|
|
|
21
67
|
## [13.7.2] - 2026-03-04
|
|
68
|
+
|
|
22
69
|
### Added
|
|
23
70
|
|
|
24
71
|
- Added support for Kagi API key authentication via `login kagi` command
|
|
@@ -31,6 +78,7 @@
|
|
|
31
78
|
- Tool schema compilation is now cached per schema identity, eliminating redundant recompilation on every tool call
|
|
32
79
|
|
|
33
80
|
## [13.6.0] - 2026-03-03
|
|
81
|
+
|
|
34
82
|
### Added
|
|
35
83
|
|
|
36
84
|
- Added Anthropic Foundry gateway mode controlled by `CLAUDE_CODE_USE_FOUNDRY`, with support for `FOUNDRY_BASE_URL`, `ANTHROPIC_FOUNDRY_API_KEY`, `ANTHROPIC_CUSTOM_HEADERS`, and optional mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS`)
|
|
@@ -43,6 +91,7 @@
|
|
|
43
91
|
- Anthropic auth base-URL fallback now prefers `FOUNDRY_BASE_URL` when `CLAUDE_CODE_USE_FOUNDRY` is enabled
|
|
44
92
|
|
|
45
93
|
## [13.5.8] - 2026-03-02
|
|
94
|
+
|
|
46
95
|
### Fixed
|
|
47
96
|
|
|
48
97
|
- Fixed schema compatibility issue where patternProperties in tool parameters caused failures when converting to legacy Antigravity format
|
|
@@ -59,6 +108,7 @@
|
|
|
59
108
|
- Anthropic cache-control normalization now removes later `ttl: "1h"` entries when a default/5m block has already appeared earlier in evaluation order
|
|
60
109
|
|
|
61
110
|
## [13.5.3] - 2026-03-01
|
|
111
|
+
|
|
62
112
|
### Fixed
|
|
63
113
|
|
|
64
114
|
- Fixed tool argument coercion to handle malformed JSON with trailing wrapper braces by parsing leading JSON containers
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "13.8.0",
|
|
4
|
+
"version": "13.9.2",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
"@aws-sdk/client-bedrock-runtime": "^3",
|
|
42
42
|
"@bufbuild/protobuf": "^2.11",
|
|
43
43
|
"@google/genai": "^1.43",
|
|
44
|
-
"@oh-my-pi/pi-utils": "13.
|
|
44
|
+
"@oh-my-pi/pi-utils": "13.9.2",
|
|
45
45
|
"@sinclair/typebox": "^0.34",
|
|
46
46
|
"@smithy/node-http-handler": "^4.4",
|
|
47
47
|
"ajv": "^8.18",
|
package/src/index.ts
CHANGED
|
@@ -21,6 +21,7 @@ export * from "./providers/openai-responses";
|
|
|
21
21
|
export * from "./providers/synthetic";
|
|
22
22
|
export * from "./rate-limit-utils";
|
|
23
23
|
export * from "./stream";
|
|
24
|
+
export * from "./thinking";
|
|
24
25
|
export * from "./types";
|
|
25
26
|
export * from "./usage";
|
|
26
27
|
export * from "./usage/claude";
|
package/src/models.json
CHANGED
|
@@ -38967,4 +38967,4 @@
|
|
|
38967
38967
|
"maxTokens": 128000
|
|
38968
38968
|
}
|
|
38969
38969
|
}
|
|
38970
|
-
}
|
|
38970
|
+
}
|
package/src/providers/amazon-bedrock.ts
CHANGED
|
@@ -22,6 +22,7 @@ import {
|
|
|
22
22
|
import { $env } from "@oh-my-pi/pi-utils";
|
|
23
23
|
import { NodeHttpHandler } from "@smithy/node-http-handler";
|
|
24
24
|
import { calculateCost } from "../models";
|
|
25
|
+
import type { ThinkingEffort, ThinkingLevel } from "../thinking";
|
|
25
26
|
import type {
|
|
26
27
|
Api,
|
|
27
28
|
AssistantMessage,
|
|
@@ -35,7 +36,6 @@ import type {
|
|
|
35
36
|
TextContent,
|
|
36
37
|
ThinkingBudgets,
|
|
37
38
|
ThinkingContent,
|
|
38
|
-
ThinkingLevel,
|
|
39
39
|
Tool,
|
|
40
40
|
ToolCall,
|
|
41
41
|
ToolResultMessage,
|
|
@@ -622,14 +622,15 @@ function buildAdditionalModelRequestFields(
|
|
|
622
622
|
model: Model<"bedrock-converse-stream">,
|
|
623
623
|
options: BedrockOptions,
|
|
624
624
|
): Record<string, any> | undefined {
|
|
625
|
-
|
|
625
|
+
const reasoning = options.reasoning;
|
|
626
|
+
if (!reasoning || !model.reasoning || reasoning === "off") {
|
|
626
627
|
return undefined;
|
|
627
628
|
}
|
|
628
629
|
|
|
629
630
|
if (model.id.includes("anthropic.claude")) {
|
|
630
631
|
// Opus 4.6+ / Sonnet 4.6+ uses adaptive thinking with effort levels
|
|
631
632
|
if (supportsAdaptiveThinking(model.id)) {
|
|
632
|
-
let effort = mapThinkingLevelToEffort(
|
|
633
|
+
let effort = mapThinkingLevelToEffort(reasoning);
|
|
633
634
|
// "max" effort is only supported on Opus 4.6; clamp to "high" for Sonnet 4.6
|
|
634
635
|
const supportsMax = model.id.includes("opus-4-6") || model.id.includes("opus-4.6");
|
|
635
636
|
if (effort === "max" && !supportsMax) {
|
|
@@ -642,7 +643,7 @@ function buildAdditionalModelRequestFields(
|
|
|
642
643
|
return result;
|
|
643
644
|
}
|
|
644
645
|
|
|
645
|
-
const defaultBudgets: Record<
|
|
646
|
+
const defaultBudgets: Record<ThinkingEffort, number> = {
|
|
646
647
|
minimal: 1024,
|
|
647
648
|
low: 2048,
|
|
648
649
|
medium: 8192,
|
|
@@ -651,8 +652,8 @@ function buildAdditionalModelRequestFields(
|
|
|
651
652
|
};
|
|
652
653
|
|
|
653
654
|
// Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
|
|
654
|
-
const level =
|
|
655
|
-
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[
|
|
655
|
+
const level = reasoning === "xhigh" ? "high" : reasoning;
|
|
656
|
+
const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[level];
|
|
656
657
|
|
|
657
658
|
const result: Record<string, any> = {
|
|
658
659
|
thinking: {
|
package/src/providers/anthropic.ts
CHANGED
|
@@ -18,6 +18,7 @@ import type {
|
|
|
18
18
|
ImageContent,
|
|
19
19
|
Message,
|
|
20
20
|
Model,
|
|
21
|
+
RedactedThinkingContent,
|
|
21
22
|
SimpleStreamOptions,
|
|
22
23
|
StopReason,
|
|
23
24
|
StreamFunction,
|
|
@@ -613,7 +614,12 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
613
614
|
body: params,
|
|
614
615
|
};
|
|
615
616
|
|
|
616
|
-
type Block = (
|
|
617
|
+
type Block = (
|
|
618
|
+
| ThinkingContent
|
|
619
|
+
| RedactedThinkingContent
|
|
620
|
+
| TextContent
|
|
621
|
+
| (ToolCall & { partialJson: string })
|
|
622
|
+
) & { index: number };
|
|
617
623
|
const blocks = output.content as Block[];
|
|
618
624
|
stream.push({ type: "start", partial: output });
|
|
619
625
|
// Retry loop for transient errors from the stream.
|
|
@@ -664,6 +670,13 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
664
670
|
contentIndex: output.content.length - 1,
|
|
665
671
|
partial: output,
|
|
666
672
|
});
|
|
673
|
+
} else if (event.content_block.type === "redacted_thinking") {
|
|
674
|
+
const block: Block = {
|
|
675
|
+
type: "redactedThinking",
|
|
676
|
+
data: event.content_block.data,
|
|
677
|
+
index: event.index,
|
|
678
|
+
};
|
|
679
|
+
output.content.push(block);
|
|
667
680
|
} else if (event.content_block.type === "tool_use") {
|
|
668
681
|
const block: Block = {
|
|
669
682
|
type: "toolCall",
|
|
@@ -1403,6 +1416,10 @@ export function convertAnthropicMessages(
|
|
|
1403
1416
|
}
|
|
1404
1417
|
} else if (msg.role === "assistant") {
|
|
1405
1418
|
const blocks: ContentBlockParam[] = [];
|
|
1419
|
+
const hasSignedThinking = msg.content.some(
|
|
1420
|
+
block =>
|
|
1421
|
+
block.type === "thinking" && !!block.thinkingSignature && block.thinkingSignature.trim().length > 0,
|
|
1422
|
+
);
|
|
1406
1423
|
|
|
1407
1424
|
for (const block of msg.content) {
|
|
1408
1425
|
if (block.type === "text") {
|
|
@@ -1412,6 +1429,22 @@ export function convertAnthropicMessages(
|
|
|
1412
1429
|
text: block.text.toWellFormed(),
|
|
1413
1430
|
});
|
|
1414
1431
|
} else if (block.type === "thinking") {
|
|
1432
|
+
if (hasSignedThinking) {
|
|
1433
|
+
if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
|
|
1434
|
+
if (block.thinking.trim().length === 0) continue;
|
|
1435
|
+
blocks.push({
|
|
1436
|
+
type: "text",
|
|
1437
|
+
text: block.thinking.toWellFormed(),
|
|
1438
|
+
});
|
|
1439
|
+
continue;
|
|
1440
|
+
}
|
|
1441
|
+
blocks.push({
|
|
1442
|
+
type: "thinking",
|
|
1443
|
+
thinking: block.thinking,
|
|
1444
|
+
signature: block.thinkingSignature,
|
|
1445
|
+
});
|
|
1446
|
+
continue;
|
|
1447
|
+
}
|
|
1415
1448
|
if (block.thinking.trim().length === 0) continue;
|
|
1416
1449
|
if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
|
|
1417
1450
|
blocks.push({
|
|
@@ -1425,6 +1458,12 @@ export function convertAnthropicMessages(
|
|
|
1425
1458
|
signature: block.thinkingSignature,
|
|
1426
1459
|
});
|
|
1427
1460
|
}
|
|
1461
|
+
} else if (block.type === "redactedThinking") {
|
|
1462
|
+
if (block.data.trim().length === 0) continue;
|
|
1463
|
+
blocks.push({
|
|
1464
|
+
type: "redacted_thinking",
|
|
1465
|
+
data: block.data,
|
|
1466
|
+
});
|
|
1428
1467
|
} else if (block.type === "toolCall") {
|
|
1429
1468
|
blocks.push({
|
|
1430
1469
|
type: "tool_use",
|
package/src/providers/azure-openai-responses.ts
CHANGED
|
@@ -61,7 +61,7 @@ function resolveDeploymentName(model: Model<"azure-openai-responses">, options?:
|
|
|
61
61
|
|
|
62
62
|
// Azure OpenAI Responses-specific options
|
|
63
63
|
export interface AzureOpenAIResponsesOptions extends StreamOptions {
|
|
64
|
-
|
|
64
|
+
reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
65
65
|
reasoningSummary?: "auto" | "detailed" | "concise" | null;
|
|
66
66
|
azureApiVersion?: string;
|
|
67
67
|
azureResourceName?: string;
|
|
@@ -499,9 +499,9 @@ function buildParams(
|
|
|
499
499
|
// See: https://github.com/can1357/oh-my-pi/issues/41
|
|
500
500
|
params.include = ["reasoning.encrypted_content"];
|
|
501
501
|
|
|
502
|
-
if (options?.
|
|
502
|
+
if (options?.reasoning || options?.reasoningSummary) {
|
|
503
503
|
params.reasoning = {
|
|
504
|
-
effort: options?.
|
|
504
|
+
effort: options?.reasoning || "medium",
|
|
505
505
|
summary: options?.reasoningSummary || "auto",
|
|
506
506
|
};
|
|
507
507
|
} else {
|
package/src/providers/gitlab-duo.ts
CHANGED
|
@@ -267,6 +267,11 @@ export function streamGitLabDuo(
|
|
|
267
267
|
...options.headers,
|
|
268
268
|
};
|
|
269
269
|
|
|
270
|
+
const reasoningEffort =
|
|
271
|
+
options.reasoning === "off"
|
|
272
|
+
? undefined
|
|
273
|
+
: (options.reasoning as "minimal" | "low" | "medium" | "high" | "xhigh" | undefined);
|
|
274
|
+
|
|
270
275
|
const inner =
|
|
271
276
|
mapping.provider === "anthropic"
|
|
272
277
|
? streamAnthropic(
|
|
@@ -295,11 +300,11 @@ export function streamGitLabDuo(
|
|
|
295
300
|
sessionId: options.sessionId,
|
|
296
301
|
providerSessionState: options.providerSessionState,
|
|
297
302
|
onPayload: options.onPayload,
|
|
298
|
-
thinkingEnabled: Boolean(
|
|
299
|
-
thinkingBudgetTokens:
|
|
300
|
-
? (options.thinkingBudgets?.[
|
|
303
|
+
thinkingEnabled: Boolean(reasoningEffort) && model.reasoning,
|
|
304
|
+
thinkingBudgetTokens: reasoningEffort
|
|
305
|
+
? (options.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
|
|
301
306
|
: undefined,
|
|
302
|
-
reasoning:
|
|
307
|
+
reasoning: reasoningEffort,
|
|
303
308
|
toolChoice: mapAnthropicToolChoice(options.toolChoice),
|
|
304
309
|
},
|
|
305
310
|
)
|
|
@@ -329,7 +334,7 @@ export function streamGitLabDuo(
|
|
|
329
334
|
sessionId: options.sessionId,
|
|
330
335
|
providerSessionState: options.providerSessionState,
|
|
331
336
|
onPayload: options.onPayload,
|
|
332
|
-
|
|
337
|
+
reasoning: reasoningEffort,
|
|
333
338
|
toolChoice: options.toolChoice,
|
|
334
339
|
} satisfies OpenAIResponsesOptions,
|
|
335
340
|
)
|
|
@@ -358,7 +363,7 @@ export function streamGitLabDuo(
|
|
|
358
363
|
sessionId: options.sessionId,
|
|
359
364
|
providerSessionState: options.providerSessionState,
|
|
360
365
|
onPayload: options.onPayload,
|
|
361
|
-
|
|
366
|
+
reasoning: reasoningEffort,
|
|
362
367
|
toolChoice: options.toolChoice,
|
|
363
368
|
} satisfies OpenAICompletionsOptions,
|
|
364
369
|
);
|
package/src/providers/kimi.ts
CHANGED
|
@@ -62,9 +62,10 @@ export function streamKimi(
|
|
|
62
62
|
|
|
63
63
|
// Calculate thinking budget from reasoning level
|
|
64
64
|
const reasoning = options?.reasoning;
|
|
65
|
-
const
|
|
66
|
-
const
|
|
67
|
-
|
|
65
|
+
const reasoningEffort = reasoning === "off" ? undefined : reasoning;
|
|
66
|
+
const thinkingEnabled = !!reasoningEffort && model.reasoning;
|
|
67
|
+
const thinkingBudget = reasoningEffort
|
|
68
|
+
? (options?.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
|
|
68
69
|
: undefined;
|
|
69
70
|
|
|
70
71
|
const innerStream = streamAnthropic(anthropicModel, context, {
|
|
@@ -89,6 +90,7 @@ export function streamKimi(
|
|
|
89
90
|
}
|
|
90
91
|
} else {
|
|
91
92
|
// OpenAI format - use original model with Kimi headers
|
|
93
|
+
const reasoningEffort = options?.reasoning === "off" ? undefined : options?.reasoning;
|
|
92
94
|
const innerStream = streamOpenAICompletions(model, context, {
|
|
93
95
|
apiKey: options?.apiKey,
|
|
94
96
|
temperature: options?.temperature,
|
|
@@ -102,7 +104,7 @@ export function streamKimi(
|
|
|
102
104
|
headers: mergedHeaders,
|
|
103
105
|
sessionId: options?.sessionId,
|
|
104
106
|
onPayload: options?.onPayload,
|
|
105
|
-
|
|
107
|
+
reasoning: reasoningEffort,
|
|
106
108
|
});
|
|
107
109
|
|
|
108
110
|
for await (const event of innerStream) {
|
package/src/providers/openai-codex-responses.ts
CHANGED
|
@@ -49,7 +49,7 @@ import { parseCodexError } from "./openai-codex/response-handler";
|
|
|
49
49
|
import { transformMessages } from "./transform-messages";
|
|
50
50
|
|
|
51
51
|
export interface OpenAICodexResponsesOptions extends StreamOptions {
|
|
52
|
-
|
|
52
|
+
reasoning?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
53
53
|
reasoningSummary?: "auto" | "concise" | "detailed" | null;
|
|
54
54
|
textVerbosity?: "low" | "medium" | "high";
|
|
55
55
|
include?: string[];
|
|
@@ -369,7 +369,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
|
|
|
369
369
|
params.instructions = systemPrompt.instructions;
|
|
370
370
|
|
|
371
371
|
const codexOptions: CodexRequestOptions = {
|
|
372
|
-
reasoningEffort: options?.
|
|
372
|
+
reasoningEffort: options?.reasoning,
|
|
373
373
|
reasoningSummary: options?.reasoningSummary ?? "auto",
|
|
374
374
|
textVerbosity: options?.textVerbosity,
|
|
375
375
|
include: options?.include,
|
package/src/providers/openai-completions.ts
CHANGED
|
@@ -109,7 +109,7 @@ function hasToolHistory(messages: Message[]): boolean {
|
|
|
109
109
|
|
|
110
110
|
export interface OpenAICompletionsOptions extends StreamOptions {
|
|
111
111
|
toolChoice?: ToolChoice;
|
|
112
|
-
|
|
112
|
+
reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
113
113
|
}
|
|
114
114
|
|
|
115
115
|
type OpenAICompletionsSamplingParams = OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & {
|
|
@@ -611,13 +611,13 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
|
|
|
611
611
|
if (compat.thinkingFormat === "zai" && model.reasoning) {
|
|
612
612
|
// Z.ai uses binary thinking: { type: "enabled" | "disabled" }
|
|
613
613
|
// Must explicitly disable since z.ai defaults to thinking enabled
|
|
614
|
-
(params as any).thinking = { type: options?.
|
|
614
|
+
(params as any).thinking = { type: options?.reasoning ? "enabled" : "disabled" };
|
|
615
615
|
} else if (compat.thinkingFormat === "qwen" && model.reasoning) {
|
|
616
616
|
// Qwen uses enable_thinking: boolean
|
|
617
|
-
(params as any).enable_thinking = !!options?.
|
|
618
|
-
} else if (options?.
|
|
617
|
+
(params as any).enable_thinking = !!options?.reasoning;
|
|
618
|
+
} else if (options?.reasoning && model.reasoning && compat.supportsReasoningEffort) {
|
|
619
619
|
// OpenAI-style reasoning_effort
|
|
620
|
-
params.reasoning_effort = options
|
|
620
|
+
params.reasoning_effort = options?.reasoning;
|
|
621
621
|
}
|
|
622
622
|
|
|
623
623
|
// OpenRouter provider routing preferences
|
package/src/providers/openai-responses.ts
CHANGED
|
@@ -57,7 +57,7 @@ function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention
|
|
|
57
57
|
|
|
58
58
|
// OpenAI Responses-specific options
|
|
59
59
|
export interface OpenAIResponsesOptions extends StreamOptions {
|
|
60
|
-
|
|
60
|
+
reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
|
|
61
61
|
reasoningSummary?: "auto" | "detailed" | "concise" | null;
|
|
62
62
|
serviceTier?: ResponseCreateParamsStreaming["service_tier"];
|
|
63
63
|
toolChoice?: ToolChoice;
|
|
@@ -480,9 +480,9 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
|
|
|
480
480
|
// See: https://github.com/can1357/oh-my-pi/issues/41
|
|
481
481
|
params.include = ["reasoning.encrypted_content"];
|
|
482
482
|
|
|
483
|
-
if (options?.
|
|
483
|
+
if (options?.reasoning || options?.reasoningSummary) {
|
|
484
484
|
params.reasoning = {
|
|
485
|
-
effort: options?.
|
|
485
|
+
effort: options?.reasoning || "medium",
|
|
486
486
|
summary: options?.reasoningSummary || "auto",
|
|
487
487
|
};
|
|
488
488
|
} else {
|
package/src/providers/synthetic.ts
CHANGED
|
@@ -59,9 +59,10 @@ export function streamSynthetic(
|
|
|
59
59
|
|
|
60
60
|
// Calculate thinking budget from reasoning level
|
|
61
61
|
const reasoning = options?.reasoning;
|
|
62
|
-
const
|
|
63
|
-
const
|
|
64
|
-
|
|
62
|
+
const reasoningEffort = reasoning === "off" ? undefined : reasoning;
|
|
63
|
+
const thinkingEnabled = !!reasoningEffort && model.reasoning;
|
|
64
|
+
const thinkingBudget = reasoningEffort
|
|
65
|
+
? (options?.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
|
|
65
66
|
: undefined;
|
|
66
67
|
|
|
67
68
|
const innerStream = streamAnthropic(anthropicModel, context, {
|
|
@@ -92,6 +93,7 @@ export function streamSynthetic(
|
|
|
92
93
|
headers: mergedHeaders,
|
|
93
94
|
};
|
|
94
95
|
|
|
96
|
+
const reasoningEffort = options?.reasoning === "off" ? undefined : options?.reasoning;
|
|
95
97
|
const innerStream = streamOpenAICompletions(syntheticModel, context, {
|
|
96
98
|
apiKey: options?.apiKey,
|
|
97
99
|
temperature: options?.temperature,
|
|
@@ -105,7 +107,7 @@ export function streamSynthetic(
|
|
|
105
107
|
headers: mergedHeaders,
|
|
106
108
|
sessionId: options?.sessionId,
|
|
107
109
|
onPayload: options?.onPayload,
|
|
108
|
-
|
|
110
|
+
reasoning: reasoningEffort,
|
|
109
111
|
});
|
|
110
112
|
|
|
111
113
|
for await (const event of innerStream) {
|
package/src/providers/transform-messages.ts
CHANGED
|
@@ -1,11 +1,6 @@
|
|
|
1
|
+
import turnAbortedGuidance from "../prompts/turn-aborted-guidance.md" with { type: "text" };
|
|
1
2
|
import type { Api, AssistantMessage, DeveloperMessage, Message, Model, ToolCall, ToolResultMessage } from "../types";
|
|
2
3
|
|
|
3
|
-
const TURN_ABORTED_GUIDANCE =
|
|
4
|
-
"<turn-aborted>\n" +
|
|
5
|
-
"The previous turn was aborted. Any running tools/commands were terminated. " +
|
|
6
|
-
"If tools were aborted, they may have partially executed; verify current state before retrying.\n" +
|
|
7
|
-
"</turn-aborted>";
|
|
8
|
-
|
|
9
4
|
const enum ToolCallStatus {
|
|
10
5
|
/** Tool call has received a result (real or synthetic for orphan) */
|
|
11
6
|
Resolved = 1,
|
|
@@ -31,8 +26,9 @@ export function transformMessages<TApi extends Api>(
|
|
|
31
26
|
// Build a map of original tool call IDs to normalized IDs
|
|
32
27
|
const toolCallIdMap = new Map<string, string>();
|
|
33
28
|
|
|
29
|
+
const latestAssistantIndex = messages.findLastIndex(msg => msg.role === "assistant");
|
|
34
30
|
// First pass: transform messages (thinking blocks, tool call ID normalization)
|
|
35
|
-
const transformed = messages.map(msg => {
|
|
31
|
+
const transformed = messages.map((msg, index) => {
|
|
36
32
|
// User and developer messages pass through unchanged
|
|
37
33
|
if (msg.role === "user" || msg.role === "developer") {
|
|
38
34
|
return msg;
|
|
@@ -55,8 +51,14 @@ export function transformMessages<TApi extends Api>(
|
|
|
55
51
|
assistantMsg.api === model.api &&
|
|
56
52
|
assistantMsg.model === model.id;
|
|
57
53
|
|
|
54
|
+
const mustPreserveLatestAnthropicThinking =
|
|
55
|
+
index === latestAssistantIndex &&
|
|
56
|
+
model.api === "anthropic-messages" &&
|
|
57
|
+
assistantMsg.api === "anthropic-messages";
|
|
58
|
+
|
|
58
59
|
const transformedContent = assistantMsg.content.flatMap(block => {
|
|
59
60
|
if (block.type === "thinking") {
|
|
61
|
+
if (mustPreserveLatestAnthropicThinking) return block;
|
|
60
62
|
// For same model: keep thinking blocks with signatures (needed for replay)
|
|
61
63
|
// even if the thinking text is empty (OpenAI encrypted reasoning)
|
|
62
64
|
if (isSameModel && block.thinkingSignature) return block;
|
|
@@ -69,6 +71,12 @@ export function transformMessages<TApi extends Api>(
|
|
|
69
71
|
};
|
|
70
72
|
}
|
|
71
73
|
|
|
74
|
+
if (block.type === "redactedThinking") {
|
|
75
|
+
if (mustPreserveLatestAnthropicThinking) return block;
|
|
76
|
+
if (isSameModel) return block;
|
|
77
|
+
return [];
|
|
78
|
+
}
|
|
79
|
+
|
|
72
80
|
if (block.type === "text") {
|
|
73
81
|
if (isSameModel) return block;
|
|
74
82
|
return {
|
|
@@ -163,7 +171,7 @@ export function transformMessages<TApi extends Api>(
|
|
|
163
171
|
// Inject turn_aborted guidance marker as developer message
|
|
164
172
|
result.push({
|
|
165
173
|
role: "developer",
|
|
166
|
-
content:
|
|
174
|
+
content: turnAbortedGuidance,
|
|
167
175
|
timestamp: assistantMsg.timestamp + 1,
|
|
168
176
|
} as DeveloperMessage);
|
|
169
177
|
|
package/src/stream.ts
CHANGED
|
@@ -21,6 +21,7 @@ import { streamOpenAICodexResponses } from "./providers/openai-codex-responses";
|
|
|
21
21
|
import { type OpenAICompletionsOptions, streamOpenAICompletions } from "./providers/openai-completions";
|
|
22
22
|
import { streamOpenAIResponses } from "./providers/openai-responses";
|
|
23
23
|
import { isSyntheticModel, streamSynthetic } from "./providers/synthetic";
|
|
24
|
+
import type { ThinkingEffort } from "./thinking";
|
|
24
25
|
import type {
|
|
25
26
|
Api,
|
|
26
27
|
AssistantMessage,
|
|
@@ -31,7 +32,6 @@ import type {
|
|
|
31
32
|
SimpleStreamOptions,
|
|
32
33
|
StreamOptions,
|
|
33
34
|
ThinkingBudgets,
|
|
34
|
-
ThinkingLevel,
|
|
35
35
|
ToolChoice,
|
|
36
36
|
} from "./types";
|
|
37
37
|
|
|
@@ -301,9 +301,9 @@ export async function completeSimple<TApi extends Api>(
|
|
|
301
301
|
|
|
302
302
|
const MIN_OUTPUT_TOKENS = 1024;
|
|
303
303
|
export const OUTPUT_FALLBACK_BUFFER = 4000;
|
|
304
|
-
const ANTHROPIC_USE_INTERLEAVED_THINKING =
|
|
304
|
+
const ANTHROPIC_USE_INTERLEAVED_THINKING = Bun.env.PI_NO_INTERLEAVED_THINKING !== "1";
|
|
305
305
|
|
|
306
|
-
export const ANTHROPIC_THINKING: Record<ThinkingLevel, number> = {
|
|
306
|
+
export const ANTHROPIC_THINKING: Record<ThinkingEffort, number> = {
|
|
307
307
|
minimal: 1024,
|
|
308
308
|
low: 4096,
|
|
309
309
|
medium: 8192,
|
|
@@ -311,7 +311,7 @@ export const ANTHROPIC_THINKING: Record<ThinkingLevel, number> = {
|
|
|
311
311
|
xhigh: 32768,
|
|
312
312
|
};
|
|
313
313
|
|
|
314
|
-
const GOOGLE_THINKING: Record<ThinkingLevel, number> = {
|
|
314
|
+
const GOOGLE_THINKING: Record<ThinkingEffort, number> = {
|
|
315
315
|
minimal: 1024,
|
|
316
316
|
low: 4096,
|
|
317
317
|
medium: 8192,
|
|
@@ -319,7 +319,7 @@ const GOOGLE_THINKING: Record<ThinkingLevel, number> = {
|
|
|
319
319
|
xhigh: 24575,
|
|
320
320
|
};
|
|
321
321
|
|
|
322
|
-
const BEDROCK_CLAUDE_THINKING: Record<ThinkingLevel, number> = {
|
|
322
|
+
const BEDROCK_CLAUDE_THINKING: Record<ThinkingEffort, number> = {
|
|
323
323
|
minimal: 1024,
|
|
324
324
|
low: 2048,
|
|
325
325
|
medium: 8192,
|
|
@@ -330,8 +330,8 @@ const BEDROCK_CLAUDE_THINKING: Record<ThinkingLevel, number> = {
|
|
|
330
330
|
function resolveBedrockThinkingBudget(
|
|
331
331
|
model: Model<"bedrock-converse-stream">,
|
|
332
332
|
options?: SimpleStreamOptions,
|
|
333
|
-
): { budget: number; level:
|
|
334
|
-
if (!options?.reasoning || !model.reasoning) return null;
|
|
333
|
+
): { budget: number; level: ThinkingEffort } | null {
|
|
334
|
+
if (!options?.reasoning || !model.reasoning || options.reasoning === "off") return null;
|
|
335
335
|
if (!model.id.includes("anthropic.claude")) return null;
|
|
336
336
|
const level = options.reasoning === "xhigh" ? "high" : options.reasoning;
|
|
337
337
|
const budget = options.thinkingBudgets?.[level] ?? BEDROCK_CLAUDE_THINKING[level];
|
|
@@ -358,7 +358,7 @@ export function mapAnthropicToolChoice(choice?: ToolChoice): AnthropicOptions["t
|
|
|
358
358
|
/**
|
|
359
359
|
* Map ThinkingLevel to Anthropic effort levels for adaptive thinking (Opus 4.6+)
|
|
360
360
|
*/
|
|
361
|
-
function mapThinkingLevelToAnthropicEffort(level: ThinkingLevel): AnthropicOptions["effort"] {
|
|
361
|
+
function mapThinkingLevelToAnthropicEffort(level: ThinkingEffort, supportsXhigh: boolean): AnthropicOptions["effort"] {
|
|
362
362
|
switch (level) {
|
|
363
363
|
case "minimal":
|
|
364
364
|
return "low";
|
|
@@ -369,7 +369,7 @@ function mapThinkingLevelToAnthropicEffort(level: ThinkingLevel): AnthropicOptio
|
|
|
369
369
|
case "high":
|
|
370
370
|
return "high";
|
|
371
371
|
case "xhigh":
|
|
372
|
-
return "max";
|
|
372
|
+
return supportsXhigh ? "max" : "high";
|
|
373
373
|
default:
|
|
374
374
|
return "high";
|
|
375
375
|
}
|
|
@@ -404,6 +404,18 @@ function mapOpenAiToolChoice(choice?: ToolChoice): OpenAICompletionsOptions["too
|
|
|
404
404
|
return undefined;
|
|
405
405
|
}
|
|
406
406
|
|
|
407
|
+
function resolveOpenAiReasoningEffort<TApi extends Api>(
|
|
408
|
+
model: Model<TApi>,
|
|
409
|
+
options?: SimpleStreamOptions,
|
|
410
|
+
): ThinkingEffort | undefined {
|
|
411
|
+
const reasoning = options?.reasoning;
|
|
412
|
+
if (!reasoning || reasoning === "off") return undefined;
|
|
413
|
+
if (reasoning === "xhigh" && !supportsXhigh(model)) return "high";
|
|
414
|
+
return reasoning;
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
const castApi = <TApi extends Api>(api: OptionsForApi<TApi>): OptionsForApi<Api> => api as OptionsForApi<Api>;
|
|
418
|
+
|
|
407
419
|
function mapOptionsForApi<TApi extends Api>(
|
|
408
420
|
model: Model<TApi>,
|
|
409
421
|
options?: SimpleStreamOptions,
|
|
@@ -429,28 +441,25 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
429
441
|
execHandlers: options?.execHandlers,
|
|
430
442
|
};
|
|
431
443
|
|
|
432
|
-
// Helper to clamp xhigh to high for providers that don't support it
|
|
433
|
-
const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
|
|
434
|
-
|
|
435
444
|
switch (model.api) {
|
|
436
445
|
case "anthropic-messages": {
|
|
437
446
|
// Explicitly disable thinking when reasoning is not specified
|
|
438
447
|
const reasoning = options?.reasoning;
|
|
439
|
-
if (!reasoning) {
|
|
440
|
-
return {
|
|
448
|
+
if (!reasoning || reasoning === "off") {
|
|
449
|
+
return castApi<"anthropic-messages">({
|
|
441
450
|
...base,
|
|
442
451
|
thinkingEnabled: false,
|
|
443
452
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
444
|
-
}
|
|
453
|
+
});
|
|
445
454
|
}
|
|
446
455
|
|
|
447
456
|
let thinkingBudget = options.thinkingBudgets?.[reasoning] ?? ANTHROPIC_THINKING[reasoning];
|
|
448
457
|
if (thinkingBudget <= 0) {
|
|
449
|
-
return {
|
|
458
|
+
return castApi<"anthropic-messages">({
|
|
450
459
|
...base,
|
|
451
460
|
thinkingEnabled: false,
|
|
452
461
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
453
|
-
}
|
|
462
|
+
});
|
|
454
463
|
}
|
|
455
464
|
|
|
456
465
|
// For Opus 4.6+ and Sonnet 4.6+: use adaptive thinking with effort level
|
|
@@ -462,24 +471,22 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
462
471
|
model.id.includes("sonnet-4.6")
|
|
463
472
|
) {
|
|
464
473
|
const supportsMaxEffort = model.id.includes("opus-4-6") || model.id.includes("opus-4.6");
|
|
465
|
-
const effort = mapThinkingLevelToAnthropicEffort(
|
|
466
|
-
|
|
467
|
-
);
|
|
468
|
-
return {
|
|
474
|
+
const effort = mapThinkingLevelToAnthropicEffort(reasoning, supportsMaxEffort);
|
|
475
|
+
return castApi<"anthropic-messages">({
|
|
469
476
|
...base,
|
|
470
477
|
thinkingEnabled: true,
|
|
471
478
|
effort,
|
|
472
479
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
473
|
-
}
|
|
480
|
+
});
|
|
474
481
|
}
|
|
475
482
|
|
|
476
483
|
if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
|
|
477
|
-
return {
|
|
484
|
+
return castApi<"anthropic-messages">({
|
|
478
485
|
...base,
|
|
479
486
|
thinkingEnabled: true,
|
|
480
487
|
thinkingBudgetTokens: thinkingBudget,
|
|
481
488
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
482
|
-
}
|
|
489
|
+
});
|
|
483
490
|
}
|
|
484
491
|
|
|
485
492
|
// Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
|
|
@@ -492,19 +499,19 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
492
499
|
|
|
493
500
|
// If thinking budget is too low, disable thinking
|
|
494
501
|
if (thinkingBudget <= 0) {
|
|
495
|
-
return {
|
|
502
|
+
return castApi<"anthropic-messages">({
|
|
496
503
|
...base,
|
|
497
504
|
thinkingEnabled: false,
|
|
498
505
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
499
|
-
}
|
|
506
|
+
});
|
|
500
507
|
} else {
|
|
501
|
-
return {
|
|
508
|
+
return castApi<"anthropic-messages">({
|
|
502
509
|
...base,
|
|
503
510
|
maxTokens,
|
|
504
511
|
thinkingEnabled: true,
|
|
505
512
|
thinkingBudgetTokens: thinkingBudget,
|
|
506
513
|
toolChoice: mapAnthropicToolChoice(options?.toolChoice),
|
|
507
|
-
}
|
|
514
|
+
});
|
|
508
515
|
}
|
|
509
516
|
}
|
|
510
517
|
|
|
@@ -529,96 +536,98 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
529
536
|
const adjustedBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
|
|
530
537
|
thinkingBudgets = { ...(thinkingBudgets ?? {}), [budgetInfo.level]: adjustedBudget };
|
|
531
538
|
}
|
|
532
|
-
return { ...bedrockBase, maxTokens, thinkingBudgets }
|
|
539
|
+
return castApi<"bedrock-converse-stream">({ ...bedrockBase, maxTokens, thinkingBudgets });
|
|
533
540
|
}
|
|
534
541
|
|
|
535
542
|
case "openai-completions":
|
|
536
|
-
return {
|
|
543
|
+
return castApi<"openai-completions">({
|
|
537
544
|
...base,
|
|
538
|
-
|
|
545
|
+
reasoning: resolveOpenAiReasoningEffort(model, options),
|
|
539
546
|
toolChoice: mapOpenAiToolChoice(options?.toolChoice),
|
|
540
|
-
}
|
|
547
|
+
});
|
|
541
548
|
|
|
542
549
|
case "openai-responses":
|
|
543
|
-
return {
|
|
550
|
+
return castApi<"openai-responses">({
|
|
544
551
|
...base,
|
|
545
|
-
|
|
552
|
+
reasoning: resolveOpenAiReasoningEffort(model, options),
|
|
546
553
|
toolChoice: mapOpenAiToolChoice(options?.toolChoice),
|
|
547
|
-
}
|
|
554
|
+
});
|
|
548
555
|
|
|
549
556
|
case "azure-openai-responses":
|
|
550
|
-
return {
|
|
557
|
+
return castApi<"azure-openai-responses">({
|
|
551
558
|
...base,
|
|
552
|
-
|
|
559
|
+
reasoning: resolveOpenAiReasoningEffort(model, options),
|
|
553
560
|
toolChoice: mapOpenAiToolChoice(options?.toolChoice),
|
|
554
|
-
}
|
|
561
|
+
});
|
|
555
562
|
|
|
556
563
|
case "openai-codex-responses":
|
|
557
|
-
return {
|
|
564
|
+
return castApi<"openai-codex-responses">({
|
|
558
565
|
...base,
|
|
559
|
-
|
|
566
|
+
reasoning: resolveOpenAiReasoningEffort(model, options),
|
|
560
567
|
toolChoice: mapOpenAiToolChoice(options?.toolChoice),
|
|
561
568
|
preferWebsockets: options?.preferWebsockets,
|
|
562
|
-
}
|
|
569
|
+
});
|
|
563
570
|
|
|
564
571
|
case "google-generative-ai": {
|
|
565
572
|
// Explicitly disable thinking when reasoning is not specified
|
|
566
573
|
// This is needed because Gemini has "dynamic thinking" enabled by default
|
|
567
|
-
|
|
568
|
-
|
|
574
|
+
const reasoning = options?.reasoning;
|
|
575
|
+
if (!reasoning || reasoning === "off") {
|
|
576
|
+
return castApi<"google-generative-ai">({
|
|
569
577
|
...base,
|
|
570
578
|
thinking: { enabled: false },
|
|
571
579
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
572
|
-
}
|
|
580
|
+
});
|
|
573
581
|
}
|
|
574
582
|
|
|
575
583
|
const googleModel = model as Model<"google-generative-ai">;
|
|
576
|
-
const effort =
|
|
584
|
+
const effort = reasoning === "xhigh" ? "high" : reasoning;
|
|
577
585
|
|
|
578
586
|
// Gemini 3+ models use thinkingLevel exclusively instead of thinkingBudget.
|
|
579
587
|
// https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
580
588
|
if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
|
|
581
|
-
return {
|
|
589
|
+
return castApi<"google-generative-ai">({
|
|
582
590
|
...base,
|
|
583
591
|
thinking: {
|
|
584
592
|
enabled: true,
|
|
585
593
|
level: getGemini3ThinkingLevel(effort, googleModel),
|
|
586
594
|
},
|
|
587
595
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
588
|
-
}
|
|
596
|
+
});
|
|
589
597
|
}
|
|
590
598
|
|
|
591
|
-
return {
|
|
599
|
+
return castApi<"google-gemini-cli">({
|
|
592
600
|
...base,
|
|
593
601
|
thinking: {
|
|
594
602
|
enabled: true,
|
|
595
603
|
budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
|
|
596
604
|
},
|
|
597
605
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
598
|
-
}
|
|
606
|
+
});
|
|
599
607
|
}
|
|
600
608
|
|
|
601
609
|
case "google-gemini-cli": {
|
|
602
|
-
|
|
603
|
-
|
|
610
|
+
const reasoning = options?.reasoning;
|
|
611
|
+
if (!reasoning || reasoning === "off") {
|
|
612
|
+
return castApi<"google-gemini-cli">({
|
|
604
613
|
...base,
|
|
605
614
|
thinking: { enabled: false },
|
|
606
615
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
607
|
-
}
|
|
616
|
+
});
|
|
608
617
|
}
|
|
609
618
|
|
|
610
|
-
const effort =
|
|
619
|
+
const effort = reasoning === "xhigh" ? "high" : reasoning;
|
|
611
620
|
|
|
612
621
|
// Gemini 3+ models use thinkingLevel instead of thinkingBudget
|
|
613
622
|
if (isGemini3ProModelId(model.id) || isGemini3FlashModelId(model.id)) {
|
|
614
|
-
return {
|
|
623
|
+
return castApi<"google-vertex">({
|
|
615
624
|
...base,
|
|
616
625
|
thinking: {
|
|
617
626
|
enabled: true,
|
|
618
627
|
level: getGeminiCliThinkingLevel(effort, model.id),
|
|
619
628
|
},
|
|
620
629
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
621
|
-
}
|
|
630
|
+
});
|
|
622
631
|
}
|
|
623
632
|
|
|
624
633
|
let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
|
|
@@ -633,64 +642,65 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
633
642
|
|
|
634
643
|
// If thinking budget is too low, disable thinking
|
|
635
644
|
if (thinkingBudget <= 0) {
|
|
636
|
-
return {
|
|
645
|
+
return castApi<"google-gemini-cli">({
|
|
637
646
|
...base,
|
|
638
647
|
thinking: { enabled: false },
|
|
639
648
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
640
|
-
}
|
|
649
|
+
});
|
|
641
650
|
} else {
|
|
642
|
-
return {
|
|
651
|
+
return castApi<"google-gemini-cli">({
|
|
643
652
|
...base,
|
|
644
653
|
maxTokens,
|
|
645
654
|
thinking: { enabled: true, budgetTokens: thinkingBudget },
|
|
646
655
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
647
|
-
}
|
|
656
|
+
});
|
|
648
657
|
}
|
|
649
658
|
}
|
|
650
659
|
|
|
651
660
|
case "google-vertex": {
|
|
652
661
|
// Explicitly disable thinking when reasoning is not specified
|
|
653
|
-
|
|
654
|
-
|
|
662
|
+
const reasoning = options?.reasoning;
|
|
663
|
+
if (!reasoning || reasoning === "off") {
|
|
664
|
+
return castApi<"google-vertex">({
|
|
655
665
|
...base,
|
|
656
666
|
thinking: { enabled: false },
|
|
657
667
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
658
|
-
}
|
|
668
|
+
});
|
|
659
669
|
}
|
|
660
670
|
|
|
661
671
|
const vertexModel = model as Model<"google-vertex">;
|
|
662
|
-
const effort =
|
|
672
|
+
const effort = reasoning === "xhigh" ? "high" : reasoning;
|
|
663
673
|
const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;
|
|
664
674
|
|
|
665
675
|
if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
|
|
666
|
-
return {
|
|
676
|
+
return castApi<"google-vertex">({
|
|
667
677
|
...base,
|
|
668
678
|
thinking: {
|
|
669
679
|
enabled: true,
|
|
670
680
|
level: getGemini3ThinkingLevel(effort, geminiModel),
|
|
671
681
|
},
|
|
672
682
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
673
|
-
}
|
|
683
|
+
});
|
|
674
684
|
}
|
|
675
685
|
|
|
676
|
-
return {
|
|
686
|
+
return castApi<"google-vertex">({
|
|
677
687
|
...base,
|
|
678
688
|
thinking: {
|
|
679
689
|
enabled: true,
|
|
680
690
|
budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
|
|
681
691
|
},
|
|
682
692
|
toolChoice: mapGoogleToolChoice(options?.toolChoice),
|
|
683
|
-
}
|
|
693
|
+
});
|
|
684
694
|
}
|
|
685
695
|
|
|
686
696
|
case "cursor-agent": {
|
|
687
697
|
const execHandlers = options?.cursorExecHandlers ?? options?.execHandlers;
|
|
688
698
|
const onToolResult = options?.cursorOnToolResult ?? execHandlers?.onToolResult;
|
|
689
|
-
return {
|
|
699
|
+
return castApi<"cursor-agent">({
|
|
690
700
|
...base,
|
|
691
701
|
execHandlers,
|
|
692
702
|
onToolResult,
|
|
693
|
-
}
|
|
703
|
+
});
|
|
694
704
|
}
|
|
695
705
|
|
|
696
706
|
default:
|
|
@@ -698,8 +708,6 @@ function mapOptionsForApi<TApi extends Api>(
|
|
|
698
708
|
}
|
|
699
709
|
}
|
|
700
710
|
|
|
701
|
-
type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
|
|
702
|
-
|
|
703
711
|
/**
 * Reports whether a model id looks like a Gemini 3 "pro" variant.
 *
 * The check is a substring match: a "3", optionally followed by a minor
 * version such as ".1", immediately followed by "-pro".
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when the pattern occurs anywhere in `modelId`.
 */
function isGemini3ProModelId(modelId: string): boolean {
	const proMarker = /3(?:\.\d+)?-pro/;
	return modelId.search(proMarker) !== -1;
}
|
|
@@ -718,18 +726,14 @@ function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
|
|
|
718
726
|
return isGemini3FlashModelId(model.id);
|
|
719
727
|
}
|
|
720
728
|
|
|
721
|
-
function getGemini3ThinkingLevel(
|
|
722
|
-
effort: ClampedThinkingLevel,
|
|
723
|
-
model: Model<"google-generative-ai">,
|
|
724
|
-
): GoogleThinkingLevel {
|
|
729
|
+
function getGemini3ThinkingLevel(effort: ThinkingEffort, model: Model<"google-generative-ai">): GoogleThinkingLevel {
|
|
725
730
|
if (isGemini3ProModel(model)) {
|
|
726
731
|
// Gemini 3 Pro only supports LOW/HIGH (for now)
|
|
727
732
|
switch (effort) {
|
|
728
733
|
case "minimal":
|
|
729
734
|
case "low":
|
|
730
735
|
return "LOW";
|
|
731
|
-
|
|
732
|
-
case "high":
|
|
736
|
+
default:
|
|
733
737
|
return "HIGH";
|
|
734
738
|
}
|
|
735
739
|
}
|
|
@@ -741,20 +745,19 @@ function getGemini3ThinkingLevel(
|
|
|
741
745
|
return "LOW";
|
|
742
746
|
case "medium":
|
|
743
747
|
return "MEDIUM";
|
|
744
|
-
|
|
748
|
+
default:
|
|
745
749
|
return "HIGH";
|
|
746
750
|
}
|
|
747
751
|
}
|
|
748
752
|
|
|
749
|
-
function getGeminiCliThinkingLevel(effort:
|
|
753
|
+
function getGeminiCliThinkingLevel(effort: ThinkingEffort, modelId: string): GoogleThinkingLevel {
|
|
750
754
|
if (isGemini3ProModelId(modelId)) {
|
|
751
755
|
// Gemini 3 Pro only supports LOW/HIGH (for now)
|
|
752
756
|
switch (effort) {
|
|
753
757
|
case "minimal":
|
|
754
758
|
case "low":
|
|
755
759
|
return "LOW";
|
|
756
|
-
|
|
757
|
-
case "high":
|
|
760
|
+
default:
|
|
758
761
|
return "HIGH";
|
|
759
762
|
}
|
|
760
763
|
}
|
|
@@ -766,41 +769,35 @@ function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string
|
|
|
766
769
|
return "LOW";
|
|
767
770
|
case "medium":
|
|
768
771
|
return "MEDIUM";
|
|
769
|
-
|
|
772
|
+
default:
|
|
770
773
|
return "HIGH";
|
|
771
774
|
}
|
|
772
775
|
}
|
|
773
776
|
|
|
774
777
|
function getGoogleBudget(
|
|
775
778
|
model: Model<"google-generative-ai">,
|
|
776
|
-
effort:
|
|
779
|
+
effort: ThinkingEffort,
|
|
777
780
|
customBudgets?: ThinkingBudgets,
|
|
778
781
|
): number {
|
|
782
|
+
effort = effort === "xhigh" ? "high" : effort;
|
|
783
|
+
|
|
779
784
|
// Custom budgets take precedence if provided for this level
|
|
780
785
|
if (customBudgets?.[effort] !== undefined) {
|
|
781
786
|
return customBudgets[effort]!;
|
|
782
787
|
}
|
|
783
788
|
|
|
784
789
|
// See https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
785
|
-
if (model.id.includes("2.5-
|
|
786
|
-
|
|
787
|
-
minimal:
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
// Covers 2.5-flash-lite as well
|
|
797
|
-
const budgets: Record<ClampedThinkingLevel, number> = {
|
|
798
|
-
minimal: 128,
|
|
799
|
-
low: 2048,
|
|
800
|
-
medium: 8192,
|
|
801
|
-
high: 24576,
|
|
802
|
-
};
|
|
803
|
-
return budgets[effort];
|
|
790
|
+
if (model.id.includes("2.5-")) {
|
|
791
|
+
switch (effort) {
|
|
792
|
+
case "minimal":
|
|
793
|
+
return 128;
|
|
794
|
+
case "low":
|
|
795
|
+
return 2048;
|
|
796
|
+
case "medium":
|
|
797
|
+
return 8192;
|
|
798
|
+
default:
|
|
799
|
+
return model.id.includes("2.5-flash") ? 24576 : 32768;
|
|
800
|
+
}
|
|
804
801
|
}
|
|
805
802
|
|
|
806
803
|
// Unknown model - use dynamic
|
package/src/thinking.ts
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/** Provider-level thinking efforts (no "off"), ordered least to most. */
export type ThinkingEffort = "minimal" | "low" | "medium" | "high" | "xhigh";

/**
 * ThinkingEffort extended with "off" to disable reasoning entirely.
 * Used in UI, config, session state, and CLI args.
 * "off" is never sent to providers — callers strip it before streaming.
 */
export type ThinkingLevel = ThinkingEffort | "off";

/**
 * ThinkingLevel extended with "inherit" to indicate the role should
 * use the session-level default rather than an explicit choice.
 * Used in per-role model assignment UI.
 */
export type ThinkingMode = ThinkingLevel | "inherit";

/** Metadata for a thinking mode. */
export type ThinkingMetadata = {
	/** The value of the thinking mode. */
	value: ThinkingMode;
	/** The label to display for the thinking mode. */
	label: string;
	/** The description to display for the thinking mode. */
	description: string;
};

/** Display metadata for every thinking mode, keyed by the mode itself. */
const THINKING_META: Record<ThinkingMode, ThinkingMetadata> = {
	inherit: { value: "inherit", label: "inherit", description: "Inherit session default" },
	off: { value: "off", label: "off", description: "No reasoning" },
	minimal: { value: "minimal", label: "min", description: "Very brief reasoning (~1k tokens)" },
	low: { value: "low", label: "low", description: "Light reasoning (~2k tokens)" },
	medium: { value: "medium", label: "medium", description: "Moderate reasoning (~8k tokens)" },
	high: { value: "high", label: "high", description: "Deep reasoning (~16k tokens)" },
	xhigh: { value: "xhigh", label: "xhigh", description: "Maximum reasoning (~32k tokens)" },
};

// Rank thresholds. A value belongs to a type when its rank is at least the
// threshold of that type: efforts (3) ⊂ levels (2) ⊂ modes (1).
const F_LEVEL = 3;
const F_SEL = 2;
const F_MODE = 1;

/** Rank of every known mode; typed exhaustively so a new mode cannot be forgotten. */
const RANK_BY_MODE: Record<ThinkingMode, number> = {
	inherit: F_MODE,
	off: F_SEL,
	minimal: F_LEVEL,
	low: F_LEVEL,
	medium: F_LEVEL,
	high: F_LEVEL,
	xhigh: F_LEVEL,
};

// Own-key lookup table. A Map is used instead of indexing a plain object so that
// arbitrary input such as "constructor" or "__proto__" can never resolve to an
// inherited Object.prototype member.
const THINKING_RANK: ReadonlyMap<string, number> = new Map(Object.entries(RANK_BY_MODE));

/** Rank of a raw value; 0 for null, undefined, empty, or unrecognised strings. */
function thinkingRank(value: string | null | undefined): number {
	if (!value) return 0;
	return THINKING_RANK.get(value) ?? 0;
}

/**
 * Parses a raw string into a ThinkingEffort.
 *
 * @param level - Raw value, e.g. from config or CLI args.
 * @returns The value as a ThinkingEffort, or `undefined` when it is missing, "off", "inherit", or unknown.
 */
export function parseThinkingEffort(level: string | null | undefined): ThinkingEffort | undefined {
	return thinkingRank(level) >= F_LEVEL ? (level as ThinkingEffort) : undefined;
}

/**
 * Parses a raw string into a ThinkingLevel (an effort or "off").
 *
 * @param level - Raw value, e.g. from config or CLI args.
 * @returns The value as a ThinkingLevel, or `undefined` when it is missing, "inherit", or unknown.
 */
export function parseThinkingLevel(level: string | null | undefined): ThinkingLevel | undefined {
	return thinkingRank(level) >= F_SEL ? (level as ThinkingLevel) : undefined;
}

/**
 * Parses a raw string into a ThinkingMode (a level or "inherit").
 *
 * @param level - Raw value, e.g. from config or CLI args.
 * @returns The value as a ThinkingMode, or `undefined` when it is missing or unknown.
 */
export function parseThinkingMode(level: string | null | undefined): ThinkingMode | undefined {
	return thinkingRank(level) >= F_MODE ? (level as ThinkingMode) : undefined;
}

/** Get the display metadata for a thinking mode. */
export function getThinkingMetadata(mode: ThinkingMode): ThinkingMetadata {
	return THINKING_META[mode];
}

const REG_LVL: readonly ThinkingLevel[] = ["off", "minimal", "low", "medium", "high"];
const XHI_LVL: readonly ThinkingLevel[] = ["off", "minimal", "low", "medium", "high", "xhigh"];

/**
 * Returns the selectable thinking levels (including "off"), least to most.
 *
 * @param hasXhigh - Whether "xhigh" should be offered; defaults to `true`.
 * @returns A shared read-only array; "xhigh" is included only when `hasXhigh` is true.
 */
export function getAvailableThinkingLevels(hasXhigh: boolean = true): ReadonlyArray<ThinkingLevel> {
	return hasXhigh ? XHI_LVL : REG_LVL;
}

const REG_EFF: readonly ThinkingEffort[] = ["minimal", "low", "medium", "high"];
const XHI_EFF: readonly ThinkingEffort[] = ["minimal", "low", "medium", "high", "xhigh"];

/**
 * Returns the selectable thinking efforts (no "off"), least to most.
 *
 * @param hasXhigh - Whether "xhigh" should be offered; defaults to `true`.
 * @returns A shared read-only array; "xhigh" is included only when `hasXhigh` is true.
 */
export function getAvailableThinkingEfforts(hasXhigh: boolean = true): ReadonlyArray<ThinkingEffort> {
	return hasXhigh ? XHI_EFF : REG_EFF;
}
|
package/src/types.ts
CHANGED
|
@@ -109,10 +109,10 @@ export type KnownProvider =
|
|
|
109
109
|
| "lm-studio";
|
|
110
110
|
export type Provider = KnownProvider | string;
|
|
111
111
|
|
|
112
|
-
|
|
112
|
+
import type { ThinkingEffort, ThinkingLevel } from "./thinking";
|
|
113
113
|
|
|
114
114
|
/** Token budgets for each thinking level (token-based providers only) */
|
|
115
|
-
export type ThinkingBudgets = { [key in
|
|
115
|
+
export type ThinkingBudgets = { [key in ThinkingEffort]?: number };
|
|
116
116
|
|
|
117
117
|
export type MessageAttribution = "user" | "agent";
|
|
118
118
|
|
|
@@ -224,6 +224,11 @@ export interface ThinkingContent {
|
|
|
224
224
|
thinkingSignature?: string; // e.g., for OpenAI responses, the reasoning item ID
|
|
225
225
|
}
|
|
226
226
|
|
|
227
|
+
/**
 * Assistant content block that stands in for redacted thinking.
 *
 * NOTE(review): the changelog describes this as encrypted reasoning content from
 * Anthropic messages; `data` is presumably an opaque payload that is passed back
 * unchanged — confirm against the Anthropic provider.
 */
export interface RedactedThinkingContent {
	/** Discriminant that identifies this block within assistant message content. */
	type: "redactedThinking";
	/** Redacted payload as a string. */
	data: string;
}
|
|
231
|
+
|
|
227
232
|
export interface ImageContent {
|
|
228
233
|
type: "image";
|
|
229
234
|
data: string; // base64 encoded image data
|
|
@@ -277,7 +282,7 @@ export interface DeveloperMessage {
|
|
|
277
282
|
|
|
278
283
|
export interface AssistantMessage {
|
|
279
284
|
role: "assistant";
|
|
280
|
-
content: (TextContent | ThinkingContent | ToolCall)[];
|
|
285
|
+
content: (TextContent | ThinkingContent | RedactedThinkingContent | ToolCall)[];
|
|
281
286
|
api: Api;
|
|
282
287
|
provider: Provider;
|
|
283
288
|
model: string;
|