@oh-my-pi/pi-ai 13.8.0 → 13.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,51 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [13.9.2] - 2026-03-05
6
+
7
+ ### Added
8
+
9
+ - Support for redacted thinking blocks in Anthropic messages, enabling secure handling of encrypted reasoning content
10
+ - Preservation of latest Anthropic thinking blocks and redacted thinking content during message transformation, even when switching between Anthropic models
11
+
12
+ ### Changed
13
+
14
+ - Assistant message content now includes `RedactedThinkingContent` type alongside existing text, thinking, and tool call blocks
15
+ - Message transformation logic now preserves signed thinking blocks and redacted thinking for the latest assistant message in Anthropic conversations
16
+
17
+ ### Fixed
18
+
19
+ - Fixed Unicode normalization to consistently apply `toWellFormed()` to all text content, including thinking blocks, ensuring proper handling of malformed UTF-16 sequences
20
+
21
+ ## [13.9.1] - 2026-03-05
22
+ ### Breaking Changes
23
+
24
+ - Removed `THINKING_LEVELS`, `ALL_THINKING_LEVELS`, `ALL_THINKING_MODES`, `THINKING_MODE_DESCRIPTIONS`, and `THINKING_MODE_LABELS` exports
25
+ - Renamed `formatThinking()` to `getThinkingMetadata()` with changed return type from string to `ThinkingMetadata` object
26
+ - Renamed `getAvailableThinkingLevel()` to `getAvailableThinkingLevels()` and added default parameter
27
+ - Renamed `getAvailableThinkingEffort()` to `getAvailableThinkingEfforts()` and added default parameter
28
+
29
+ ### Added
30
+
31
+ - Added `ThinkingMetadata` type to provide structured access to thinking mode information (value, label, description)
32
+
33
+ ## [13.9.0] - 2026-03-05
34
+ ### Added
35
+
36
+ - Exported new thinking module with `ThinkingEffort`, `ThinkingLevel`, and `ThinkingMode` types for managing reasoning effort levels
37
+ - Added `getAvailableThinkingEffort()` function to determine supported thinking effort levels based on model capabilities
38
+ - Added `parseThinkingEffort()`, `parseThinkingLevel()`, and `parseThinkingMode()` functions for parsing thinking configuration strings
39
+ - Added `THINKING_LEVELS`, `ALL_THINKING_LEVELS`, and `ALL_THINKING_MODES` constants for iterating over available thinking options
40
+ - Added `THINKING_MODE_DESCRIPTIONS` and `THINKING_MODE_LABELS` for displaying thinking modes in user interfaces
41
+ - Added `formatThinking()` function to format thinking modes as compact display labels
42
+
43
+ ### Changed
44
+
45
+ - Refactored thinking level handling to distinguish between `ThinkingEffort` (provider-level, no "off") and `ThinkingLevel` (user-facing, includes "off")
46
+ - Updated `ThinkingBudgets` type to use `ThinkingEffort` instead of `ThinkingLevel` for more precise token budget configuration
47
+ - Improved reasoning option handling to explicitly support "off" value for disabling reasoning across all providers
48
+ - Simplified thinking effort mapping logic by centralizing provider-specific clamping behavior
49
+
5
50
  ## [13.7.8] - 2026-03-04
6
51
 
7
52
  ### Added
@@ -9,6 +54,7 @@
9
54
  - Added ZenMux provider support with mixed API routing: Anthropic-owned models discovered from `https://zenmux.ai/api/v1/models` now use the Anthropic transport (`https://zenmux.ai/api/anthropic`), while other ZenMux models use the OpenAI-compatible transport.
10
55
 
11
56
  ## [13.7.7] - 2026-03-04
57
+
12
58
  ### Changed
13
59
 
14
60
  - Modified response ID normalization to preserve existing item ID prefixes when truncating oversized IDs
@@ -19,6 +65,7 @@
19
65
  - Fixed handling of reasoning item IDs to remain untouched during response normalization while function call IDs are properly normalized
20
66
 
21
67
  ## [13.7.2] - 2026-03-04
68
+
22
69
  ### Added
23
70
 
24
71
  - Added support for Kagi API key authentication via `login kagi` command
@@ -31,6 +78,7 @@
31
78
  - Tool schema compilation is now cached per schema identity, eliminating redundant recompilation on every tool call
32
79
 
33
80
  ## [13.6.0] - 2026-03-03
81
+
34
82
  ### Added
35
83
 
36
84
  - Added Anthropic Foundry gateway mode controlled by `CLAUDE_CODE_USE_FOUNDRY`, with support for `FOUNDRY_BASE_URL`, `ANTHROPIC_FOUNDRY_API_KEY`, `ANTHROPIC_CUSTOM_HEADERS`, and optional mTLS material (`CLAUDE_CODE_CLIENT_CERT`, `CLAUDE_CODE_CLIENT_KEY`, `NODE_EXTRA_CA_CERTS`)
@@ -43,6 +91,7 @@
43
91
  - Anthropic auth base-URL fallback now prefers `FOUNDRY_BASE_URL` when `CLAUDE_CODE_USE_FOUNDRY` is enabled
44
92
 
45
93
  ## [13.5.8] - 2026-03-02
94
+
46
95
  ### Fixed
47
96
 
48
97
  - Fixed schema compatibility issue where patternProperties in tool parameters caused failures when converting to legacy Antigravity format
@@ -59,6 +108,7 @@
59
108
  - Anthropic cache-control normalization now removes later `ttl: "1h"` entries when a default/5m block has already appeared earlier in evaluation order
60
109
 
61
110
  ## [13.5.3] - 2026-03-01
111
+
62
112
  ### Fixed
63
113
 
64
114
  - Fixed tool argument coercion to handle malformed JSON with trailing wrapper braces by parsing leading JSON containers
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "13.8.0",
4
+ "version": "13.9.2",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -41,7 +41,7 @@
41
41
  "@aws-sdk/client-bedrock-runtime": "^3",
42
42
  "@bufbuild/protobuf": "^2.11",
43
43
  "@google/genai": "^1.43",
44
- "@oh-my-pi/pi-utils": "13.8.0",
44
+ "@oh-my-pi/pi-utils": "13.9.2",
45
45
  "@sinclair/typebox": "^0.34",
46
46
  "@smithy/node-http-handler": "^4.4",
47
47
  "ajv": "^8.18",
package/src/index.ts CHANGED
@@ -21,6 +21,7 @@ export * from "./providers/openai-responses";
21
21
  export * from "./providers/synthetic";
22
22
  export * from "./rate-limit-utils";
23
23
  export * from "./stream";
24
+ export * from "./thinking";
24
25
  export * from "./types";
25
26
  export * from "./usage";
26
27
  export * from "./usage/claude";
package/src/models.json CHANGED
@@ -38967,4 +38967,4 @@
38967
38967
  "maxTokens": 128000
38968
38968
  }
38969
38969
  }
38970
- }
38970
+ }
@@ -0,0 +1,4 @@
1
+ <turn-aborted>
2
+ The previous turn was aborted. Any running tools/commands were terminated.
3
+ If tools were aborted, they may have partially executed; verify current state before retrying.
4
+ </turn-aborted>
@@ -22,6 +22,7 @@ import {
22
22
  import { $env } from "@oh-my-pi/pi-utils";
23
23
  import { NodeHttpHandler } from "@smithy/node-http-handler";
24
24
  import { calculateCost } from "../models";
25
+ import type { ThinkingEffort, ThinkingLevel } from "../thinking";
25
26
  import type {
26
27
  Api,
27
28
  AssistantMessage,
@@ -35,7 +36,6 @@ import type {
35
36
  TextContent,
36
37
  ThinkingBudgets,
37
38
  ThinkingContent,
38
- ThinkingLevel,
39
39
  Tool,
40
40
  ToolCall,
41
41
  ToolResultMessage,
@@ -622,14 +622,15 @@ function buildAdditionalModelRequestFields(
622
622
  model: Model<"bedrock-converse-stream">,
623
623
  options: BedrockOptions,
624
624
  ): Record<string, any> | undefined {
625
- if (!options.reasoning || !model.reasoning) {
625
+ const reasoning = options.reasoning;
626
+ if (!reasoning || !model.reasoning || reasoning === "off") {
626
627
  return undefined;
627
628
  }
628
629
 
629
630
  if (model.id.includes("anthropic.claude")) {
630
631
  // Opus 4.6+ / Sonnet 4.6+ uses adaptive thinking with effort levels
631
632
  if (supportsAdaptiveThinking(model.id)) {
632
- let effort = mapThinkingLevelToEffort(options.reasoning);
633
+ let effort = mapThinkingLevelToEffort(reasoning);
633
634
  // "max" effort is only supported on Opus 4.6; clamp to "high" for Sonnet 4.6
634
635
  const supportsMax = model.id.includes("opus-4-6") || model.id.includes("opus-4.6");
635
636
  if (effort === "max" && !supportsMax) {
@@ -642,7 +643,7 @@ function buildAdditionalModelRequestFields(
642
643
  return result;
643
644
  }
644
645
 
645
- const defaultBudgets: Record<ThinkingLevel, number> = {
646
+ const defaultBudgets: Record<ThinkingEffort, number> = {
646
647
  minimal: 1024,
647
648
  low: 2048,
648
649
  medium: 8192,
@@ -651,8 +652,8 @@ function buildAdditionalModelRequestFields(
651
652
  };
652
653
 
653
654
  // Custom budgets override defaults (xhigh not in ThinkingBudgets, use high)
654
- const level = options.reasoning === "xhigh" ? "high" : options.reasoning;
655
- const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[options.reasoning];
655
+ const level = reasoning === "xhigh" ? "high" : reasoning;
656
+ const budget = options.thinkingBudgets?.[level] ?? defaultBudgets[level];
656
657
 
657
658
  const result: Record<string, any> = {
658
659
  thinking: {
@@ -18,6 +18,7 @@ import type {
18
18
  ImageContent,
19
19
  Message,
20
20
  Model,
21
+ RedactedThinkingContent,
21
22
  SimpleStreamOptions,
22
23
  StopReason,
23
24
  StreamFunction,
@@ -613,7 +614,12 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
613
614
  body: params,
614
615
  };
615
616
 
616
- type Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string })) & { index: number };
617
+ type Block = (
618
+ | ThinkingContent
619
+ | RedactedThinkingContent
620
+ | TextContent
621
+ | (ToolCall & { partialJson: string })
622
+ ) & { index: number };
617
623
  const blocks = output.content as Block[];
618
624
  stream.push({ type: "start", partial: output });
619
625
  // Retry loop for transient errors from the stream.
@@ -664,6 +670,13 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
664
670
  contentIndex: output.content.length - 1,
665
671
  partial: output,
666
672
  });
673
+ } else if (event.content_block.type === "redacted_thinking") {
674
+ const block: Block = {
675
+ type: "redactedThinking",
676
+ data: event.content_block.data,
677
+ index: event.index,
678
+ };
679
+ output.content.push(block);
667
680
  } else if (event.content_block.type === "tool_use") {
668
681
  const block: Block = {
669
682
  type: "toolCall",
@@ -1403,6 +1416,10 @@ export function convertAnthropicMessages(
1403
1416
  }
1404
1417
  } else if (msg.role === "assistant") {
1405
1418
  const blocks: ContentBlockParam[] = [];
1419
+ const hasSignedThinking = msg.content.some(
1420
+ block =>
1421
+ block.type === "thinking" && !!block.thinkingSignature && block.thinkingSignature.trim().length > 0,
1422
+ );
1406
1423
 
1407
1424
  for (const block of msg.content) {
1408
1425
  if (block.type === "text") {
@@ -1412,6 +1429,22 @@ export function convertAnthropicMessages(
1412
1429
  text: block.text.toWellFormed(),
1413
1430
  });
1414
1431
  } else if (block.type === "thinking") {
1432
+ if (hasSignedThinking) {
1433
+ if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
1434
+ if (block.thinking.trim().length === 0) continue;
1435
+ blocks.push({
1436
+ type: "text",
1437
+ text: block.thinking.toWellFormed(),
1438
+ });
1439
+ continue;
1440
+ }
1441
+ blocks.push({
1442
+ type: "thinking",
1443
+ thinking: block.thinking,
1444
+ signature: block.thinkingSignature,
1445
+ });
1446
+ continue;
1447
+ }
1415
1448
  if (block.thinking.trim().length === 0) continue;
1416
1449
  if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
1417
1450
  blocks.push({
@@ -1425,6 +1458,12 @@ export function convertAnthropicMessages(
1425
1458
  signature: block.thinkingSignature,
1426
1459
  });
1427
1460
  }
1461
+ } else if (block.type === "redactedThinking") {
1462
+ if (block.data.trim().length === 0) continue;
1463
+ blocks.push({
1464
+ type: "redacted_thinking",
1465
+ data: block.data,
1466
+ });
1428
1467
  } else if (block.type === "toolCall") {
1429
1468
  blocks.push({
1430
1469
  type: "tool_use",
@@ -61,7 +61,7 @@ function resolveDeploymentName(model: Model<"azure-openai-responses">, options?:
61
61
 
62
62
  // Azure OpenAI Responses-specific options
63
63
  export interface AzureOpenAIResponsesOptions extends StreamOptions {
64
- reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
64
+ reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
65
65
  reasoningSummary?: "auto" | "detailed" | "concise" | null;
66
66
  azureApiVersion?: string;
67
67
  azureResourceName?: string;
@@ -499,9 +499,9 @@ function buildParams(
499
499
  // See: https://github.com/can1357/oh-my-pi/issues/41
500
500
  params.include = ["reasoning.encrypted_content"];
501
501
 
502
- if (options?.reasoningEffort || options?.reasoningSummary) {
502
+ if (options?.reasoning || options?.reasoningSummary) {
503
503
  params.reasoning = {
504
- effort: options?.reasoningEffort || "medium",
504
+ effort: options?.reasoning || "medium",
505
505
  summary: options?.reasoningSummary || "auto",
506
506
  };
507
507
  } else {
@@ -267,6 +267,11 @@ export function streamGitLabDuo(
267
267
  ...options.headers,
268
268
  };
269
269
 
270
+ const reasoningEffort =
271
+ options.reasoning === "off"
272
+ ? undefined
273
+ : (options.reasoning as "minimal" | "low" | "medium" | "high" | "xhigh" | undefined);
274
+
270
275
  const inner =
271
276
  mapping.provider === "anthropic"
272
277
  ? streamAnthropic(
@@ -295,11 +300,11 @@ export function streamGitLabDuo(
295
300
  sessionId: options.sessionId,
296
301
  providerSessionState: options.providerSessionState,
297
302
  onPayload: options.onPayload,
298
- thinkingEnabled: Boolean(options.reasoning) && model.reasoning,
299
- thinkingBudgetTokens: options.reasoning
300
- ? (options.thinkingBudgets?.[options.reasoning] ?? ANTHROPIC_THINKING[options.reasoning])
303
+ thinkingEnabled: Boolean(reasoningEffort) && model.reasoning,
304
+ thinkingBudgetTokens: reasoningEffort
305
+ ? (options.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
301
306
  : undefined,
302
- reasoning: options.reasoning,
307
+ reasoning: reasoningEffort,
303
308
  toolChoice: mapAnthropicToolChoice(options.toolChoice),
304
309
  },
305
310
  )
@@ -329,7 +334,7 @@ export function streamGitLabDuo(
329
334
  sessionId: options.sessionId,
330
335
  providerSessionState: options.providerSessionState,
331
336
  onPayload: options.onPayload,
332
- reasoningEffort: options.reasoning,
337
+ reasoning: reasoningEffort,
333
338
  toolChoice: options.toolChoice,
334
339
  } satisfies OpenAIResponsesOptions,
335
340
  )
@@ -358,7 +363,7 @@ export function streamGitLabDuo(
358
363
  sessionId: options.sessionId,
359
364
  providerSessionState: options.providerSessionState,
360
365
  onPayload: options.onPayload,
361
- reasoningEffort: options.reasoning,
366
+ reasoning: reasoningEffort,
362
367
  toolChoice: options.toolChoice,
363
368
  } satisfies OpenAICompletionsOptions,
364
369
  );
@@ -62,9 +62,10 @@ export function streamKimi(
62
62
 
63
63
  // Calculate thinking budget from reasoning level
64
64
  const reasoning = options?.reasoning;
65
- const thinkingEnabled = !!reasoning && model.reasoning;
66
- const thinkingBudget = reasoning
67
- ? (options?.thinkingBudgets?.[reasoning] ?? ANTHROPIC_THINKING[reasoning])
65
+ const reasoningEffort = reasoning === "off" ? undefined : reasoning;
66
+ const thinkingEnabled = !!reasoningEffort && model.reasoning;
67
+ const thinkingBudget = reasoningEffort
68
+ ? (options?.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
68
69
  : undefined;
69
70
 
70
71
  const innerStream = streamAnthropic(anthropicModel, context, {
@@ -89,6 +90,7 @@ export function streamKimi(
89
90
  }
90
91
  } else {
91
92
  // OpenAI format - use original model with Kimi headers
93
+ const reasoningEffort = options?.reasoning === "off" ? undefined : options?.reasoning;
92
94
  const innerStream = streamOpenAICompletions(model, context, {
93
95
  apiKey: options?.apiKey,
94
96
  temperature: options?.temperature,
@@ -102,7 +104,7 @@ export function streamKimi(
102
104
  headers: mergedHeaders,
103
105
  sessionId: options?.sessionId,
104
106
  onPayload: options?.onPayload,
105
- reasoningEffort: options?.reasoning,
107
+ reasoning: reasoningEffort,
106
108
  });
107
109
 
108
110
  for await (const event of innerStream) {
@@ -49,7 +49,7 @@ import { parseCodexError } from "./openai-codex/response-handler";
49
49
  import { transformMessages } from "./transform-messages";
50
50
 
51
51
  export interface OpenAICodexResponsesOptions extends StreamOptions {
52
- reasoningEffort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
52
+ reasoning?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh";
53
53
  reasoningSummary?: "auto" | "concise" | "detailed" | null;
54
54
  textVerbosity?: "low" | "medium" | "high";
55
55
  include?: string[];
@@ -369,7 +369,7 @@ export const streamOpenAICodexResponses: StreamFunction<"openai-codex-responses"
369
369
  params.instructions = systemPrompt.instructions;
370
370
 
371
371
  const codexOptions: CodexRequestOptions = {
372
- reasoningEffort: options?.reasoningEffort,
372
+ reasoningEffort: options?.reasoning,
373
373
  reasoningSummary: options?.reasoningSummary ?? "auto",
374
374
  textVerbosity: options?.textVerbosity,
375
375
  include: options?.include,
@@ -109,7 +109,7 @@ function hasToolHistory(messages: Message[]): boolean {
109
109
 
110
110
  export interface OpenAICompletionsOptions extends StreamOptions {
111
111
  toolChoice?: ToolChoice;
112
- reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
112
+ reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
113
113
  }
114
114
 
115
115
  type OpenAICompletionsSamplingParams = OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & {
@@ -611,13 +611,13 @@ function buildParams(model: Model<"openai-completions">, context: Context, optio
611
611
  if (compat.thinkingFormat === "zai" && model.reasoning) {
612
612
  // Z.ai uses binary thinking: { type: "enabled" | "disabled" }
613
613
  // Must explicitly disable since z.ai defaults to thinking enabled
614
- (params as any).thinking = { type: options?.reasoningEffort ? "enabled" : "disabled" };
614
+ (params as any).thinking = { type: options?.reasoning ? "enabled" : "disabled" };
615
615
  } else if (compat.thinkingFormat === "qwen" && model.reasoning) {
616
616
  // Qwen uses enable_thinking: boolean
617
- (params as any).enable_thinking = !!options?.reasoningEffort;
618
- } else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {
617
+ (params as any).enable_thinking = !!options?.reasoning;
618
+ } else if (options?.reasoning && model.reasoning && compat.supportsReasoningEffort) {
619
619
  // OpenAI-style reasoning_effort
620
- params.reasoning_effort = options.reasoningEffort;
620
+ params.reasoning_effort = options?.reasoning;
621
621
  }
622
622
 
623
623
  // OpenRouter provider routing preferences
@@ -57,7 +57,7 @@ function getPromptCacheRetention(baseUrl: string, cacheRetention: CacheRetention
57
57
 
58
58
  // OpenAI Responses-specific options
59
59
  export interface OpenAIResponsesOptions extends StreamOptions {
60
- reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
60
+ reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
61
61
  reasoningSummary?: "auto" | "detailed" | "concise" | null;
62
62
  serviceTier?: ResponseCreateParamsStreaming["service_tier"];
63
63
  toolChoice?: ToolChoice;
@@ -480,9 +480,9 @@ function buildParams(model: Model<"openai-responses">, context: Context, options
480
480
  // See: https://github.com/can1357/oh-my-pi/issues/41
481
481
  params.include = ["reasoning.encrypted_content"];
482
482
 
483
- if (options?.reasoningEffort || options?.reasoningSummary) {
483
+ if (options?.reasoning || options?.reasoningSummary) {
484
484
  params.reasoning = {
485
- effort: options?.reasoningEffort || "medium",
485
+ effort: options?.reasoning || "medium",
486
486
  summary: options?.reasoningSummary || "auto",
487
487
  };
488
488
  } else {
@@ -59,9 +59,10 @@ export function streamSynthetic(
59
59
 
60
60
  // Calculate thinking budget from reasoning level
61
61
  const reasoning = options?.reasoning;
62
- const thinkingEnabled = !!reasoning && model.reasoning;
63
- const thinkingBudget = reasoning
64
- ? (options?.thinkingBudgets?.[reasoning] ?? ANTHROPIC_THINKING[reasoning])
62
+ const reasoningEffort = reasoning === "off" ? undefined : reasoning;
63
+ const thinkingEnabled = !!reasoningEffort && model.reasoning;
64
+ const thinkingBudget = reasoningEffort
65
+ ? (options?.thinkingBudgets?.[reasoningEffort] ?? ANTHROPIC_THINKING[reasoningEffort])
65
66
  : undefined;
66
67
 
67
68
  const innerStream = streamAnthropic(anthropicModel, context, {
@@ -92,6 +93,7 @@ export function streamSynthetic(
92
93
  headers: mergedHeaders,
93
94
  };
94
95
 
96
+ const reasoningEffort = options?.reasoning === "off" ? undefined : options?.reasoning;
95
97
  const innerStream = streamOpenAICompletions(syntheticModel, context, {
96
98
  apiKey: options?.apiKey,
97
99
  temperature: options?.temperature,
@@ -105,7 +107,7 @@ export function streamSynthetic(
105
107
  headers: mergedHeaders,
106
108
  sessionId: options?.sessionId,
107
109
  onPayload: options?.onPayload,
108
- reasoningEffort: options?.reasoning,
110
+ reasoning: reasoningEffort,
109
111
  });
110
112
 
111
113
  for await (const event of innerStream) {
@@ -1,11 +1,6 @@
1
+ import turnAbortedGuidance from "../prompts/turn-aborted-guidance.md" with { type: "text" };
1
2
  import type { Api, AssistantMessage, DeveloperMessage, Message, Model, ToolCall, ToolResultMessage } from "../types";
2
3
 
3
- const TURN_ABORTED_GUIDANCE =
4
- "<turn-aborted>\n" +
5
- "The previous turn was aborted. Any running tools/commands were terminated. " +
6
- "If tools were aborted, they may have partially executed; verify current state before retrying.\n" +
7
- "</turn-aborted>";
8
-
9
4
  const enum ToolCallStatus {
10
5
  /** Tool call has received a result (real or synthetic for orphan) */
11
6
  Resolved = 1,
@@ -31,8 +26,9 @@ export function transformMessages<TApi extends Api>(
31
26
  // Build a map of original tool call IDs to normalized IDs
32
27
  const toolCallIdMap = new Map<string, string>();
33
28
 
29
+ const latestAssistantIndex = messages.findLastIndex(msg => msg.role === "assistant");
34
30
  // First pass: transform messages (thinking blocks, tool call ID normalization)
35
- const transformed = messages.map(msg => {
31
+ const transformed = messages.map((msg, index) => {
36
32
  // User and developer messages pass through unchanged
37
33
  if (msg.role === "user" || msg.role === "developer") {
38
34
  return msg;
@@ -55,8 +51,14 @@ export function transformMessages<TApi extends Api>(
55
51
  assistantMsg.api === model.api &&
56
52
  assistantMsg.model === model.id;
57
53
 
54
+ const mustPreserveLatestAnthropicThinking =
55
+ index === latestAssistantIndex &&
56
+ model.api === "anthropic-messages" &&
57
+ assistantMsg.api === "anthropic-messages";
58
+
58
59
  const transformedContent = assistantMsg.content.flatMap(block => {
59
60
  if (block.type === "thinking") {
61
+ if (mustPreserveLatestAnthropicThinking) return block;
60
62
  // For same model: keep thinking blocks with signatures (needed for replay)
61
63
  // even if the thinking text is empty (OpenAI encrypted reasoning)
62
64
  if (isSameModel && block.thinkingSignature) return block;
@@ -69,6 +71,12 @@ export function transformMessages<TApi extends Api>(
69
71
  };
70
72
  }
71
73
 
74
+ if (block.type === "redactedThinking") {
75
+ if (mustPreserveLatestAnthropicThinking) return block;
76
+ if (isSameModel) return block;
77
+ return [];
78
+ }
79
+
72
80
  if (block.type === "text") {
73
81
  if (isSameModel) return block;
74
82
  return {
@@ -163,7 +171,7 @@ export function transformMessages<TApi extends Api>(
163
171
  // Inject turn_aborted guidance marker as developer message
164
172
  result.push({
165
173
  role: "developer",
166
- content: TURN_ABORTED_GUIDANCE,
174
+ content: turnAbortedGuidance,
167
175
  timestamp: assistantMsg.timestamp + 1,
168
176
  } as DeveloperMessage);
169
177
 
package/src/stream.ts CHANGED
@@ -21,6 +21,7 @@ import { streamOpenAICodexResponses } from "./providers/openai-codex-responses";
21
21
  import { type OpenAICompletionsOptions, streamOpenAICompletions } from "./providers/openai-completions";
22
22
  import { streamOpenAIResponses } from "./providers/openai-responses";
23
23
  import { isSyntheticModel, streamSynthetic } from "./providers/synthetic";
24
+ import type { ThinkingEffort } from "./thinking";
24
25
  import type {
25
26
  Api,
26
27
  AssistantMessage,
@@ -31,7 +32,6 @@ import type {
31
32
  SimpleStreamOptions,
32
33
  StreamOptions,
33
34
  ThinkingBudgets,
34
- ThinkingLevel,
35
35
  ToolChoice,
36
36
  } from "./types";
37
37
 
@@ -301,9 +301,9 @@ export async function completeSimple<TApi extends Api>(
301
301
 
302
302
  const MIN_OUTPUT_TOKENS = 1024;
303
303
  export const OUTPUT_FALLBACK_BUFFER = 4000;
304
- const ANTHROPIC_USE_INTERLEAVED_THINKING = true;
304
+ const ANTHROPIC_USE_INTERLEAVED_THINKING = Bun.env.PI_NO_INTERLEAVED_THINKING !== "1";
305
305
 
306
- export const ANTHROPIC_THINKING: Record<ThinkingLevel, number> = {
306
+ export const ANTHROPIC_THINKING: Record<ThinkingEffort, number> = {
307
307
  minimal: 1024,
308
308
  low: 4096,
309
309
  medium: 8192,
@@ -311,7 +311,7 @@ export const ANTHROPIC_THINKING: Record<ThinkingLevel, number> = {
311
311
  xhigh: 32768,
312
312
  };
313
313
 
314
- const GOOGLE_THINKING: Record<ThinkingLevel, number> = {
314
+ const GOOGLE_THINKING: Record<ThinkingEffort, number> = {
315
315
  minimal: 1024,
316
316
  low: 4096,
317
317
  medium: 8192,
@@ -319,7 +319,7 @@ const GOOGLE_THINKING: Record<ThinkingLevel, number> = {
319
319
  xhigh: 24575,
320
320
  };
321
321
 
322
- const BEDROCK_CLAUDE_THINKING: Record<ThinkingLevel, number> = {
322
+ const BEDROCK_CLAUDE_THINKING: Record<ThinkingEffort, number> = {
323
323
  minimal: 1024,
324
324
  low: 2048,
325
325
  medium: 8192,
@@ -330,8 +330,8 @@ const BEDROCK_CLAUDE_THINKING: Record<ThinkingLevel, number> = {
330
330
  function resolveBedrockThinkingBudget(
331
331
  model: Model<"bedrock-converse-stream">,
332
332
  options?: SimpleStreamOptions,
333
- ): { budget: number; level: ThinkingLevel } | null {
334
- if (!options?.reasoning || !model.reasoning) return null;
333
+ ): { budget: number; level: ThinkingEffort } | null {
334
+ if (!options?.reasoning || !model.reasoning || options.reasoning === "off") return null;
335
335
  if (!model.id.includes("anthropic.claude")) return null;
336
336
  const level = options.reasoning === "xhigh" ? "high" : options.reasoning;
337
337
  const budget = options.thinkingBudgets?.[level] ?? BEDROCK_CLAUDE_THINKING[level];
@@ -358,7 +358,7 @@ export function mapAnthropicToolChoice(choice?: ToolChoice): AnthropicOptions["t
358
358
  /**
359
359
  * Map a ThinkingEffort to Anthropic effort levels for adaptive thinking (Opus 4.6+); "xhigh" becomes "max" only when supportsXhigh is true, otherwise "high"
360
360
  */
361
- function mapThinkingLevelToAnthropicEffort(level: ThinkingLevel): AnthropicOptions["effort"] {
361
+ function mapThinkingLevelToAnthropicEffort(level: ThinkingEffort, supportsXhigh: boolean): AnthropicOptions["effort"] {
362
362
  switch (level) {
363
363
  case "minimal":
364
364
  return "low";
@@ -369,7 +369,7 @@ function mapThinkingLevelToAnthropicEffort(level: ThinkingLevel): AnthropicOptio
369
369
  case "high":
370
370
  return "high";
371
371
  case "xhigh":
372
- return "max";
372
+ return supportsXhigh ? "max" : "high";
373
373
  default:
374
374
  return "high";
375
375
  }
@@ -404,6 +404,18 @@ function mapOpenAiToolChoice(choice?: ToolChoice): OpenAICompletionsOptions["too
404
404
  return undefined;
405
405
  }
406
406
 
407
+ function resolveOpenAiReasoningEffort<TApi extends Api>(
408
+ model: Model<TApi>,
409
+ options?: SimpleStreamOptions,
410
+ ): ThinkingEffort | undefined {
411
+ const reasoning = options?.reasoning;
412
+ if (!reasoning || reasoning === "off") return undefined;
413
+ if (reasoning === "xhigh" && !supportsXhigh(model)) return "high";
414
+ return reasoning;
415
+ }
416
+
417
+ const castApi = <TApi extends Api>(api: OptionsForApi<TApi>): OptionsForApi<Api> => api as OptionsForApi<Api>;
418
+
407
419
  function mapOptionsForApi<TApi extends Api>(
408
420
  model: Model<TApi>,
409
421
  options?: SimpleStreamOptions,
@@ -429,28 +441,25 @@ function mapOptionsForApi<TApi extends Api>(
429
441
  execHandlers: options?.execHandlers,
430
442
  };
431
443
 
432
- // Helper to clamp xhigh to high for providers that don't support it
433
- const clampReasoning = (effort: ThinkingLevel | undefined) => (effort === "xhigh" ? "high" : effort);
434
-
435
444
  switch (model.api) {
436
445
  case "anthropic-messages": {
437
446
  // Explicitly disable thinking when reasoning is not specified or is "off"
438
447
  const reasoning = options?.reasoning;
439
- if (!reasoning) {
440
- return {
448
+ if (!reasoning || reasoning === "off") {
449
+ return castApi<"anthropic-messages">({
441
450
  ...base,
442
451
  thinkingEnabled: false,
443
452
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
444
- } as OptionsForApi<TApi>;
453
+ });
445
454
  }
446
455
 
447
456
  let thinkingBudget = options.thinkingBudgets?.[reasoning] ?? ANTHROPIC_THINKING[reasoning];
448
457
  if (thinkingBudget <= 0) {
449
- return {
458
+ return castApi<"anthropic-messages">({
450
459
  ...base,
451
460
  thinkingEnabled: false,
452
461
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
453
- } as OptionsForApi<TApi>;
462
+ });
454
463
  }
455
464
 
456
465
  // For Opus 4.6+ and Sonnet 4.6+: use adaptive thinking with effort level
@@ -462,24 +471,22 @@ function mapOptionsForApi<TApi extends Api>(
462
471
  model.id.includes("sonnet-4.6")
463
472
  ) {
464
473
  const supportsMaxEffort = model.id.includes("opus-4-6") || model.id.includes("opus-4.6");
465
- const effort = mapThinkingLevelToAnthropicEffort(
466
- supportsMaxEffort ? reasoning : (clampReasoning(reasoning) ?? reasoning),
467
- );
468
- return {
474
+ const effort = mapThinkingLevelToAnthropicEffort(reasoning, supportsMaxEffort);
475
+ return castApi<"anthropic-messages">({
469
476
  ...base,
470
477
  thinkingEnabled: true,
471
478
  effort,
472
479
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
473
- } as OptionsForApi<TApi>;
480
+ });
474
481
  }
475
482
 
476
483
  if (ANTHROPIC_USE_INTERLEAVED_THINKING) {
477
- return {
484
+ return castApi<"anthropic-messages">({
478
485
  ...base,
479
486
  thinkingEnabled: true,
480
487
  thinkingBudgetTokens: thinkingBudget,
481
488
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
482
- } as OptionsForApi<TApi>;
489
+ });
483
490
  }
484
491
 
485
492
  // Caller's maxTokens is the desired output; add thinking budget on top, capped at model limit
@@ -492,19 +499,19 @@ function mapOptionsForApi<TApi extends Api>(
492
499
 
493
500
  // If thinking budget is too low, disable thinking
494
501
  if (thinkingBudget <= 0) {
495
- return {
502
+ return castApi<"anthropic-messages">({
496
503
  ...base,
497
504
  thinkingEnabled: false,
498
505
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
499
- } as OptionsForApi<TApi>;
506
+ });
500
507
  } else {
501
- return {
508
+ return castApi<"anthropic-messages">({
502
509
  ...base,
503
510
  maxTokens,
504
511
  thinkingEnabled: true,
505
512
  thinkingBudgetTokens: thinkingBudget,
506
513
  toolChoice: mapAnthropicToolChoice(options?.toolChoice),
507
- } as OptionsForApi<TApi>;
514
+ });
508
515
  }
509
516
  }
510
517
 
@@ -529,96 +536,98 @@ function mapOptionsForApi<TApi extends Api>(
529
536
  const adjustedBudget = Math.max(0, maxTokens - MIN_OUTPUT_TOKENS);
530
537
  thinkingBudgets = { ...(thinkingBudgets ?? {}), [budgetInfo.level]: adjustedBudget };
531
538
  }
532
- return { ...bedrockBase, maxTokens, thinkingBudgets } as OptionsForApi<TApi>;
539
+ return castApi<"bedrock-converse-stream">({ ...bedrockBase, maxTokens, thinkingBudgets });
533
540
  }
534
541
 
535
542
  case "openai-completions":
536
- return {
543
+ return castApi<"openai-completions">({
537
544
  ...base,
538
- reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
545
+ reasoning: resolveOpenAiReasoningEffort(model, options),
539
546
  toolChoice: mapOpenAiToolChoice(options?.toolChoice),
540
- } as OptionsForApi<TApi>;
547
+ });
541
548
 
542
549
  case "openai-responses":
543
- return {
550
+ return castApi<"openai-responses">({
544
551
  ...base,
545
- reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
552
+ reasoning: resolveOpenAiReasoningEffort(model, options),
546
553
  toolChoice: mapOpenAiToolChoice(options?.toolChoice),
547
- } as OptionsForApi<TApi>;
554
+ });
548
555
 
549
556
  case "azure-openai-responses":
550
- return {
557
+ return castApi<"azure-openai-responses">({
551
558
  ...base,
552
- reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
559
+ reasoning: resolveOpenAiReasoningEffort(model, options),
553
560
  toolChoice: mapOpenAiToolChoice(options?.toolChoice),
554
- } as OptionsForApi<TApi>;
561
+ });
555
562
 
556
563
  case "openai-codex-responses":
557
- return {
564
+ return castApi<"openai-codex-responses">({
558
565
  ...base,
559
- reasoningEffort: supportsXhigh(model) ? options?.reasoning : clampReasoning(options?.reasoning),
566
+ reasoning: resolveOpenAiReasoningEffort(model, options),
560
567
  toolChoice: mapOpenAiToolChoice(options?.toolChoice),
561
568
  preferWebsockets: options?.preferWebsockets,
562
- } as OptionsForApi<TApi>;
569
+ });
563
570
 
564
571
  case "google-generative-ai": {
565
572
  // Explicitly disable thinking when reasoning is not specified
566
573
  // This is needed because Gemini has "dynamic thinking" enabled by default
567
- if (!options?.reasoning) {
568
- return {
574
+ const reasoning = options?.reasoning;
575
+ if (!reasoning || reasoning === "off") {
576
+ return castApi<"google-generative-ai">({
569
577
  ...base,
570
578
  thinking: { enabled: false },
571
579
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
572
- } as OptionsForApi<TApi>;
580
+ });
573
581
  }
574
582
 
575
583
  const googleModel = model as Model<"google-generative-ai">;
576
- const effort = clampReasoning(options.reasoning)!;
584
+ const effort = reasoning === "xhigh" ? "high" : reasoning;
577
585
 
578
586
  // Gemini 3+ models use thinkingLevel exclusively instead of thinkingBudget.
579
587
  // https://ai.google.dev/gemini-api/docs/thinking#set-budget
580
588
  if (isGemini3ProModel(googleModel) || isGemini3FlashModel(googleModel)) {
581
- return {
589
+ return castApi<"google-generative-ai">({
582
590
  ...base,
583
591
  thinking: {
584
592
  enabled: true,
585
593
  level: getGemini3ThinkingLevel(effort, googleModel),
586
594
  },
587
595
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
588
- } as OptionsForApi<TApi>;
596
+ });
589
597
  }
590
598
 
591
- return {
599
+ return castApi<"google-gemini-cli">({
592
600
  ...base,
593
601
  thinking: {
594
602
  enabled: true,
595
603
  budgetTokens: getGoogleBudget(googleModel, effort, options?.thinkingBudgets),
596
604
  },
597
605
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
598
- } as OptionsForApi<TApi>;
606
+ });
599
607
  }
600
608
 
601
609
  case "google-gemini-cli": {
602
- if (!options?.reasoning) {
603
- return {
610
+ const reasoning = options?.reasoning;
611
+ if (!reasoning || reasoning === "off") {
612
+ return castApi<"google-gemini-cli">({
604
613
  ...base,
605
614
  thinking: { enabled: false },
606
615
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
607
- } as OptionsForApi<TApi>;
616
+ });
608
617
  }
609
618
 
610
- const effort = clampReasoning(options.reasoning)!;
619
+ const effort = reasoning === "xhigh" ? "high" : reasoning;
611
620
 
612
621
  // Gemini 3+ models use thinkingLevel instead of thinkingBudget
613
622
  if (isGemini3ProModelId(model.id) || isGemini3FlashModelId(model.id)) {
614
- return {
623
+ return castApi<"google-vertex">({
615
624
  ...base,
616
625
  thinking: {
617
626
  enabled: true,
618
627
  level: getGeminiCliThinkingLevel(effort, model.id),
619
628
  },
620
629
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
621
- } as OptionsForApi<TApi>;
630
+ });
622
631
  }
623
632
 
624
633
  let thinkingBudget = options.thinkingBudgets?.[effort] ?? GOOGLE_THINKING[effort];
@@ -633,64 +642,65 @@ function mapOptionsForApi<TApi extends Api>(
633
642
 
634
643
  // If thinking budget is too low, disable thinking
635
644
  if (thinkingBudget <= 0) {
636
- return {
645
+ return castApi<"google-gemini-cli">({
637
646
  ...base,
638
647
  thinking: { enabled: false },
639
648
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
640
- } as OptionsForApi<TApi>;
649
+ });
641
650
  } else {
642
- return {
651
+ return castApi<"google-gemini-cli">({
643
652
  ...base,
644
653
  maxTokens,
645
654
  thinking: { enabled: true, budgetTokens: thinkingBudget },
646
655
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
647
- } as OptionsForApi<TApi>;
656
+ });
648
657
  }
649
658
  }
650
659
 
651
660
  case "google-vertex": {
652
661
  // Explicitly disable thinking when reasoning is not specified
653
- if (!options?.reasoning) {
654
- return {
662
+ const reasoning = options?.reasoning;
663
+ if (!reasoning || reasoning === "off") {
664
+ return castApi<"google-vertex">({
655
665
  ...base,
656
666
  thinking: { enabled: false },
657
667
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
658
- } as OptionsForApi<TApi>;
668
+ });
659
669
  }
660
670
 
661
671
  const vertexModel = model as Model<"google-vertex">;
662
- const effort = clampReasoning(options.reasoning)!;
672
+ const effort = reasoning === "xhigh" ? "high" : reasoning;
663
673
  const geminiModel = vertexModel as unknown as Model<"google-generative-ai">;
664
674
 
665
675
  if (isGemini3ProModel(geminiModel) || isGemini3FlashModel(geminiModel)) {
666
- return {
676
+ return castApi<"google-vertex">({
667
677
  ...base,
668
678
  thinking: {
669
679
  enabled: true,
670
680
  level: getGemini3ThinkingLevel(effort, geminiModel),
671
681
  },
672
682
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
673
- } as OptionsForApi<TApi>;
683
+ });
674
684
  }
675
685
 
676
- return {
686
+ return castApi<"google-vertex">({
677
687
  ...base,
678
688
  thinking: {
679
689
  enabled: true,
680
690
  budgetTokens: getGoogleBudget(geminiModel, effort, options?.thinkingBudgets),
681
691
  },
682
692
  toolChoice: mapGoogleToolChoice(options?.toolChoice),
683
- } as OptionsForApi<TApi>;
693
+ });
684
694
  }
685
695
 
686
696
  case "cursor-agent": {
687
697
  const execHandlers = options?.cursorExecHandlers ?? options?.execHandlers;
688
698
  const onToolResult = options?.cursorOnToolResult ?? execHandlers?.onToolResult;
689
- return {
699
+ return castApi<"cursor-agent">({
690
700
  ...base,
691
701
  execHandlers,
692
702
  onToolResult,
693
- } as OptionsForApi<TApi>;
703
+ });
694
704
  }
695
705
 
696
706
  default:
@@ -698,8 +708,6 @@ function mapOptionsForApi<TApi extends Api>(
698
708
  }
699
709
  }
700
710
 
701
- type ClampedThinkingLevel = Exclude<ThinkingLevel, "xhigh">;
702
-
703
711
  function isGemini3ProModelId(modelId: string): boolean {
704
712
  return /3(?:\.\d+)?-pro/.test(modelId);
705
713
  }
@@ -718,18 +726,14 @@ function isGemini3FlashModel(model: Model<"google-generative-ai">): boolean {
718
726
  return isGemini3FlashModelId(model.id);
719
727
  }
720
728
 
721
- function getGemini3ThinkingLevel(
722
- effort: ClampedThinkingLevel,
723
- model: Model<"google-generative-ai">,
724
- ): GoogleThinkingLevel {
729
+ function getGemini3ThinkingLevel(effort: ThinkingEffort, model: Model<"google-generative-ai">): GoogleThinkingLevel {
725
730
  if (isGemini3ProModel(model)) {
726
731
  // Gemini 3 Pro only supports LOW/HIGH (for now)
727
732
  switch (effort) {
728
733
  case "minimal":
729
734
  case "low":
730
735
  return "LOW";
731
- case "medium":
732
- case "high":
736
+ default:
733
737
  return "HIGH";
734
738
  }
735
739
  }
@@ -741,20 +745,19 @@ function getGemini3ThinkingLevel(
741
745
  return "LOW";
742
746
  case "medium":
743
747
  return "MEDIUM";
744
- case "high":
748
+ default:
745
749
  return "HIGH";
746
750
  }
747
751
  }
748
752
 
749
- function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string): GoogleThinkingLevel {
753
+ function getGeminiCliThinkingLevel(effort: ThinkingEffort, modelId: string): GoogleThinkingLevel {
750
754
  if (isGemini3ProModelId(modelId)) {
751
755
  // Gemini 3 Pro only supports LOW/HIGH (for now)
752
756
  switch (effort) {
753
757
  case "minimal":
754
758
  case "low":
755
759
  return "LOW";
756
- case "medium":
757
- case "high":
760
+ default:
758
761
  return "HIGH";
759
762
  }
760
763
  }
@@ -766,41 +769,35 @@ function getGeminiCliThinkingLevel(effort: ClampedThinkingLevel, modelId: string
766
769
  return "LOW";
767
770
  case "medium":
768
771
  return "MEDIUM";
769
- case "high":
772
+ default:
770
773
  return "HIGH";
771
774
  }
772
775
  }
773
776
 
774
777
  function getGoogleBudget(
775
778
  model: Model<"google-generative-ai">,
776
- effort: ClampedThinkingLevel,
779
+ effort: ThinkingEffort,
777
780
  customBudgets?: ThinkingBudgets,
778
781
  ): number {
782
+ effort = effort === "xhigh" ? "high" : effort;
783
+
779
784
  // Custom budgets take precedence if provided for this level
780
785
  if (customBudgets?.[effort] !== undefined) {
781
786
  return customBudgets[effort]!;
782
787
  }
783
788
 
784
789
  // See https://ai.google.dev/gemini-api/docs/thinking#set-budget
785
- if (model.id.includes("2.5-pro")) {
786
- const budgets: Record<ClampedThinkingLevel, number> = {
787
- minimal: 128,
788
- low: 2048,
789
- medium: 8192,
790
- high: 32768,
791
- };
792
- return budgets[effort];
793
- }
794
-
795
- if (model.id.includes("2.5-flash")) {
796
- // Covers 2.5-flash-lite as well
797
- const budgets: Record<ClampedThinkingLevel, number> = {
798
- minimal: 128,
799
- low: 2048,
800
- medium: 8192,
801
- high: 24576,
802
- };
803
- return budgets[effort];
790
+ if (model.id.includes("2.5-")) {
791
+ switch (effort) {
792
+ case "minimal":
793
+ return 128;
794
+ case "low":
795
+ return 2048;
796
+ case "medium":
797
+ return 8192;
798
+ default:
799
+ return model.id.includes("2.5-flash") ? 24576 : 32768;
800
+ }
804
801
  }
805
802
 
806
803
  // Unknown model - use dynamic
@@ -0,0 +1,85 @@
1
+ /** Provider-level thinking levels (no "off"), ordered least to most. */
2
+ export type ThinkingEffort = "minimal" | "low" | "medium" | "high" | "xhigh";
3
+
4
+ /**
5
+ * ThinkingEffort extended with "off" to disable reasoning entirely.
6
+ * Used in UI, config, session state, and CLI args.
7
+ * "off" is never sent to providers — callers strip it before streaming.
8
+ */
9
+ export type ThinkingLevel = ThinkingEffort | "off";
10
+
11
+ /**
12
+ * ThinkingLevel extended with "inherit" to indicate the role should
13
+ * use the session-level default rather than an explicit choice.
14
+ * Used in per-role model assignment UI.
15
+ */
16
+ export type ThinkingMode = ThinkingLevel | "inherit";
17
+
18
+ /** Metadata for a thinking mode. */
19
+ export type ThinkingMetadata = {
20
+ /** The value of the thinking mode. */
21
+ value: ThinkingMode;
22
+ /** The label to display for the thinking mode. */
23
+ label: string;
24
+ /** The description to display for the thinking mode. */
25
+ description: string;
26
+ };
27
+
28
+ const THINKING_META: Record<ThinkingMode, ThinkingMetadata> = {
29
+ inherit: { value: "inherit", label: "inherit", description: "Inherit session default" },
30
+ off: { value: "off", label: "off", description: "No reasoning" },
31
+ minimal: { value: "minimal", label: "min", description: "Very brief reasoning (~1k tokens)" },
32
+ low: { value: "low", label: "low", description: "Light reasoning (~2k tokens)" },
33
+ medium: { value: "medium", label: "medium", description: "Moderate reasoning (~8k tokens)" },
34
+ high: { value: "high", label: "high", description: "Deep reasoning (~16k tokens)" },
35
+ xhigh: { value: "xhigh", label: "xhigh", description: "Maximum reasoning (~32k tokens)" },
36
+ };
37
+
38
+ const F_LEVEL = 3;
39
+ const F_SEL = 2;
40
+ const F_MODE = 1;
41
+
42
+ const F_THINKING: Record<string, number> = {
43
+ inherit: F_MODE,
44
+ off: F_SEL,
45
+ minimal: F_LEVEL,
46
+ low: F_LEVEL,
47
+ medium: F_LEVEL,
48
+ high: F_LEVEL,
49
+ xhigh: F_LEVEL,
50
+ };
51
+
52
+ // Parses a value and returns a ThinkingEffort (excludes "off" and "inherit") if valid, otherwise undefined.
53
+ export function parseThinkingEffort(level: string | null | undefined): ThinkingEffort | undefined {
54
+ return level && (F_THINKING[level] ?? 0) >= F_LEVEL ? (level as ThinkingEffort) : undefined;
55
+ }
56
+
57
+ // Parses a value and returns a ThinkingLevel (an effort or "off", but not "inherit") if valid, otherwise undefined.
58
+ export function parseThinkingLevel(level: string | null | undefined): ThinkingLevel | undefined {
59
+ return level && (F_THINKING[level] ?? 0) >= F_SEL ? (level as ThinkingLevel) : undefined;
60
+ }
61
+
62
+ // Parses an unknown value and returns a ThinkingMode if valid, otherwise undefined.
63
+ export function parseThinkingMode(level: string | null | undefined): ThinkingMode | undefined {
64
+ return level && (F_THINKING[level] ?? 0) >= F_MODE ? (level as ThinkingMode) : undefined;
65
+ }
66
+
67
+ /** Get the information for a thinking mode. */
68
+ export function getThinkingMetadata(mode: ThinkingMode): ThinkingMetadata {
69
+ return THINKING_META[mode];
70
+ }
71
+
72
+ const REG_LVL: readonly ThinkingLevel[] = ["off", "minimal", "low", "medium", "high"];
73
+ const XHI_LVL: readonly ThinkingLevel[] = ["off", "minimal", "low", "medium", "high", "xhigh"];
74
+
75
+ /** Returns the available thinking levels (always including "off"); "xhigh" is included only when hasXhigh is true. */
76
+ export function getAvailableThinkingLevels(hasXhigh: boolean = true): ReadonlyArray<ThinkingLevel> {
77
+ return hasXhigh ? XHI_LVL : REG_LVL;
78
+ }
79
+
80
+ const REG_EFF: readonly ThinkingEffort[] = ["minimal", "low", "medium", "high"];
81
+ const XHI_EFF: readonly ThinkingEffort[] = ["minimal", "low", "medium", "high", "xhigh"];
82
+
83
+ export function getAvailableThinkingEfforts(hasXhigh: boolean = true): ReadonlyArray<ThinkingEffort> {
84
+ return hasXhigh ? XHI_EFF : REG_EFF;
85
+ }
package/src/types.ts CHANGED
@@ -109,10 +109,10 @@ export type KnownProvider =
109
109
  | "lm-studio";
110
110
  export type Provider = KnownProvider | string;
111
111
 
112
- export type ThinkingLevel = "minimal" | "low" | "medium" | "high" | "xhigh";
112
+ import type { ThinkingEffort, ThinkingLevel } from "./thinking";
113
113
 
114
114
  /** Token budgets for each thinking level (token-based providers only) */
115
- export type ThinkingBudgets = { [key in ThinkingLevel]?: number };
115
+ export type ThinkingBudgets = { [key in ThinkingEffort]?: number };
116
116
 
117
117
  export type MessageAttribution = "user" | "agent";
118
118
 
@@ -224,6 +224,11 @@ export interface ThinkingContent {
224
224
  thinkingSignature?: string; // e.g., for OpenAI responses, the reasoning item ID
225
225
  }
226
226
 
227
+ export interface RedactedThinkingContent {
228
+ type: "redactedThinking";
229
+ data: string;
230
+ }
231
+
227
232
  export interface ImageContent {
228
233
  type: "image";
229
234
  data: string; // base64 encoded image data
@@ -277,7 +282,7 @@ export interface DeveloperMessage {
277
282
 
278
283
  export interface AssistantMessage {
279
284
  role: "assistant";
280
- content: (TextContent | ThinkingContent | ToolCall)[];
285
+ content: (TextContent | ThinkingContent | RedactedThinkingContent | ToolCall)[];
281
286
  api: Api;
282
287
  provider: Provider;
283
288
  model: string;