@oh-my-pi/pi-ai 14.5.14 → 14.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,34 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [14.6.0] - 2026-05-02
6
+
7
+ ### Added
8
+
9
+ - Added `disableReasoning` to stream and OpenAI completion options to force reasoning off for models that support it, sending `reasoning: { enabled: false }` for OpenRouter-compatible requests
10
+ - Added `thinkingDisplay` option to Anthropic options to control whether adaptive and explicit reasoning is returned as `summarized` or `omitted`
11
+ - Added Anthropic model compatibility flags `supportsEagerToolInputStreaming` and `supportsLongCacheRetention` for API-capability-specific request behavior
12
+
13
+ ### Changed
14
+
15
+ - Changed Anthropic request payloads to send `thinking: { type: "disabled" }` when `thinkingEnabled` is explicitly `false` on reasoning-enabled models
16
+ - Changed Anthropic cache retention handling so `cacheRetention: "long"` now uses `ttl: "1h"` only for canonical Anthropic endpoints with long-cache support
17
+ - Changed Anthropic tool schema generation to include `eager_input_streaming` only on models that advertise support
18
+ - Changed Anthropic OAuth login flow to include browser fallback guidance and richer error context when token exchange or refresh fails
19
+
20
+ ### Fixed
21
+
22
+ - Fixed Anthropic non-thinking requests to include the caller-provided `temperature` value in request payloads
23
+ - Fixed Anthropic `claude-opus-4-7` non-thinking payloads to omit sampling fields (`temperature`, `top_p`, and `top_k`)
24
+ - Fixed OpenAI Codex base URL normalization so configured base URLs with or without `/codex` or `/codex/responses` now resolve to `/codex/responses`
25
+ - Fixed OpenAI Codex websocket handling to parse JSON from non-string message payloads including `ArrayBuffer`, typed arrays, and `Blob` values
26
+ - Fixed OpenAI Codex websocket handshakes to replace stale `openai-beta` values with the websocket beta and avoid sending request-body headers over websocket transport
27
+ - Fixed abort tracking so caller-initiated cancellations are treated as user aborts even after local watchdog timeouts, preventing unintended automatic retries
28
+ - Fixed Anthropic stream handling to parse raw SSE envelopes directly, ignore unrelated events, and repair malformed JSON in SSE payloads
29
+ - Fixed Anthropic streaming to emit an explicit error when the SSE stream ends without a `message_stop` event
30
+ - Fixed OpenAI Codex websocket continuations to send true `previous_response_id` deltas for `store: false` transcripts, expose request stats, and default text verbosity to `low` unless explicitly overridden
31
+ - Fixed OpenAI Codex websocket append reuse after `response.completed` terminal events
32
+
5
33
  ## [14.5.14] - 2026-05-01
6
34
  ### Added
7
35
 
@@ -2208,4 +2236,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
2208
2236
 
2209
2237
  ## [0.9.4] - 2025-11-26
2210
2238
 
2211
- Initial release with multi-provider LLM support.
2239
+ Initial release with multi-provider LLM support.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "@oh-my-pi/pi-ai",
4
- "version": "14.5.14",
4
+ "version": "14.6.0",
5
5
  "description": "Unified LLM API with automatic model discovery and provider configuration",
6
6
  "homepage": "https://github.com/can1357/oh-my-pi",
7
7
  "author": "Can Boluk",
@@ -46,8 +46,8 @@
46
46
  "@aws-sdk/credential-provider-node": "^3.972.36",
47
47
  "@bufbuild/protobuf": "^2.12.0",
48
48
  "@google/genai": "^1.50.1",
49
- "@oh-my-pi/pi-natives": "14.5.14",
50
- "@oh-my-pi/pi-utils": "14.5.14",
49
+ "@oh-my-pi/pi-natives": "14.6.0",
50
+ "@oh-my-pi/pi-utils": "14.6.0",
51
51
  "@sinclair/typebox": "^0.34.49",
52
52
  "@smithy/node-http-handler": "^4.6.1",
53
53
  "ajv": "^8.20.0",
package/src/index.ts CHANGED
@@ -18,7 +18,7 @@ export * from "./providers/google-gemini-headers";
18
18
  export type * from "./providers/google-vertex";
19
19
  export * from "./providers/kimi";
20
20
  export * from "./providers/ollama";
21
- export type { OpenAICodexResponsesOptions } from "./providers/openai-codex-responses";
21
+ export * from "./providers/openai-codex-responses";
22
22
  export * from "./providers/openai-completions";
23
23
  export * from "./providers/openai-responses";
24
24
  export * from "./providers/synthetic";
@@ -6,6 +6,7 @@ import type {
6
6
  ContentBlockParam,
7
7
  MessageCreateParamsStreaming,
8
8
  MessageParam,
9
+ RawMessageStreamEvent,
9
10
  } from "@anthropic-ai/sdk/resources/messages";
10
11
  import { $env, abortableSleep, isEnoent } from "@oh-my-pi/pi-utils";
11
12
  import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
@@ -38,7 +39,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream";
38
39
  import { isFoundryEnabled } from "../utils/foundry";
39
40
  import { finalizeErrorMessage, type RawHttpRequestDump, rewriteCopilotError } from "../utils/http-inspector";
40
41
  import { createWatchdog, getStreamFirstEventTimeoutMs } from "../utils/idle-iterator";
41
- import { parseStreamingJson } from "../utils/json-parse";
42
+ import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse";
42
43
  import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
43
44
  import { notifyProviderResponse } from "../utils/provider-response";
44
45
  import { extractHttpStatusFromError, isCopilotRetryableError, isUnexpectedSocketCloseMessage } from "../utils/retry";
@@ -57,6 +58,7 @@ export type AnthropicHeaderOptions = {
57
58
  extraBetas?: string[];
58
59
  stream?: boolean;
59
60
  modelHeaders?: Record<string, string>;
61
+ isCloudflareAiGateway?: boolean;
60
62
  };
61
63
 
62
64
  export function normalizeAnthropicBaseUrl(baseUrl?: string): string | undefined {
@@ -88,6 +90,9 @@ const claudeCodeBetaDefaults = [
88
90
  "context-management-2025-06-27",
89
91
  "prompt-caching-scope-2026-01-05",
90
92
  ];
93
+ const fineGrainedToolStreamingBeta = "fine-grained-tool-streaming-2025-05-14";
94
+ const interleavedThinkingBeta = "interleaved-thinking-2025-05-14";
95
+
91
96
  function getHeaderCaseInsensitive(headers: Record<string, string> | undefined, headerName: string): string | undefined {
92
97
  if (!headers) return undefined;
93
98
  const normalizedName = headerName.toLowerCase();
@@ -131,6 +136,16 @@ export function buildAnthropicHeaders(options: AnthropicHeaderOptions): Record<s
131
136
  Object.entries(options.modelHeaders ?? {}).filter(([key]) => !enforcedHeaderKeys.has(key.toLowerCase())),
132
137
  );
133
138
 
139
+ if (options.isCloudflareAiGateway) {
140
+ return {
141
+ ...modelHeaders,
142
+ Accept: acceptHeader,
143
+ ...sharedHeaders,
144
+ "Anthropic-Beta": betaHeader,
145
+ "cf-aig-authorization": `Bearer ${options.apiKey}`,
146
+ };
147
+ }
148
+
134
149
  if (oauthToken) {
135
150
  const incomingUserAgent = getHeaderCaseInsensitive(options.modelHeaders, "User-Agent");
136
151
  const userAgent = isClaudeCodeClientUserAgent(incomingUserAgent)
@@ -235,6 +250,7 @@ function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
235
250
  }
236
251
 
237
252
  function getCacheControl(
253
+ model: Model<"anthropic-messages">,
238
254
  baseUrl: string,
239
255
  cacheRetention?: CacheRetention,
240
256
  ): { retention: CacheRetention; cacheControl?: AnthropicCacheControl } {
@@ -242,7 +258,10 @@ function getCacheControl(
242
258
  if (retention === "none") {
243
259
  return { retention };
244
260
  }
245
- const ttl = retention === "long" && baseUrl.includes("api.anthropic.com") ? "1h" : undefined;
261
+ const ttl =
262
+ retention === "long" && isAnthropicApiBaseUrl(baseUrl) && getAnthropicCompat(model).supportsLongCacheRetention
263
+ ? "1h"
264
+ : undefined;
246
265
  return {
247
266
  retention,
248
267
  cacheControl: { type: "ephemeral", ...(ttl && { ttl }) },
@@ -312,6 +331,7 @@ const enforcedHeaderKeys = new Set(
312
331
  "X-App",
313
332
  "Authorization",
314
333
  "X-Api-Key",
334
+ "cf-aig-authorization",
315
335
  ].map(key => key.toLowerCase()),
316
336
  );
317
337
 
@@ -424,6 +444,7 @@ function convertContentBlocks(content: (TextContent | ImageContent)[]):
424
444
  }
425
445
 
426
446
  export type AnthropicEffort = "low" | "medium" | "high" | "xhigh" | "max";
447
+ export type AnthropicThinkingDisplay = "summarized" | "omitted";
427
448
 
428
449
  export interface AnthropicOptions extends StreamOptions {
429
450
  /**
@@ -452,6 +473,12 @@ export interface AnthropicOptions extends StreamOptions {
452
473
  * Converted to adaptive effort when effort is not explicitly provided.
453
474
  */
454
475
  reasoning?: SimpleStreamOptions["reasoning"];
476
+ /**
477
+ * Controls how Anthropic returns thinking content when the selected thinking
478
+ * transport supports a display option. Defaults to "summarized" where the
479
+ * API accepts it.
480
+ */
481
+ thinkingDisplay?: AnthropicThinkingDisplay;
455
482
  interleavedThinking?: boolean;
456
483
  toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
457
484
  betas?: string[] | string;
@@ -474,12 +501,13 @@ export type AnthropicClientOptionsArgs = {
474
501
  headers?: Record<string, string>;
475
502
  dynamicHeaders?: Record<string, string>;
476
503
  isOAuth?: boolean;
504
+ hasTools?: boolean;
477
505
  };
478
506
 
479
507
  export type AnthropicClientOptionsResult = {
480
508
  isOAuthToken: boolean;
481
509
  apiKey: string | null;
482
- authToken?: string;
510
+ authToken?: string | null;
483
511
  baseURL?: string;
484
512
  maxRetries: number;
485
513
  dangerouslyAllowBrowser: boolean;
@@ -624,6 +652,248 @@ function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]):
624
652
  // We surface the resulting provider error ourselves, so keep the SDK quiet.
625
653
  const ANTHROPIC_SDK_LOG_LEVEL = "off" as const;
626
654
 
655
+ interface ServerSentEvent {
656
+ event: string | null;
657
+ data: string;
658
+ raw: string[];
659
+ }
660
+
661
+ interface SseDecoderState {
662
+ event: string | null;
663
+ data: string[];
664
+ raw: string[];
665
+ }
666
+
667
+ const ANTHROPIC_MESSAGE_EVENTS: ReadonlySet<string> = new Set([
668
+ "message_start",
669
+ "message_delta",
670
+ "message_stop",
671
+ "content_block_start",
672
+ "content_block_delta",
673
+ "content_block_stop",
674
+ ]);
675
+
676
+ function flushSseEvent(state: SseDecoderState): ServerSentEvent | null {
677
+ if (!state.event && state.data.length === 0) {
678
+ return null;
679
+ }
680
+
681
+ const event: ServerSentEvent = {
682
+ event: state.event,
683
+ data: state.data.join("\n"),
684
+ raw: [...state.raw],
685
+ };
686
+ state.event = null;
687
+ state.data = [];
688
+ state.raw = [];
689
+ return event;
690
+ }
691
+
692
+ function decodeSseLine(line: string, state: SseDecoderState): ServerSentEvent | null {
693
+ if (line === "") {
694
+ return flushSseEvent(state);
695
+ }
696
+
697
+ state.raw.push(line);
698
+ if (line.startsWith(":")) {
699
+ return null;
700
+ }
701
+
702
+ const delimiterIndex = line.indexOf(":");
703
+ const fieldName = delimiterIndex === -1 ? line : line.slice(0, delimiterIndex);
704
+ let value = delimiterIndex === -1 ? "" : line.slice(delimiterIndex + 1);
705
+ if (value.startsWith(" ")) {
706
+ value = value.slice(1);
707
+ }
708
+
709
+ if (fieldName === "event") {
710
+ state.event = value;
711
+ } else if (fieldName === "data") {
712
+ state.data.push(value);
713
+ }
714
+
715
+ return null;
716
+ }
717
+
718
+ function nextLineBreakIndex(text: string): number {
719
+ const carriageReturnIndex = text.indexOf("\r");
720
+ const newlineIndex = text.indexOf("\n");
721
+ if (carriageReturnIndex === -1) {
722
+ return newlineIndex;
723
+ }
724
+ if (newlineIndex === -1) {
725
+ return carriageReturnIndex;
726
+ }
727
+ return Math.min(carriageReturnIndex, newlineIndex);
728
+ }
729
+
730
+ function consumeLine(text: string): { line: string; rest: string } | null {
731
+ const lineBreakIndex = nextLineBreakIndex(text);
732
+ if (lineBreakIndex === -1) {
733
+ return null;
734
+ }
735
+
736
+ let nextIndex = lineBreakIndex + 1;
737
+ if (text[lineBreakIndex] === "\r" && text[nextIndex] === "\n") {
738
+ nextIndex += 1;
739
+ }
740
+
741
+ return {
742
+ line: text.slice(0, lineBreakIndex),
743
+ rest: text.slice(nextIndex),
744
+ };
745
+ }
746
+
747
+ async function* iterateSseMessages(
748
+ body: ReadableStream<Uint8Array>,
749
+ signal?: AbortSignal,
750
+ ): AsyncGenerator<ServerSentEvent> {
751
+ const reader = body.getReader();
752
+ const decoder = new TextDecoder();
753
+ const state: SseDecoderState = { event: null, data: [], raw: [] };
754
+ let buffer = "";
755
+
756
+ try {
757
+ while (true) {
758
+ if (signal?.aborted) {
759
+ throw new Error("Request was aborted");
760
+ }
761
+
762
+ const { value, done } = await reader.read();
763
+ if (done) {
764
+ break;
765
+ }
766
+
767
+ buffer += decoder.decode(value, { stream: true });
768
+ let consumed = consumeLine(buffer);
769
+ while (consumed) {
770
+ buffer = consumed.rest;
771
+ const event = decodeSseLine(consumed.line, state);
772
+ if (event) {
773
+ yield event;
774
+ }
775
+ consumed = consumeLine(buffer);
776
+ }
777
+ }
778
+
779
+ buffer += decoder.decode();
780
+ let consumed = consumeLine(buffer);
781
+ while (consumed) {
782
+ buffer = consumed.rest;
783
+ const event = decodeSseLine(consumed.line, state);
784
+ if (event) {
785
+ yield event;
786
+ }
787
+ consumed = consumeLine(buffer);
788
+ }
789
+
790
+ if (buffer.length > 0) {
791
+ const event = decodeSseLine(buffer, state);
792
+ if (event) {
793
+ yield event;
794
+ }
795
+ }
796
+
797
+ const trailingEvent = flushSseEvent(state);
798
+ if (trailingEvent) {
799
+ yield trailingEvent;
800
+ }
801
+ } finally {
802
+ reader.releaseLock();
803
+ }
804
+ }
805
+
806
+ async function* iterateAnthropicEvents(
807
+ response: Response,
808
+ signal?: AbortSignal,
809
+ ): AsyncGenerator<RawMessageStreamEvent> {
810
+ if (!response.body) {
811
+ throw new Error("Attempted to iterate over an Anthropic response with no body");
812
+ }
813
+
814
+ let sawMessageStart = false;
815
+ let sawMessageEnd = false;
816
+
817
+ for await (const sse of iterateSseMessages(response.body, signal)) {
818
+ if (sse.event === "error") {
819
+ throw new Error(sse.data);
820
+ }
821
+
822
+ if (!ANTHROPIC_MESSAGE_EVENTS.has(sse.event ?? "")) {
823
+ continue;
824
+ }
825
+
826
+ try {
827
+ const event = parseJsonWithRepair<RawMessageStreamEvent>(sse.data);
828
+ if (event.type === "message_start") {
829
+ sawMessageStart = true;
830
+ } else if (event.type === "message_stop") {
831
+ sawMessageEnd = true;
832
+ }
833
+ yield event;
834
+ } catch (error) {
835
+ const message = error instanceof Error ? error.message : String(error);
836
+ throw new Error(
837
+ `Could not parse Anthropic SSE event ${sse.event}: ${message}; data=${sse.data}; raw=${sse.raw.join("\\n")}`,
838
+ );
839
+ }
840
+ }
841
+
842
+ if (sawMessageStart && !sawMessageEnd) {
843
+ throw createAnthropicStreamEnvelopeError("stream ended before message_stop");
844
+ }
845
+ }
846
+
847
+ type AnthropicRawResponseRequest = {
848
+ asResponse(): Promise<Response>;
849
+ };
850
+
851
+ function hasAnthropicRawResponseRequest(request: unknown): request is AnthropicRawResponseRequest {
852
+ return isRecord(request) && typeof request.asResponse === "function";
853
+ }
854
+
855
+ type AnthropicStreamWithResponseRequest = {
856
+ withResponse(): Promise<{
857
+ data: AsyncIterable<RawMessageStreamEvent>;
858
+ response: Response;
859
+ request_id: string | null;
860
+ }>;
861
+ };
862
+
863
+ function hasAnthropicStreamWithResponseRequest(request: unknown): request is AnthropicStreamWithResponseRequest {
864
+ return isRecord(request) && typeof request.withResponse === "function";
865
+ }
866
+
867
+ async function getAnthropicStreamResponse(
868
+ request: unknown,
869
+ signal?: AbortSignal,
870
+ ): Promise<{ events: AsyncIterable<RawMessageStreamEvent>; response: Response; requestId: string | null }> {
871
+ if (hasAnthropicRawResponseRequest(request)) {
872
+ const response = await request.asResponse();
873
+ return {
874
+ events: iterateAnthropicEvents(response, signal),
875
+ response,
876
+ requestId: response.headers.get("request-id"),
877
+ };
878
+ }
879
+ if (hasAnthropicStreamWithResponseRequest(request)) {
880
+ const { data, response, request_id } = await request.withResponse();
881
+ return { events: data, response, requestId: request_id };
882
+ }
883
+ throw new Error("Anthropic SDK request did not expose a stream response");
884
+ }
885
+
886
+ function getAnthropicCompat(
887
+ model: Model<"anthropic-messages">,
888
+ ): Required<NonNullable<Model<"anthropic-messages">["compat"]>> {
889
+ return {
890
+ disableStrictTools: model.compat?.disableStrictTools ?? false,
891
+ disableAdaptiveThinking: model.compat?.disableAdaptiveThinking ?? false,
892
+ supportsEagerToolInputStreaming: model.compat?.supportsEagerToolInputStreaming ?? true,
893
+ supportsLongCacheRetention: model.compat?.supportsLongCacheRetention ?? true,
894
+ };
895
+ }
896
+
627
897
  const PROVIDER_MAX_RETRIES = 3;
628
898
  const PROVIDER_BASE_DELAY_MS = 2000;
629
899
 
@@ -789,6 +1059,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
789
1059
  headers: options?.headers,
790
1060
  dynamicHeaders: copilotDynamicHeaders?.headers,
791
1061
  isOAuth: options?.isOAuth,
1062
+ hasTools: !!context.tools?.length,
792
1063
  });
793
1064
  client = created.client;
794
1065
  isOAuthToken = created.isOAuthToken;
@@ -844,8 +1115,12 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
844
1115
  let streamedReplayUnsafeContent = false;
845
1116
 
846
1117
  try {
847
- const { data: anthropicStream, response, request_id } = await anthropicRequest.withResponse();
848
- await notifyProviderResponse(options, response, model, request_id);
1118
+ const {
1119
+ events: anthropicStream,
1120
+ response,
1121
+ requestId,
1122
+ } = await getAnthropicStreamResponse(anthropicRequest, requestSignal);
1123
+ await notifyProviderResponse(options, response, model, requestId);
849
1124
  const firstEventWatchdog = createWatchdog(firstEventTimeoutMs, () =>
850
1125
  activeAbortTracker.abortLocally(firstEventTimeoutAbortError),
851
1126
  );
@@ -1199,9 +1474,12 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1199
1474
  interleavedThinking = true,
1200
1475
  headers,
1201
1476
  dynamicHeaders,
1477
+ hasTools = false,
1202
1478
  isOAuth,
1203
1479
  } = args;
1480
+ const compat = getAnthropicCompat(model);
1204
1481
  const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinkingDisplay(model.id);
1482
+ const needsFineGrainedToolStreamingBeta = hasTools && !compat.supportsEagerToolInputStreaming;
1205
1483
  const oauthToken = isOAuth ?? isAnthropicOAuthToken(apiKey);
1206
1484
  const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
1207
1485
  const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
@@ -1209,6 +1487,9 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1209
1487
  if (model.provider === "github-copilot") {
1210
1488
  const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
1211
1489
  const betaFeatures = [...extraBetas];
1490
+ if (needsFineGrainedToolStreamingBeta) {
1491
+ betaFeatures.push(fineGrainedToolStreamingBeta);
1492
+ }
1212
1493
  const defaultHeaders = mergeHeaders(
1213
1494
  {
1214
1495
  Accept: stream ? "text/event-stream" : "application/json",
@@ -1235,8 +1516,11 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1235
1516
  }
1236
1517
 
1237
1518
  const betaFeatures = [...extraBetas];
1519
+ if (needsFineGrainedToolStreamingBeta) {
1520
+ betaFeatures.push(fineGrainedToolStreamingBeta);
1521
+ }
1238
1522
  if (needsInterleavedBeta) {
1239
- betaFeatures.push("interleaved-thinking-2025-05-14");
1523
+ betaFeatures.push(interleavedThinkingBeta);
1240
1524
  }
1241
1525
 
1242
1526
  const defaultHeaders = buildAnthropicHeaders({
@@ -1246,8 +1530,22 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
1246
1530
  extraBetas: betaFeatures,
1247
1531
  stream,
1248
1532
  modelHeaders: mergeHeaders(model.headers, foundryCustomHeaders, headers, dynamicHeaders),
1533
+ isCloudflareAiGateway: model.provider === "cloudflare-ai-gateway",
1249
1534
  });
1250
1535
 
1536
+ if (model.provider === "cloudflare-ai-gateway") {
1537
+ return {
1538
+ isOAuthToken: false,
1539
+ apiKey: null,
1540
+ authToken: null,
1541
+ baseURL: baseUrl,
1542
+ maxRetries: 5,
1543
+ dangerouslyAllowBrowser: true,
1544
+ defaultHeaders,
1545
+ logLevel: ANTHROPIC_SDK_LOG_LEVEL,
1546
+ };
1547
+ }
1548
+
1251
1549
  return {
1252
1550
  isOAuthToken: oauthToken,
1253
1551
  apiKey: oauthToken ? null : apiKey,
@@ -1532,13 +1830,16 @@ function buildParams(
1532
1830
  options?: AnthropicOptions,
1533
1831
  disableStrictTools = false,
1534
1832
  ): MessageCreateParamsStreaming {
1535
- const { cacheControl } = getCacheControl(baseUrl, options?.cacheRetention);
1833
+ const { cacheControl } = getCacheControl(model, baseUrl, options?.cacheRetention);
1536
1834
  const params: AnthropicSamplingParams = {
1537
1835
  model: model.id,
1538
1836
  messages: convertAnthropicMessages(context.messages, model, isOAuthToken),
1539
1837
  max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
1540
1838
  stream: true,
1541
1839
  };
1840
+ if (options?.temperature !== undefined && !options?.thinkingEnabled) {
1841
+ params.temperature = options.temperature;
1842
+ }
1542
1843
 
1543
1844
  if (options?.topP !== undefined) {
1544
1845
  params.top_p = options.topP;
@@ -1551,6 +1852,7 @@ function buildParams(
1551
1852
  if (hasOpus47ApiRestrictions(model.id)) {
1552
1853
  delete params.top_p;
1553
1854
  delete params.top_k;
1855
+ delete params.temperature;
1554
1856
  }
1555
1857
 
1556
1858
  if (context.tools) {
@@ -1558,38 +1860,45 @@ function buildParams(
1558
1860
  context.tools,
1559
1861
  isOAuthToken,
1560
1862
  disableStrictTools || model.provider === "github-copilot",
1863
+ getAnthropicCompat(model).supportsEagerToolInputStreaming,
1561
1864
  );
1562
1865
  }
1563
1866
 
1564
- if (options?.thinkingEnabled && model.reasoning) {
1565
- const mode = model.thinking?.mode;
1566
- const requestedEffort = options.reasoning;
1567
- const effort =
1568
- options.effort ?? (requestedEffort ? mapEffortToAnthropicAdaptiveEffort(model, requestedEffort) : undefined);
1569
-
1570
- const disableAdaptiveThinking = model.compat?.disableAdaptiveThinking ?? false;
1571
- if (mode === "anthropic-adaptive" && !disableAdaptiveThinking) {
1572
- // Starting with Claude Opus 4.7, adaptive thinking content is omitted from the
1573
- // response by default. Opt into summarized reasoning so thinking deltas keep
1574
- // streaming with human-readable content for callers that rely on it.
1575
- const adaptive: { type: "adaptive"; display?: "summarized" | "omitted" } = { type: "adaptive" };
1576
- if (supportsAdaptiveThinkingDisplay(model.id)) {
1577
- adaptive.display = "summarized";
1578
- }
1579
- params.thinking = adaptive as typeof params.thinking;
1580
- if (effort) {
1581
- // SDK's OutputConfig.effort type is not yet widened to include the new "xhigh"
1582
- // level introduced with Claude Opus 4.7. Cast until the SDK catches up.
1583
- params.output_config = { effort } as typeof params.output_config;
1584
- }
1585
- } else {
1586
- params.thinking = {
1587
- type: "enabled",
1588
- budget_tokens: options.thinkingBudgetTokens || 1024,
1589
- };
1590
- if (mode === "anthropic-budget-effort" && effort) {
1591
- params.output_config = { effort } as typeof params.output_config;
1867
+ if (model.reasoning) {
1868
+ if (options?.thinkingEnabled) {
1869
+ const mode = model.thinking?.mode;
1870
+ const requestedEffort = options.reasoning;
1871
+ const effort =
1872
+ options.effort ??
1873
+ (requestedEffort ? mapEffortToAnthropicAdaptiveEffort(model, requestedEffort) : undefined);
1874
+
1875
+ const compat = getAnthropicCompat(model);
1876
+ if (mode === "anthropic-adaptive" && !compat.disableAdaptiveThinking) {
1877
+ // Starting with Claude Opus 4.7, adaptive thinking content is omitted from the
1878
+ // response by default. Opt into summarized reasoning so thinking deltas keep
1879
+ // streaming with human-readable content for callers that rely on it.
1880
+ const adaptive: { type: "adaptive"; display?: AnthropicThinkingDisplay } = { type: "adaptive" };
1881
+ if (supportsAdaptiveThinkingDisplay(model.id)) {
1882
+ adaptive.display = options.thinkingDisplay ?? "summarized";
1883
+ }
1884
+ params.thinking = adaptive as typeof params.thinking;
1885
+ if (effort) {
1886
+ // SDK's OutputConfig.effort type is not yet widened to include the new "xhigh"
1887
+ // level introduced with Claude Opus 4.7. Cast until the SDK catches up.
1888
+ params.output_config = { effort } as typeof params.output_config;
1889
+ }
1890
+ } else {
1891
+ params.thinking = {
1892
+ type: "enabled",
1893
+ budget_tokens: options.thinkingBudgetTokens || 1024,
1894
+ display: options.thinkingDisplay ?? "summarized",
1895
+ } as typeof params.thinking;
1896
+ if (mode === "anthropic-budget-effort" && effort) {
1897
+ params.output_config = { effort } as typeof params.output_config;
1898
+ }
1592
1899
  }
1900
+ } else if (options?.thinkingEnabled === false) {
1901
+ params.thinking = { type: "disabled" };
1593
1902
  }
1594
1903
  }
1595
1904
 
@@ -2108,7 +2417,12 @@ function buildAnthropicToolSchemaPlans(tools: Tool[], disableStrictTools = false
2108
2417
  return plans;
2109
2418
  }
2110
2419
 
2111
- function convertTools(tools: Tool[], isOAuthToken: boolean, disableStrictTools = false): Anthropic.Messages.Tool[] {
2420
+ function convertTools(
2421
+ tools: Tool[],
2422
+ isOAuthToken: boolean,
2423
+ disableStrictTools = false,
2424
+ supportsEagerToolInputStreaming = true,
2425
+ ): Anthropic.Messages.Tool[] {
2112
2426
  if (!tools) return [];
2113
2427
  const schemaPlans = buildAnthropicToolSchemaPlans(tools, disableStrictTools);
2114
2428
 
@@ -2118,6 +2432,7 @@ function convertTools(tools: Tool[], isOAuthToken: boolean, disableStrictTools =
2118
2432
  name: isOAuthToken ? applyClaudeToolPrefix(tool.name) : tool.name,
2119
2433
  description: tool.description || "",
2120
2434
  input_schema: plan.inputSchema,
2435
+ ...(supportsEagerToolInputStreaming ? { eager_input_streaming: true } : {}),
2121
2436
  ...(plan.strict ? { strict: true } : {}),
2122
2437
  };
2123
2438
  });
@@ -144,7 +144,7 @@ export async function transformRequestBody(
144
144
 
145
145
  body.text = {
146
146
  ...body.text,
147
- verbosity: options.textVerbosity || "medium",
147
+ verbosity: options.textVerbosity || "low",
148
148
  };
149
149
 
150
150
  const include = Array.isArray(options.include) ? [...options.include] : [];