@oh-my-pi/pi-ai 14.5.14 → 14.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -1
- package/package.json +3 -3
- package/src/index.ts +1 -1
- package/src/providers/anthropic.ts +351 -36
- package/src/providers/openai-codex/request-transformer.ts +1 -1
- package/src/providers/openai-codex-responses.ts +182 -57
- package/src/providers/openai-completions.ts +45 -29
- package/src/stream.ts +1 -0
- package/src/types.ts +13 -0
- package/src/utils/abort.ts +18 -3
- package/src/utils/json-parse.ts +127 -7
- package/src/utils/oauth/anthropic.ts +78 -40
- package/src/providers/antigravity-user-agent.ts +0 -19
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,34 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [14.6.0] - 2026-05-02
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
|
|
9
|
+
- Added `disableReasoning` to stream and OpenAI completion options to force reasoning off for models that support it, sending `reasoning: { enabled: false }` for OpenRouter-compatible requests
|
|
10
|
+
- Added `thinkingDisplay` option to Anthropic options to control whether adaptive and explicit reasoning is returned as `summarized` or `omitted`
|
|
11
|
+
- Added Anthropic model compatibility flags `supportsEagerToolInputStreaming` and `supportsLongCacheRetention` for API-capability-specific request behavior
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- Changed Anthropic request payloads to send `thinking: { type: "disabled" }` when `thinkingEnabled` is explicitly `false` on reasoning-enabled models
|
|
16
|
+
- Changed Anthropic cache retention handling so `cacheRetention: "long"` now uses `ttl: "1h"` only for canonical Anthropic endpoints with long-cache support
|
|
17
|
+
- Changed Anthropic tool schema generation to include `eager_input_streaming` only on models that advertise support
|
|
18
|
+
- Changed Anthropic OAuth login flow to include browser fallback guidance and richer error context when token exchange or refresh fails
|
|
19
|
+
|
|
20
|
+
### Fixed
|
|
21
|
+
|
|
22
|
+
- Fixed Anthropic non-thinking requests to include the caller-provided `temperature` value in request payloads
|
|
23
|
+
- Fixed Anthropic `claude-opus-4-7` non-thinking payloads to omit sampling fields (`temperature`, `top_p`, and `top_k`)
|
|
24
|
+
- Fixed OpenAI Codex base URL normalization so configured base URLs with or without `/codex` or `/codex/responses` now resolve to `/codex/responses`
|
|
25
|
+
- Fixed OpenAI Codex websocket handling to parse JSON from non-string message payloads including `ArrayBuffer`, typed arrays, and `Blob` values
|
|
26
|
+
- Fixed OpenAI Codex websocket handshakes to replace stale `openai-beta` values with the websocket beta and avoid sending request-body headers over websocket transport
|
|
27
|
+
- Fixed abort tracking so caller-initiated cancellations are treated as user aborts even after local watchdog timeouts, preventing unintended automatic retries
|
|
28
|
+
- Fixed Anthropic stream handling to parse raw SSE envelopes directly, ignore unrelated events, and repair malformed JSON in SSE payloads
|
|
29
|
+
- Fixed Anthropic streaming to emit an explicit error when the SSE stream ends without a `message_stop` event
|
|
30
|
+
- Fixed OpenAI Codex websocket continuations to send true `previous_response_id` deltas for `store: false` transcripts, expose request stats, and default text verbosity to `low` unless explicitly overridden
|
|
31
|
+
- Fixed OpenAI Codex websocket append reuse after `response.completed` terminal events
|
|
32
|
+
|
|
5
33
|
## [14.5.14] - 2026-05-01
|
|
6
34
|
### Added
|
|
7
35
|
|
|
@@ -2208,4 +2236,4 @@ _Dedicated to Peter's shoulder ([@steipete](https://twitter.com/steipete))_
|
|
|
2208
2236
|
|
|
2209
2237
|
## [0.9.4] - 2025-11-26
|
|
2210
2238
|
|
|
2211
|
-
Initial release with multi-provider LLM support.
|
|
2239
|
+
Initial release with multi-provider LLM support.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-ai",
|
|
4
|
-
"version": "14.5.14",
|
|
4
|
+
"version": "14.6.0",
|
|
5
5
|
"description": "Unified LLM API with automatic model discovery and provider configuration",
|
|
6
6
|
"homepage": "https://github.com/can1357/oh-my-pi",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -46,8 +46,8 @@
|
|
|
46
46
|
"@aws-sdk/credential-provider-node": "^3.972.36",
|
|
47
47
|
"@bufbuild/protobuf": "^2.12.0",
|
|
48
48
|
"@google/genai": "^1.50.1",
|
|
49
|
-
"@oh-my-pi/pi-natives": "14.
|
|
50
|
-
"@oh-my-pi/pi-utils": "14.
|
|
49
|
+
"@oh-my-pi/pi-natives": "14.6.0",
|
|
50
|
+
"@oh-my-pi/pi-utils": "14.6.0",
|
|
51
51
|
"@sinclair/typebox": "^0.34.49",
|
|
52
52
|
"@smithy/node-http-handler": "^4.6.1",
|
|
53
53
|
"ajv": "^8.20.0",
|
package/src/index.ts
CHANGED
|
@@ -18,7 +18,7 @@ export * from "./providers/google-gemini-headers";
|
|
|
18
18
|
export type * from "./providers/google-vertex";
|
|
19
19
|
export * from "./providers/kimi";
|
|
20
20
|
export * from "./providers/ollama";
|
|
21
|
-
export
|
|
21
|
+
export * from "./providers/openai-codex-responses";
|
|
22
22
|
export * from "./providers/openai-completions";
|
|
23
23
|
export * from "./providers/openai-responses";
|
|
24
24
|
export * from "./providers/synthetic";
|
|
@@ -6,6 +6,7 @@ import type {
|
|
|
6
6
|
ContentBlockParam,
|
|
7
7
|
MessageCreateParamsStreaming,
|
|
8
8
|
MessageParam,
|
|
9
|
+
RawMessageStreamEvent,
|
|
9
10
|
} from "@anthropic-ai/sdk/resources/messages";
|
|
10
11
|
import { $env, abortableSleep, isEnoent } from "@oh-my-pi/pi-utils";
|
|
11
12
|
import { hasOpus47ApiRestrictions, mapEffortToAnthropicAdaptiveEffort } from "../model-thinking";
|
|
@@ -38,7 +39,7 @@ import { AssistantMessageEventStream } from "../utils/event-stream";
|
|
|
38
39
|
import { isFoundryEnabled } from "../utils/foundry";
|
|
39
40
|
import { finalizeErrorMessage, type RawHttpRequestDump, rewriteCopilotError } from "../utils/http-inspector";
|
|
40
41
|
import { createWatchdog, getStreamFirstEventTimeoutMs } from "../utils/idle-iterator";
|
|
41
|
-
import { parseStreamingJson } from "../utils/json-parse";
|
|
42
|
+
import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse";
|
|
42
43
|
import { parseGitHubCopilotApiKey } from "../utils/oauth/github-copilot";
|
|
43
44
|
import { notifyProviderResponse } from "../utils/provider-response";
|
|
44
45
|
import { extractHttpStatusFromError, isCopilotRetryableError, isUnexpectedSocketCloseMessage } from "../utils/retry";
|
|
@@ -57,6 +58,7 @@ export type AnthropicHeaderOptions = {
|
|
|
57
58
|
extraBetas?: string[];
|
|
58
59
|
stream?: boolean;
|
|
59
60
|
modelHeaders?: Record<string, string>;
|
|
61
|
+
isCloudflareAiGateway?: boolean;
|
|
60
62
|
};
|
|
61
63
|
|
|
62
64
|
export function normalizeAnthropicBaseUrl(baseUrl?: string): string | undefined {
|
|
@@ -88,6 +90,9 @@ const claudeCodeBetaDefaults = [
|
|
|
88
90
|
"context-management-2025-06-27",
|
|
89
91
|
"prompt-caching-scope-2026-01-05",
|
|
90
92
|
];
|
|
93
|
+
const fineGrainedToolStreamingBeta = "fine-grained-tool-streaming-2025-05-14";
|
|
94
|
+
const interleavedThinkingBeta = "interleaved-thinking-2025-05-14";
|
|
95
|
+
|
|
91
96
|
function getHeaderCaseInsensitive(headers: Record<string, string> | undefined, headerName: string): string | undefined {
|
|
92
97
|
if (!headers) return undefined;
|
|
93
98
|
const normalizedName = headerName.toLowerCase();
|
|
@@ -131,6 +136,16 @@ export function buildAnthropicHeaders(options: AnthropicHeaderOptions): Record<s
|
|
|
131
136
|
Object.entries(options.modelHeaders ?? {}).filter(([key]) => !enforcedHeaderKeys.has(key.toLowerCase())),
|
|
132
137
|
);
|
|
133
138
|
|
|
139
|
+
if (options.isCloudflareAiGateway) {
|
|
140
|
+
return {
|
|
141
|
+
...modelHeaders,
|
|
142
|
+
Accept: acceptHeader,
|
|
143
|
+
...sharedHeaders,
|
|
144
|
+
"Anthropic-Beta": betaHeader,
|
|
145
|
+
"cf-aig-authorization": `Bearer ${options.apiKey}`,
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
|
|
134
149
|
if (oauthToken) {
|
|
135
150
|
const incomingUserAgent = getHeaderCaseInsensitive(options.modelHeaders, "User-Agent");
|
|
136
151
|
const userAgent = isClaudeCodeClientUserAgent(incomingUserAgent)
|
|
@@ -235,6 +250,7 @@ function dropAnthropicStrictTools(params: MessageCreateParamsStreaming): void {
|
|
|
235
250
|
}
|
|
236
251
|
|
|
237
252
|
function getCacheControl(
|
|
253
|
+
model: Model<"anthropic-messages">,
|
|
238
254
|
baseUrl: string,
|
|
239
255
|
cacheRetention?: CacheRetention,
|
|
240
256
|
): { retention: CacheRetention; cacheControl?: AnthropicCacheControl } {
|
|
@@ -242,7 +258,10 @@ function getCacheControl(
|
|
|
242
258
|
if (retention === "none") {
|
|
243
259
|
return { retention };
|
|
244
260
|
}
|
|
245
|
-
const ttl =
|
|
261
|
+
const ttl =
|
|
262
|
+
retention === "long" && isAnthropicApiBaseUrl(baseUrl) && getAnthropicCompat(model).supportsLongCacheRetention
|
|
263
|
+
? "1h"
|
|
264
|
+
: undefined;
|
|
246
265
|
return {
|
|
247
266
|
retention,
|
|
248
267
|
cacheControl: { type: "ephemeral", ...(ttl && { ttl }) },
|
|
@@ -312,6 +331,7 @@ const enforcedHeaderKeys = new Set(
|
|
|
312
331
|
"X-App",
|
|
313
332
|
"Authorization",
|
|
314
333
|
"X-Api-Key",
|
|
334
|
+
"cf-aig-authorization",
|
|
315
335
|
].map(key => key.toLowerCase()),
|
|
316
336
|
);
|
|
317
337
|
|
|
@@ -424,6 +444,7 @@ function convertContentBlocks(content: (TextContent | ImageContent)[]):
|
|
|
424
444
|
}
|
|
425
445
|
|
|
426
446
|
export type AnthropicEffort = "low" | "medium" | "high" | "xhigh" | "max";
|
|
447
|
+
export type AnthropicThinkingDisplay = "summarized" | "omitted";
|
|
427
448
|
|
|
428
449
|
export interface AnthropicOptions extends StreamOptions {
|
|
429
450
|
/**
|
|
@@ -452,6 +473,12 @@ export interface AnthropicOptions extends StreamOptions {
|
|
|
452
473
|
* Converted to adaptive effort when effort is not explicitly provided.
|
|
453
474
|
*/
|
|
454
475
|
reasoning?: SimpleStreamOptions["reasoning"];
|
|
476
|
+
/**
|
|
477
|
+
* Controls how Anthropic returns thinking content when the selected thinking
|
|
478
|
+
* transport supports a display option. Defaults to "summarized" where the
|
|
479
|
+
* API accepts it.
|
|
480
|
+
*/
|
|
481
|
+
thinkingDisplay?: AnthropicThinkingDisplay;
|
|
455
482
|
interleavedThinking?: boolean;
|
|
456
483
|
toolChoice?: "auto" | "any" | "none" | { type: "tool"; name: string };
|
|
457
484
|
betas?: string[] | string;
|
|
@@ -474,12 +501,13 @@ export type AnthropicClientOptionsArgs = {
|
|
|
474
501
|
headers?: Record<string, string>;
|
|
475
502
|
dynamicHeaders?: Record<string, string>;
|
|
476
503
|
isOAuth?: boolean;
|
|
504
|
+
hasTools?: boolean;
|
|
477
505
|
};
|
|
478
506
|
|
|
479
507
|
export type AnthropicClientOptionsResult = {
|
|
480
508
|
isOAuthToken: boolean;
|
|
481
509
|
apiKey: string | null;
|
|
482
|
-
authToken?: string;
|
|
510
|
+
authToken?: string | null;
|
|
483
511
|
baseURL?: string;
|
|
484
512
|
maxRetries: number;
|
|
485
513
|
dangerouslyAllowBrowser: boolean;
|
|
@@ -624,6 +652,248 @@ function mergeHeaders(...headerSources: (Record<string, string> | undefined)[]):
|
|
|
624
652
|
// We surface the resulting provider error ourselves, so keep the SDK quiet.
|
|
625
653
|
const ANTHROPIC_SDK_LOG_LEVEL = "off" as const;
|
|
626
654
|
|
|
655
|
+
/**
 * One fully assembled Server-Sent Event, as produced by `flushSseEvent`.
 */
interface ServerSentEvent {
	/** Value of the last `event:` field seen for this event, or `null` when none was sent. */
	event: string | null;
	/** All `data:` field values of this event, joined with "\n". */
	data: string;
	/** The non-empty source lines (comment lines included) collected since the previous dispatch. */
	raw: string[];
}
|
|
660
|
+
|
|
661
|
+
/**
 * Mutable accumulator shared by `decodeSseLine` and `flushSseEvent` while the
 * lines of a single Server-Sent Event are being collected.
 */
interface SseDecoderState {
	/** Most recent `event:` field value, or `null` if none has been seen since the last flush. */
	event: string | null;
	/** `data:` field values in arrival order; joined with "\n" when the event is flushed. */
	data: string[];
	/** Every non-empty line pushed since the last successful flush. */
	raw: string[];
}
|
|
666
|
+
|
|
667
|
+
/**
 * SSE event names that `iterateAnthropicEvents` parses and forwards.
 * Any other event name (apart from "error", which is handled separately there)
 * is skipped.
 */
const ANTHROPIC_MESSAGE_EVENTS: ReadonlySet<string> = new Set<string>([
	// Message lifecycle
	"message_start",
	"message_delta",
	"message_stop",
	// Content block lifecycle
	"content_block_start",
	"content_block_delta",
	"content_block_stop",
]);
|
|
675
|
+
|
|
676
|
+
/**
 * Emits the event accumulated in `state` and resets the accumulator.
 *
 * @param state - Decoder state to drain; mutated in place when an event is produced.
 * @returns The assembled event, or `null` when neither an event name nor any
 *   data has been collected. In the `null` case `state` is left untouched, so
 *   previously recorded raw lines stay queued.
 */
function flushSseEvent(state: SseDecoderState): ServerSentEvent | null {
	const hasPendingEvent = Boolean(state.event) || state.data.length > 0;
	if (!hasPendingEvent) {
		return null;
	}

	const { event, data, raw } = state;

	// Hand the state fresh containers so the returned event never aliases it.
	state.event = null;
	state.data = [];
	state.raw = [];

	return { event, data: data.join("\n"), raw: raw.slice() };
}
|
|
691
|
+
|
|
692
|
+
/**
 * Feeds one line (without its terminator) into the SSE decoder.
 *
 * @param line - A single line of the event stream.
 * @param state - Accumulator that is updated with the field carried by `line`.
 * @returns The completed event when `line` is empty and something was pending;
 *   `null` for every field line, comment line, or empty line with nothing pending.
 */
function decodeSseLine(line: string, state: SseDecoderState): ServerSentEvent | null {
	// A blank line terminates the current event.
	if (line.length === 0) {
		return flushSseEvent(state);
	}

	state.raw.push(line);

	// Lines beginning with ":" are comments; they are recorded in `raw` only.
	if (line[0] === ":") {
		return null;
	}

	const colonAt = line.indexOf(":");
	const hasColon = colonAt !== -1;
	const field = hasColon ? line.slice(0, colonAt) : line;
	const rest = hasColon ? line.slice(colonAt + 1) : "";
	// Exactly one leading space after the colon is not part of the value.
	const fieldValue = rest[0] === " " ? rest.slice(1) : rest;

	switch (field) {
		case "event":
			state.event = fieldValue;
			break;
		case "data":
			state.data.push(fieldValue);
			break;
		default:
			// Other fields (id, retry, unknown names) are ignored.
			break;
	}

	return null;
}
|
|
717
|
+
|
|
718
|
+
/**
 * Locates the first line-break character in `text`.
 *
 * @param text - Text to scan.
 * @returns Index of the earliest "\r" or "\n", or -1 when `text` contains neither.
 */
function nextLineBreakIndex(text: string): number {
	const candidates = [text.indexOf("\r"), text.indexOf("\n")].filter(index => index !== -1);
	return candidates.length === 0 ? -1 : Math.min(...candidates);
}
|
|
729
|
+
|
|
730
|
+
/**
 * Splits the first complete line off the front of `text`.
 *
 * "\n", "\r" and "\r\n" are all accepted as terminators; the terminator is
 * dropped from both halves of the result.
 *
 * @param text - Buffered stream text.
 * @returns The first line and the remaining text, or `null` when `text`
 *   contains no line terminator yet.
 */
function consumeLine(text: string): { line: string; rest: string } | null {
	const breakAt = nextLineBreakIndex(text);
	if (breakAt === -1) {
		return null;
	}

	// "\r\n" counts as one two-character terminator; everything else is one character.
	const isCrLf = text[breakAt] === "\r" && text[breakAt + 1] === "\n";
	const terminatorLength = isCrLf ? 2 : 1;

	const line = text.slice(0, breakAt);
	const rest = text.slice(breakAt + terminatorLength);
	return { line, rest };
}
|
|
746
|
+
|
|
747
|
+
/**
 * Incrementally decodes a byte stream into Server-Sent Events.
 *
 * Bytes are decoded as UTF-8 in streaming mode, split into lines on "\n",
 * "\r" or "\r\n", and fed through `decodeSseLine`. An event is yielded each
 * time a blank line completes one. When the stream ends, any unterminated
 * final line and any still-pending event are processed as well.
 *
 * A "\r\n" terminator that happens to be split across two chunks is treated as
 * a single line break: the "\r" ends the line when it arrives, and the "\n"
 * opening the following chunk is discarded instead of being read as a blank
 * line (which would dispatch the event prematurely).
 *
 * @param body - Response body to read; its reader lock is released when iteration finishes or is abandoned.
 * @param signal - Optional abort signal, checked before every read.
 * @yields Each completed event in stream order.
 * @throws Error with message "Request was aborted" when `signal` is aborted before a read.
 */
async function* iterateSseMessages(
	body: ReadableStream<Uint8Array>,
	signal?: AbortSignal,
): AsyncGenerator<ServerSentEvent> {
	const reader = body.getReader();
	const decoder = new TextDecoder();
	const state: SseDecoderState = { event: null, data: [], raw: [] };
	let buffer = "";
	// True when the previously decoded text ended in "\r". consumeLine() has
	// already consumed that "\r" as a terminator, so a "\n" at the start of the
	// next chunk is the second half of a split CRLF, not a new (blank) line.
	let pendingCarriageReturn = false;

	// Pulls every complete line out of `buffer` and yields the events they finish.
	function* drainCompleteLines(): Generator<ServerSentEvent> {
		for (let consumed = consumeLine(buffer); consumed; consumed = consumeLine(buffer)) {
			buffer = consumed.rest;
			const event = decodeSseLine(consumed.line, state);
			if (event) {
				yield event;
			}
		}
	}

	try {
		while (true) {
			if (signal?.aborted) {
				throw new Error("Request was aborted");
			}

			const { value, done } = await reader.read();
			if (done) {
				break;
			}

			let text = decoder.decode(value, { stream: true });
			if (text.length === 0) {
				// Only part of a multi-byte sequence arrived; nothing to parse yet
				// and the pending-CR flag must survive until real text shows up.
				continue;
			}
			if (pendingCarriageReturn && text.startsWith("\n")) {
				text = text.slice(1);
			}
			pendingCarriageReturn = text.endsWith("\r");

			buffer += text;
			yield* drainCompleteLines();
		}

		// Flush whatever the decoder is still holding, then process remaining lines.
		buffer += decoder.decode();
		yield* drainCompleteLines();

		// A final line without a terminator still contributes its field.
		if (buffer.length > 0) {
			const event = decodeSseLine(buffer, state);
			if (event) {
				yield event;
			}
		}

		// Dispatch an event that was never closed by a blank line.
		const trailingEvent = flushSseEvent(state);
		if (trailingEvent) {
			yield trailingEvent;
		}
	} finally {
		reader.releaseLock();
	}
}
|
|
805
|
+
|
|
806
|
+
/**
 * Turns the SSE body of an Anthropic streaming response into typed message events.
 *
 * Events named "error" abort iteration with their data as the error message.
 * Events whose name is not in `ANTHROPIC_MESSAGE_EVENTS` are skipped. All
 * others are parsed with `parseJsonWithRepair` and yielded.
 *
 * @param response - HTTP response whose body carries the event stream.
 * @param signal - Optional abort signal forwarded to `iterateSseMessages`.
 * @yields Parsed stream events in arrival order.
 * @throws Error when the response has no body, when an "error" event arrives,
 *   or when an event payload cannot be parsed; the envelope error from
 *   `createAnthropicStreamEnvelopeError` when a `message_start` was seen but
 *   the stream ended without `message_stop`.
 */
async function* iterateAnthropicEvents(
	response: Response,
	signal?: AbortSignal,
): AsyncGenerator<RawMessageStreamEvent> {
	const body = response.body;
	if (!body) {
		throw new Error("Attempted to iterate over an Anthropic response with no body");
	}

	let messageStarted = false;
	let messageStopped = false;

	for await (const sse of iterateSseMessages(body, signal)) {
		const eventName = sse.event;
		if (eventName === "error") {
			throw new Error(sse.data);
		}

		const isMessageEvent = ANTHROPIC_MESSAGE_EVENTS.has(eventName ?? "");
		if (!isMessageEvent) {
			continue;
		}

		try {
			const parsed = parseJsonWithRepair<RawMessageStreamEvent>(sse.data);
			switch (parsed.type) {
				case "message_start":
					messageStarted = true;
					break;
				case "message_stop":
					messageStopped = true;
					break;
				default:
					break;
			}
			yield parsed;
		} catch (error) {
			const message = error instanceof Error ? error.message : String(error);
			// Raw lines are joined with a literal backslash-n so the message stays on one line.
			const rawLines = sse.raw.join("\\n");
			throw new Error(
				`Could not parse Anthropic SSE event ${sse.event}: ${message}; data=${sse.data}; raw=${rawLines}`,
			);
		}
	}

	// A stream that never started, or that stopped cleanly, is fine.
	if (!messageStarted || messageStopped) {
		return;
	}
	throw createAnthropicStreamEnvelopeError("stream ended before message_stop");
}
|
|
846
|
+
|
|
847
|
+
/** Request object that can hand back its underlying HTTP response via `asResponse()`. */
type AnthropicRawResponseRequest = {
	asResponse(): Promise<Response>;
};

/**
 * Type guard: reports whether `request` is a record exposing a callable `asResponse`.
 *
 * @param request - Value returned by the SDK call, of unknown shape.
 * @returns `true` when `request.asResponse` is a function.
 */
function hasAnthropicRawResponseRequest(request: unknown): request is AnthropicRawResponseRequest {
	if (!isRecord(request)) {
		return false;
	}
	return typeof request.asResponse === "function";
}
|
|
854
|
+
|
|
855
|
+
/**
 * Request object whose `withResponse()` resolves to the event iterable
 * together with the HTTP response and the request id.
 */
type AnthropicStreamWithResponseRequest = {
	withResponse(): Promise<{
		data: AsyncIterable<RawMessageStreamEvent>;
		response: Response;
		request_id: string | null;
	}>;
};

/**
 * Type guard: reports whether `request` is a record exposing a callable `withResponse`.
 *
 * @param request - Value returned by the SDK call, of unknown shape.
 * @returns `true` when `request.withResponse` is a function.
 */
function hasAnthropicStreamWithResponseRequest(request: unknown): request is AnthropicStreamWithResponseRequest {
	if (!isRecord(request)) {
		return false;
	}
	return typeof request.withResponse === "function";
}
|
|
866
|
+
|
|
867
|
+
/**
 * Resolves an Anthropic SDK request into an event iterable plus response metadata.
 *
 * When the request exposes `asResponse()`, the response body is decoded
 * locally through `iterateAnthropicEvents` and the request id is read from the
 * "request-id" response header. Otherwise, when it exposes `withResponse()`,
 * the iterable, response and request id it resolves to are passed through.
 *
 * @param request - Value returned by the SDK streaming call.
 * @param signal - Optional abort signal; only used on the `asResponse()` path.
 * @returns The event iterable, the HTTP response, and the request id (or `null`).
 * @throws Error when `request` supports neither access method.
 */
async function getAnthropicStreamResponse(
	request: unknown,
	signal?: AbortSignal,
): Promise<{ events: AsyncIterable<RawMessageStreamEvent>; response: Response; requestId: string | null }> {
	// Preferred path: take the raw response and parse the SSE envelope ourselves.
	if (hasAnthropicRawResponseRequest(request)) {
		const rawResponse = await request.asResponse();
		const events = iterateAnthropicEvents(rawResponse, signal);
		const requestId = rawResponse.headers.get("request-id");
		return { events, response: rawResponse, requestId };
	}

	if (!hasAnthropicStreamWithResponseRequest(request)) {
		throw new Error("Anthropic SDK request did not expose a stream response");
	}

	const streamed = await request.withResponse();
	return {
		events: streamed.data,
		response: streamed.response,
		requestId: streamed.request_id,
	};
}
|
|
885
|
+
|
|
886
|
+
/**
 * Returns the model's compatibility flags with every flag filled in.
 *
 * Flags missing from `model.compat` (or a missing `compat` altogether) fall
 * back to: strict tools and adaptive thinking not disabled, eager tool input
 * streaming and long cache retention supported.
 *
 * @param model - Anthropic model definition.
 * @returns A fully populated compat object.
 */
function getAnthropicCompat(
	model: Model<"anthropic-messages">,
): Required<NonNullable<Model<"anthropic-messages">["compat"]>> {
	const overrides = model.compat;
	const disableStrictTools = overrides?.disableStrictTools ?? false;
	const disableAdaptiveThinking = overrides?.disableAdaptiveThinking ?? false;
	const supportsEagerToolInputStreaming = overrides?.supportsEagerToolInputStreaming ?? true;
	const supportsLongCacheRetention = overrides?.supportsLongCacheRetention ?? true;

	return {
		disableStrictTools,
		disableAdaptiveThinking,
		supportsEagerToolInputStreaming,
		supportsLongCacheRetention,
	};
}
|
|
896
|
+
|
|
627
897
|
const PROVIDER_MAX_RETRIES = 3;
|
|
628
898
|
const PROVIDER_BASE_DELAY_MS = 2000;
|
|
629
899
|
|
|
@@ -789,6 +1059,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
789
1059
|
headers: options?.headers,
|
|
790
1060
|
dynamicHeaders: copilotDynamicHeaders?.headers,
|
|
791
1061
|
isOAuth: options?.isOAuth,
|
|
1062
|
+
hasTools: !!context.tools?.length,
|
|
792
1063
|
});
|
|
793
1064
|
client = created.client;
|
|
794
1065
|
isOAuthToken = created.isOAuthToken;
|
|
@@ -844,8 +1115,12 @@ export const streamAnthropic: StreamFunction<"anthropic-messages"> = (
|
|
|
844
1115
|
let streamedReplayUnsafeContent = false;
|
|
845
1116
|
|
|
846
1117
|
try {
|
|
847
|
-
const {
|
|
848
|
-
|
|
1118
|
+
const {
|
|
1119
|
+
events: anthropicStream,
|
|
1120
|
+
response,
|
|
1121
|
+
requestId,
|
|
1122
|
+
} = await getAnthropicStreamResponse(anthropicRequest, requestSignal);
|
|
1123
|
+
await notifyProviderResponse(options, response, model, requestId);
|
|
849
1124
|
const firstEventWatchdog = createWatchdog(firstEventTimeoutMs, () =>
|
|
850
1125
|
activeAbortTracker.abortLocally(firstEventTimeoutAbortError),
|
|
851
1126
|
);
|
|
@@ -1199,9 +1474,12 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1199
1474
|
interleavedThinking = true,
|
|
1200
1475
|
headers,
|
|
1201
1476
|
dynamicHeaders,
|
|
1477
|
+
hasTools = false,
|
|
1202
1478
|
isOAuth,
|
|
1203
1479
|
} = args;
|
|
1480
|
+
const compat = getAnthropicCompat(model);
|
|
1204
1481
|
const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinkingDisplay(model.id);
|
|
1482
|
+
const needsFineGrainedToolStreamingBeta = hasTools && !compat.supportsEagerToolInputStreaming;
|
|
1205
1483
|
const oauthToken = isOAuth ?? isAnthropicOAuthToken(apiKey);
|
|
1206
1484
|
const baseUrl = resolveAnthropicBaseUrl(model, apiKey);
|
|
1207
1485
|
const foundryCustomHeaders = resolveAnthropicCustomHeaders(model);
|
|
@@ -1209,6 +1487,9 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1209
1487
|
if (model.provider === "github-copilot") {
|
|
1210
1488
|
const copilotApiKey = parseGitHubCopilotApiKey(apiKey).accessToken;
|
|
1211
1489
|
const betaFeatures = [...extraBetas];
|
|
1490
|
+
if (needsFineGrainedToolStreamingBeta) {
|
|
1491
|
+
betaFeatures.push(fineGrainedToolStreamingBeta);
|
|
1492
|
+
}
|
|
1212
1493
|
const defaultHeaders = mergeHeaders(
|
|
1213
1494
|
{
|
|
1214
1495
|
Accept: stream ? "text/event-stream" : "application/json",
|
|
@@ -1235,8 +1516,11 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1235
1516
|
}
|
|
1236
1517
|
|
|
1237
1518
|
const betaFeatures = [...extraBetas];
|
|
1519
|
+
if (needsFineGrainedToolStreamingBeta) {
|
|
1520
|
+
betaFeatures.push(fineGrainedToolStreamingBeta);
|
|
1521
|
+
}
|
|
1238
1522
|
if (needsInterleavedBeta) {
|
|
1239
|
-
betaFeatures.push(
|
|
1523
|
+
betaFeatures.push(interleavedThinkingBeta);
|
|
1240
1524
|
}
|
|
1241
1525
|
|
|
1242
1526
|
const defaultHeaders = buildAnthropicHeaders({
|
|
@@ -1246,8 +1530,22 @@ export function buildAnthropicClientOptions(args: AnthropicClientOptionsArgs): A
|
|
|
1246
1530
|
extraBetas: betaFeatures,
|
|
1247
1531
|
stream,
|
|
1248
1532
|
modelHeaders: mergeHeaders(model.headers, foundryCustomHeaders, headers, dynamicHeaders),
|
|
1533
|
+
isCloudflareAiGateway: model.provider === "cloudflare-ai-gateway",
|
|
1249
1534
|
});
|
|
1250
1535
|
|
|
1536
|
+
if (model.provider === "cloudflare-ai-gateway") {
|
|
1537
|
+
return {
|
|
1538
|
+
isOAuthToken: false,
|
|
1539
|
+
apiKey: null,
|
|
1540
|
+
authToken: null,
|
|
1541
|
+
baseURL: baseUrl,
|
|
1542
|
+
maxRetries: 5,
|
|
1543
|
+
dangerouslyAllowBrowser: true,
|
|
1544
|
+
defaultHeaders,
|
|
1545
|
+
logLevel: ANTHROPIC_SDK_LOG_LEVEL,
|
|
1546
|
+
};
|
|
1547
|
+
}
|
|
1548
|
+
|
|
1251
1549
|
return {
|
|
1252
1550
|
isOAuthToken: oauthToken,
|
|
1253
1551
|
apiKey: oauthToken ? null : apiKey,
|
|
@@ -1532,13 +1830,16 @@ function buildParams(
|
|
|
1532
1830
|
options?: AnthropicOptions,
|
|
1533
1831
|
disableStrictTools = false,
|
|
1534
1832
|
): MessageCreateParamsStreaming {
|
|
1535
|
-
const { cacheControl } = getCacheControl(baseUrl, options?.cacheRetention);
|
|
1833
|
+
const { cacheControl } = getCacheControl(model, baseUrl, options?.cacheRetention);
|
|
1536
1834
|
const params: AnthropicSamplingParams = {
|
|
1537
1835
|
model: model.id,
|
|
1538
1836
|
messages: convertAnthropicMessages(context.messages, model, isOAuthToken),
|
|
1539
1837
|
max_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,
|
|
1540
1838
|
stream: true,
|
|
1541
1839
|
};
|
|
1840
|
+
if (options?.temperature !== undefined && !options?.thinkingEnabled) {
|
|
1841
|
+
params.temperature = options.temperature;
|
|
1842
|
+
}
|
|
1542
1843
|
|
|
1543
1844
|
if (options?.topP !== undefined) {
|
|
1544
1845
|
params.top_p = options.topP;
|
|
@@ -1551,6 +1852,7 @@ function buildParams(
|
|
|
1551
1852
|
if (hasOpus47ApiRestrictions(model.id)) {
|
|
1552
1853
|
delete params.top_p;
|
|
1553
1854
|
delete params.top_k;
|
|
1855
|
+
delete params.temperature;
|
|
1554
1856
|
}
|
|
1555
1857
|
|
|
1556
1858
|
if (context.tools) {
|
|
@@ -1558,38 +1860,45 @@ function buildParams(
|
|
|
1558
1860
|
context.tools,
|
|
1559
1861
|
isOAuthToken,
|
|
1560
1862
|
disableStrictTools || model.provider === "github-copilot",
|
|
1863
|
+
getAnthropicCompat(model).supportsEagerToolInputStreaming,
|
|
1561
1864
|
);
|
|
1562
1865
|
}
|
|
1563
1866
|
|
|
1564
|
-
if (
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
adaptive
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1867
|
+
if (model.reasoning) {
|
|
1868
|
+
if (options?.thinkingEnabled) {
|
|
1869
|
+
const mode = model.thinking?.mode;
|
|
1870
|
+
const requestedEffort = options.reasoning;
|
|
1871
|
+
const effort =
|
|
1872
|
+
options.effort ??
|
|
1873
|
+
(requestedEffort ? mapEffortToAnthropicAdaptiveEffort(model, requestedEffort) : undefined);
|
|
1874
|
+
|
|
1875
|
+
const compat = getAnthropicCompat(model);
|
|
1876
|
+
if (mode === "anthropic-adaptive" && !compat.disableAdaptiveThinking) {
|
|
1877
|
+
// Starting with Claude Opus 4.7, adaptive thinking content is omitted from the
|
|
1878
|
+
// response by default. Opt into summarized reasoning so thinking deltas keep
|
|
1879
|
+
// streaming with human-readable content for callers that rely on it.
|
|
1880
|
+
const adaptive: { type: "adaptive"; display?: AnthropicThinkingDisplay } = { type: "adaptive" };
|
|
1881
|
+
if (supportsAdaptiveThinkingDisplay(model.id)) {
|
|
1882
|
+
adaptive.display = options.thinkingDisplay ?? "summarized";
|
|
1883
|
+
}
|
|
1884
|
+
params.thinking = adaptive as typeof params.thinking;
|
|
1885
|
+
if (effort) {
|
|
1886
|
+
// SDK's OutputConfig.effort type is not yet widened to include the new "xhigh"
|
|
1887
|
+
// level introduced with Claude Opus 4.7. Cast until the SDK catches up.
|
|
1888
|
+
params.output_config = { effort } as typeof params.output_config;
|
|
1889
|
+
}
|
|
1890
|
+
} else {
|
|
1891
|
+
params.thinking = {
|
|
1892
|
+
type: "enabled",
|
|
1893
|
+
budget_tokens: options.thinkingBudgetTokens || 1024,
|
|
1894
|
+
display: options.thinkingDisplay ?? "summarized",
|
|
1895
|
+
} as typeof params.thinking;
|
|
1896
|
+
if (mode === "anthropic-budget-effort" && effort) {
|
|
1897
|
+
params.output_config = { effort } as typeof params.output_config;
|
|
1898
|
+
}
|
|
1592
1899
|
}
|
|
1900
|
+
} else if (options?.thinkingEnabled === false) {
|
|
1901
|
+
params.thinking = { type: "disabled" };
|
|
1593
1902
|
}
|
|
1594
1903
|
}
|
|
1595
1904
|
|
|
@@ -2108,7 +2417,12 @@ function buildAnthropicToolSchemaPlans(tools: Tool[], disableStrictTools = false
|
|
|
2108
2417
|
return plans;
|
|
2109
2418
|
}
|
|
2110
2419
|
|
|
2111
|
-
function convertTools(
|
|
2420
|
+
function convertTools(
|
|
2421
|
+
tools: Tool[],
|
|
2422
|
+
isOAuthToken: boolean,
|
|
2423
|
+
disableStrictTools = false,
|
|
2424
|
+
supportsEagerToolInputStreaming = true,
|
|
2425
|
+
): Anthropic.Messages.Tool[] {
|
|
2112
2426
|
if (!tools) return [];
|
|
2113
2427
|
const schemaPlans = buildAnthropicToolSchemaPlans(tools, disableStrictTools);
|
|
2114
2428
|
|
|
@@ -2118,6 +2432,7 @@ function convertTools(tools: Tool[], isOAuthToken: boolean, disableStrictTools =
|
|
|
2118
2432
|
name: isOAuthToken ? applyClaudeToolPrefix(tool.name) : tool.name,
|
|
2119
2433
|
description: tool.description || "",
|
|
2120
2434
|
input_schema: plan.inputSchema,
|
|
2435
|
+
...(supportsEagerToolInputStreaming ? { eager_input_streaming: true } : {}),
|
|
2121
2436
|
...(plan.strict ? { strict: true } : {}),
|
|
2122
2437
|
};
|
|
2123
2438
|
});
|
|
@@ -144,7 +144,7 @@ export async function transformRequestBody(
|
|
|
144
144
|
|
|
145
145
|
body.text = {
|
|
146
146
|
...body.text,
|
|
147
|
-
verbosity: options.textVerbosity || "
|
|
147
|
+
verbosity: options.textVerbosity || "low",
|
|
148
148
|
};
|
|
149
149
|
|
|
150
150
|
const include = Array.isArray(options.include) ? [...options.include] : [];
|