@eminent337/aery-ai 0.67.68 → 0.67.70
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -29
- package/dist/providers/anthropic.d.ts.map +1 -1
- package/dist/providers/anthropic.js +140 -8
- package/dist/providers/anthropic.js.map +1 -1
- package/dist/utils/json-parse.d.ts +8 -1
- package/dist/utils/json-parse.d.ts.map +1 -1
- package/dist/utils/json-parse.js +89 -5
- package/dist/utils/json-parse.js.map +1 -1
- package/dist/utils/oauth/anthropic.d.ts.map +1 -1
- package/dist/utils/oauth/anthropic.js +3 -3
- package/dist/utils/oauth/anthropic.js.map +1 -1
- package/dist/utils/oauth/openai-codex.d.ts.map +1 -1
- package/dist/utils/oauth/openai-codex.js +2 -2
- package/dist/utils/oauth/openai-codex.js.map +1 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# @
|
|
1
|
+
# @eminent337/aery-ai
|
|
2
2
|
|
|
3
3
|
Unified LLM API with automatic model discovery, provider configuration, token and cost tracking, and simple context persistence and hand-off to other models mid-session.
|
|
4
4
|
|
|
@@ -72,15 +72,15 @@ Unified LLM API with automatic model discovery, provider configuration, token an
|
|
|
72
72
|
## Installation
|
|
73
73
|
|
|
74
74
|
```bash
|
|
75
|
-
npm install @
|
|
75
|
+
npm install @eminent337/aery-ai
|
|
76
76
|
```
|
|
77
77
|
|
|
78
|
-
TypeBox exports are re-exported from `@
|
|
78
|
+
TypeBox exports are re-exported from `@eminent337/aery-ai`: `Type`, `Static`, and `TSchema`.
|
|
79
79
|
|
|
80
80
|
## Quick Start
|
|
81
81
|
|
|
82
82
|
```typescript
|
|
83
|
-
import { Type, getModel, stream, complete, Context, Tool, StringEnum } from '@
|
|
83
|
+
import { Type, getModel, stream, complete, Context, Tool, StringEnum } from '@eminent337/aery-ai';
|
|
84
84
|
|
|
85
85
|
// Fully typed with auto-complete support for both providers and models
|
|
86
86
|
const model = getModel('openai', 'gpt-4o-mini');
|
|
@@ -206,7 +206,7 @@ Tools enable LLMs to interact with external systems. This library uses TypeBox s
|
|
|
206
206
|
### Defining Tools
|
|
207
207
|
|
|
208
208
|
```typescript
|
|
209
|
-
import { Type, Tool, StringEnum } from '@
|
|
209
|
+
import { Type, Tool, StringEnum } from '@eminent337/aery-ai';
|
|
210
210
|
|
|
211
211
|
// Define tool parameters with TypeBox
|
|
212
212
|
const weatherTool: Tool = {
|
|
@@ -332,7 +332,7 @@ When using `agentLoop`, tool arguments are automatically validated against your
|
|
|
332
332
|
When implementing your own tool execution loop with `stream()` or `complete()`, use `validateToolCall` to validate arguments before passing them to your tools:
|
|
333
333
|
|
|
334
334
|
```typescript
|
|
335
|
-
import { stream, validateToolCall, Tool } from '@
|
|
335
|
+
import { stream, validateToolCall, Tool } from '@eminent337/aery-ai';
|
|
336
336
|
|
|
337
337
|
const tools: Tool[] = [weatherTool, calculatorTool];
|
|
338
338
|
const s = stream(model, { messages, tools });
|
|
@@ -386,7 +386,7 @@ Models with vision capabilities can process images. You can check if a model sup
|
|
|
386
386
|
|
|
387
387
|
```typescript
|
|
388
388
|
import { readFileSync } from 'fs';
|
|
389
|
-
import { getModel, complete } from '@
|
|
389
|
+
import { getModel, complete } from '@eminent337/aery-ai';
|
|
390
390
|
|
|
391
391
|
const model = getModel('openai', 'gpt-4o-mini');
|
|
392
392
|
|
|
@@ -423,7 +423,7 @@ Many models support thinking/reasoning capabilities where they can show their in
|
|
|
423
423
|
### Unified Interface (streamSimple/completeSimple)
|
|
424
424
|
|
|
425
425
|
```typescript
|
|
426
|
-
import { getModel, streamSimple, completeSimple } from '@
|
|
426
|
+
import { getModel, streamSimple, completeSimple } from '@eminent337/aery-ai';
|
|
427
427
|
|
|
428
428
|
// Many models across providers support thinking/reasoning
|
|
429
429
|
const model = getModel('anthropic', 'claude-sonnet-4-20250514');
|
|
@@ -461,7 +461,7 @@ for (const block of response.content) {
|
|
|
461
461
|
For fine-grained control, use the provider-specific options:
|
|
462
462
|
|
|
463
463
|
```typescript
|
|
464
|
-
import { getModel, complete } from '@
|
|
464
|
+
import { getModel, complete } from '@eminent337/aery-ai';
|
|
465
465
|
|
|
466
466
|
// OpenAI Reasoning (o1, o3, gpt-5)
|
|
467
467
|
const openaiModel = getModel('openai', 'gpt-5-mini');
|
|
@@ -550,7 +550,7 @@ if (message.stopReason === 'error' || message.stopReason === 'aborted') {
|
|
|
550
550
|
The abort signal allows you to cancel in-progress requests. Aborted requests have `stopReason === 'aborted'`:
|
|
551
551
|
|
|
552
552
|
```typescript
|
|
553
|
-
import { getModel, stream } from '@
|
|
553
|
+
import { getModel, stream } from '@eminent337/aery-ai';
|
|
554
554
|
|
|
555
555
|
const model = getModel('openai', 'gpt-4o-mini');
|
|
556
556
|
const controller = new AbortController();
|
|
@@ -649,7 +649,7 @@ import {
|
|
|
649
649
|
fauxToolCall,
|
|
650
650
|
registerFauxProvider,
|
|
651
651
|
stream,
|
|
652
|
-
} from '@
|
|
652
|
+
} from '@eminent337/aery-ai';
|
|
653
653
|
|
|
654
654
|
const registration = registerFauxProvider({
|
|
655
655
|
tokensPerSecond: 50 // optional
|
|
@@ -734,7 +734,7 @@ A **provider** offers models through a specific API. For example:
|
|
|
734
734
|
### Querying Providers and Models
|
|
735
735
|
|
|
736
736
|
```typescript
|
|
737
|
-
import { getProviders, getModels, getModel } from '@
|
|
737
|
+
import { getProviders, getModels, getModel } from '@eminent337/aery-ai';
|
|
738
738
|
|
|
739
739
|
// Get all available providers
|
|
740
740
|
const providers = getProviders();
|
|
@@ -760,7 +760,7 @@ console.log(`Using ${model.name} via ${model.api} API`);
|
|
|
760
760
|
You can create custom models for local inference servers or custom endpoints:
|
|
761
761
|
|
|
762
762
|
```typescript
|
|
763
|
-
import { Model, stream } from '@
|
|
763
|
+
import { Model, stream } from '@eminent337/aery-ai';
|
|
764
764
|
|
|
765
765
|
// Example: Ollama using OpenAI-compatible API
|
|
766
766
|
const ollamaModel: Model<'openai-completions'> = {
|
|
@@ -878,7 +878,7 @@ If `compat` is not set, the library falls back to URL-based detection. If `compa
|
|
|
878
878
|
Models are typed by their API, which keeps the model metadata accurate. Provider-specific option types are enforced when you call the provider functions directly. The generic `stream` and `complete` functions accept `StreamOptions` with additional provider fields.
|
|
879
879
|
|
|
880
880
|
```typescript
|
|
881
|
-
import { streamAnthropic, type AnthropicOptions } from '@
|
|
881
|
+
import { streamAnthropic, type AnthropicOptions } from '@eminent337/aery-ai';
|
|
882
882
|
|
|
883
883
|
// TypeScript knows this is an Anthropic model
|
|
884
884
|
const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
|
|
@@ -907,7 +907,7 @@ When messages from one provider are sent to a different provider, the library au
|
|
|
907
907
|
### Example: Multi-Provider Conversation
|
|
908
908
|
|
|
909
909
|
```typescript
|
|
910
|
-
import { getModel, complete, Context } from '@
|
|
910
|
+
import { getModel, complete, Context } from '@eminent337/aery-ai';
|
|
911
911
|
|
|
912
912
|
// Start with Claude
|
|
913
913
|
const claude = getModel('anthropic', 'claude-sonnet-4-20250514');
|
|
@@ -952,7 +952,7 @@ This enables flexible workflows where you can:
|
|
|
952
952
|
The `Context` object can be easily serialized and deserialized using standard JSON methods, making it simple to persist conversations, implement chat history, or transfer contexts between services:
|
|
953
953
|
|
|
954
954
|
```typescript
|
|
955
|
-
import { Context, getModel, complete } from '@
|
|
955
|
+
import { Context, getModel, complete } from '@eminent337/aery-ai';
|
|
956
956
|
|
|
957
957
|
// Create and use a context
|
|
958
958
|
const context: Context = {
|
|
@@ -989,7 +989,7 @@ const continuation = await complete(newModel, restored);
|
|
|
989
989
|
The library supports browser environments. You must pass the API key explicitly since environment variables are not available in browsers:
|
|
990
990
|
|
|
991
991
|
```typescript
|
|
992
|
-
import { getModel, complete } from '@
|
|
992
|
+
import { getModel, complete } from '@eminent337/aery-ai';
|
|
993
993
|
|
|
994
994
|
// API key must be passed explicitly in browser
|
|
995
995
|
const model = getModel('anthropic', 'claude-3-5-haiku-20241022');
|
|
@@ -1006,7 +1006,7 @@ const response = await complete(model, {
|
|
|
1006
1006
|
### Browser Compatibility Notes
|
|
1007
1007
|
|
|
1008
1008
|
- Amazon Bedrock (`bedrock-converse-stream`) is not supported in browser environments.
|
|
1009
|
-
- OAuth login flows are not supported in browser environments. Use the `@
|
|
1009
|
+
- OAuth login flows are not supported in browser environments. Use the `@eminent337/aery-ai/oauth` entry point in Node.js.
|
|
1010
1010
|
- In browser builds, Bedrock can still appear in model lists. Calls to Bedrock models fail at runtime.
|
|
1011
1011
|
- Use a server-side proxy or backend service if you need Bedrock or OAuth-based auth from a web app.
|
|
1012
1012
|
|
|
@@ -1070,7 +1070,7 @@ This only affects direct API calls to `api.anthropic.com` and `api.openai.com`.
|
|
|
1070
1070
|
### Checking Environment Variables
|
|
1071
1071
|
|
|
1072
1072
|
```typescript
|
|
1073
|
-
import { getEnvApiKey } from '@
|
|
1073
|
+
import { getEnvApiKey } from '@eminent337/aery-ai';
|
|
1074
1074
|
|
|
1075
1075
|
// Check if an API key is set in environment variables
|
|
1076
1076
|
const key = getEnvApiKey('openai'); // checks OPENAI_API_KEY
|
|
@@ -1111,7 +1111,7 @@ export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json"
|
|
|
1111
1111
|
```
|
|
1112
1112
|
|
|
1113
1113
|
```typescript
|
|
1114
|
-
import { getModel, complete } from '@
|
|
1114
|
+
import { getModel, complete } from '@eminent337/aery-ai';
|
|
1115
1115
|
|
|
1116
1116
|
(async () => {
|
|
1117
1117
|
const model = getModel('google-vertex', 'gemini-2.5-flash');
|
|
@@ -1134,16 +1134,16 @@ Official docs: [Application Default Credentials](https://cloud.google.com/docs/a
|
|
|
1134
1134
|
The quickest way to authenticate:
|
|
1135
1135
|
|
|
1136
1136
|
```bash
|
|
1137
|
-
npx @
|
|
1138
|
-
npx @
|
|
1139
|
-
npx @
|
|
1137
|
+
npx @eminent337/aery-ai login # interactive provider selection
|
|
1138
|
+
npx @eminent337/aery-ai login anthropic # login to specific provider
|
|
1139
|
+
npx @eminent337/aery-ai list # list available providers
|
|
1140
1140
|
```
|
|
1141
1141
|
|
|
1142
1142
|
Credentials are saved to `auth.json` in the current directory.
|
|
1143
1143
|
|
|
1144
1144
|
### Programmatic OAuth
|
|
1145
1145
|
|
|
1146
|
-
The library provides login and token refresh functions via the `@
|
|
1146
|
+
The library provides login and token refresh functions via the `@eminent337/aery-ai/oauth` entry point. Credential storage is the caller's responsibility.
|
|
1147
1147
|
|
|
1148
1148
|
```typescript
|
|
1149
1149
|
import {
|
|
@@ -1161,13 +1161,13 @@ import {
|
|
|
1161
1161
|
// Types
|
|
1162
1162
|
type OAuthProvider, // 'anthropic' | 'openai-codex' | 'github-copilot' | 'google-gemini-cli' | 'google-antigravity'
|
|
1163
1163
|
type OAuthCredentials,
|
|
1164
|
-
} from '@
|
|
1164
|
+
} from '@eminent337/aery-ai/oauth';
|
|
1165
1165
|
```
|
|
1166
1166
|
|
|
1167
1167
|
### Login Flow Example
|
|
1168
1168
|
|
|
1169
1169
|
```typescript
|
|
1170
|
-
import { loginGitHubCopilot } from '@
|
|
1170
|
+
import { loginGitHubCopilot } from '@eminent337/aery-ai/oauth';
|
|
1171
1171
|
import { writeFileSync } from 'fs';
|
|
1172
1172
|
|
|
1173
1173
|
const credentials = await loginGitHubCopilot({
|
|
@@ -1191,8 +1191,8 @@ writeFileSync('auth.json', JSON.stringify(auth, null, 2));
|
|
|
1191
1191
|
Use `getOAuthApiKey()` to get an API key, automatically refreshing if expired:
|
|
1192
1192
|
|
|
1193
1193
|
```typescript
|
|
1194
|
-
import { getModel, complete } from '@
|
|
1195
|
-
import { getOAuthApiKey } from '@
|
|
1194
|
+
import { getModel, complete } from '@eminent337/aery-ai';
|
|
1195
|
+
import { getOAuthApiKey } from '@eminent337/aery-ai/oauth';
|
|
1196
1196
|
import { readFileSync, writeFileSync } from 'fs';
|
|
1197
1197
|
|
|
1198
1198
|
// Load your stored credentials
|
|
@@ -1251,7 +1251,7 @@ Create a new provider file (for example `amazon-bedrock.ts`) that exports:
|
|
|
1251
1251
|
- Register the API with `registerApiProvider()`
|
|
1252
1252
|
- Add a package subpath export in `package.json` for the provider module (`./dist/providers/<provider>.js`)
|
|
1253
1253
|
- Add lazy loader wrappers in `src/providers/register-builtins.ts`, do not statically import provider implementation modules there
|
|
1254
|
-
- Add any root-level `export type` re-exports in `src/index.ts` that should remain available from `@
|
|
1254
|
+
- Add any root-level `export type` re-exports in `src/index.ts` that should remain available from `@eminent337/aery-ai`
|
|
1255
1255
|
- Add credential detection in `env-api-keys.ts` for the new provider
|
|
1256
1256
|
- Ensure `streamSimple` handles auth lookup via `getEnvApiKey()` or provider-specific auth
|
|
1257
1257
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"anthropic.d.ts","sourceRoot":"","sources":["../../src/providers/anthropic.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAS1C,OAAO,KAAK,EAQX,mBAAmB,EAEnB,cAAc,EACd,aAAa,EAMb,MAAM,aAAa,CAAC;AAkIrB,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,KAAK,CAAC;AAE1E,MAAM,MAAM,wBAAwB,GAAG,YAAY,GAAG,SAAS,CAAC;AAEhE,MAAM,WAAW,gBAAiB,SAAQ,aAAa;IACtD;;;;OAIG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;;;;;;;OASG;IACH,MAAM,CAAC,EAAE,eAAe,CAAC;IACzB;;;;;;;;;;OAUG;IACH,eAAe,CAAC,EAAE,wBAAwB,CAAC;IAC3C,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,UAAU,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACtE;;;;OAIG;IACH,MAAM,CAAC,EAAE,SAAS,CAAC;CACnB;AAYD,eAAO,MAAM,eAAe,EAAE,cAAc,CAAC,oBAAoB,EAAE,gBAAgB,CA2PlF,CAAC;AA4CF,eAAO,MAAM,qBAAqB,EAAE,cAAc,CAAC,oBAAoB,EAAE,mBAAmB,CAuC3F,CAAC","sourcesContent":["import Anthropic from \"@anthropic-ai/sdk\";\nimport type {\n\tCacheControlEphemeral,\n\tContentBlockParam,\n\tMessageCreateParamsStreaming,\n\tMessageParam,\n} from \"@anthropic-ai/sdk/resources/messages.js\";\nimport { getEnvApiKey } from \"../env-api-keys.js\";\nimport { calculateCost } from \"../models.js\";\nimport type {\n\tApi,\n\tAssistantMessage,\n\tCacheRetention,\n\tContext,\n\tImageContent,\n\tMessage,\n\tModel,\n\tSimpleStreamOptions,\n\tStopReason,\n\tStreamFunction,\n\tStreamOptions,\n\tTextContent,\n\tThinkingContent,\n\tTool,\n\tToolCall,\n\tToolResultMessage,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { headersToRecord } from \"../utils/headers.js\";\nimport { parseStreamingJson } from \"../utils/json-parse.js\";\nimport { sanitizeSurrogates } from \"../utils/sanitize-unicode.js\";\n\nimport { buildCopilotDynamicHeaders, hasCopilotVisionInput } from \"./github-copilot-headers.js\";\nimport { adjustMaxTokensForThinking, buildBaseOptions } from \"./simple-options.js\";\nimport { 
transformMessages } from \"./transform-messages.js\";\n\n/**\n * Resolve cache retention preference.\n * Defaults to \"short\" and uses PI_CACHE_RETENTION for backward compatibility.\n */\nfunction resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {\n\tif (cacheRetention) {\n\t\treturn cacheRetention;\n\t}\n\tif (typeof process !== \"undefined\" && process.env.PI_CACHE_RETENTION === \"long\") {\n\t\treturn \"long\";\n\t}\n\treturn \"short\";\n}\n\nfunction getCacheControl(\n\tbaseUrl: string,\n\tcacheRetention?: CacheRetention,\n): { retention: CacheRetention; cacheControl?: CacheControlEphemeral } {\n\tconst retention = resolveCacheRetention(cacheRetention);\n\tif (retention === \"none\") {\n\t\treturn { retention };\n\t}\n\tconst ttl = retention === \"long\" && baseUrl.includes(\"api.anthropic.com\") ? \"1h\" : undefined;\n\treturn {\n\t\tretention,\n\t\tcacheControl: { type: \"ephemeral\", ...(ttl && { ttl }) },\n\t};\n}\n\n// Stealth mode: Mimic Claude Code's tool naming exactly\nconst claudeCodeVersion = \"2.1.75\";\n\n// Claude Code 2.x tool names (canonical casing)\n// Source: https://aery.dev/data/prompts-2.1.11.md\n// To update: https://github.com/eminent337/aery\nconst claudeCodeTools = [\n\t\"Read\",\n\t\"Write\",\n\t\"Edit\",\n\t\"Bash\",\n\t\"Grep\",\n\t\"Glob\",\n\t\"AskUserQuestion\",\n\t\"EnterPlanMode\",\n\t\"ExitPlanMode\",\n\t\"KillShell\",\n\t\"NotebookEdit\",\n\t\"Skill\",\n\t\"Task\",\n\t\"TaskOutput\",\n\t\"TodoWrite\",\n\t\"WebFetch\",\n\t\"WebSearch\",\n];\n\nconst ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t]));\n\n// Convert tool name to CC canonical casing if it matches (case-insensitive)\nconst toClaudeCodeName = (name: string) => ccToolLookup.get(name.toLowerCase()) ?? 
name;\nconst fromClaudeCodeName = (name: string, tools?: Tool[]) => {\n\tif (tools && tools.length > 0) {\n\t\tconst lowerName = name.toLowerCase();\n\t\tconst matchedTool = tools.find((tool) => tool.name.toLowerCase() === lowerName);\n\t\tif (matchedTool) return matchedTool.name;\n\t}\n\treturn name;\n};\n\n/**\n * Convert content blocks to Anthropic API format\n */\nfunction convertContentBlocks(content: (TextContent | ImageContent)[]):\n\t| string\n\t| Array<\n\t\t\t| { type: \"text\"; text: string }\n\t\t\t| {\n\t\t\t\t\ttype: \"image\";\n\t\t\t\t\tsource: {\n\t\t\t\t\t\ttype: \"base64\";\n\t\t\t\t\t\tmedia_type: \"image/jpeg\" | \"image/png\" | \"image/gif\" | \"image/webp\";\n\t\t\t\t\t\tdata: string;\n\t\t\t\t\t};\n\t\t\t }\n\t > {\n\t// If only text blocks, return as concatenated string for simplicity\n\tconst hasImages = content.some((c) => c.type === \"image\");\n\tif (!hasImages) {\n\t\treturn sanitizeSurrogates(content.map((c) => (c as TextContent).text).join(\"\\n\"));\n\t}\n\n\t// If we have images, convert to content block array\n\tconst blocks = content.map((block) => {\n\t\tif (block.type === \"text\") {\n\t\t\treturn {\n\t\t\t\ttype: \"text\" as const,\n\t\t\t\ttext: sanitizeSurrogates(block.text),\n\t\t\t};\n\t\t}\n\t\treturn {\n\t\t\ttype: \"image\" as const,\n\t\t\tsource: {\n\t\t\t\ttype: \"base64\" as const,\n\t\t\t\tmedia_type: block.mimeType as \"image/jpeg\" | \"image/png\" | \"image/gif\" | \"image/webp\",\n\t\t\t\tdata: block.data,\n\t\t\t},\n\t\t};\n\t});\n\n\t// If only images (no text), add placeholder text block\n\tconst hasText = blocks.some((b) => b.type === \"text\");\n\tif (!hasText) {\n\t\tblocks.unshift({\n\t\t\ttype: \"text\" as const,\n\t\t\ttext: \"(see attached image)\",\n\t\t});\n\t}\n\n\treturn blocks;\n}\n\nexport type AnthropicEffort = \"low\" | \"medium\" | \"high\" | \"xhigh\" | \"max\";\n\nexport type AnthropicThinkingDisplay = \"summarized\" | \"omitted\";\n\nexport interface AnthropicOptions extends StreamOptions 
{\n\t/**\n\t * Enable extended thinking.\n\t * For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).\n\t * For older models: uses budget-based thinking with thinkingBudgetTokens.\n\t */\n\tthinkingEnabled?: boolean;\n\t/**\n\t * Token budget for extended thinking (older models only).\n\t * Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking.\n\t */\n\tthinkingBudgetTokens?: number;\n\t/**\n\t * Effort level for adaptive thinking (Opus 4.6+ and Sonnet 4.6).\n\t * Controls how much thinking Claude allocates:\n\t * - \"max\": Always thinks with no constraints (Opus 4.6 only)\n\t * - \"xhigh\": Highest reasoning level (Opus 4.7)\n\t * - \"high\": Always thinks, deep reasoning (default)\n\t * - \"medium\": Moderate thinking, may skip for simple queries\n\t * - \"low\": Minimal thinking, skips for simple tasks\n\t * Ignored for older models.\n\t */\n\teffort?: AnthropicEffort;\n\t/**\n\t * Controls how thinking content is returned in API responses.\n\t * - \"summarized\": Thinking blocks contain summarized thinking text (default here).\n\t * - \"omitted\": Thinking blocks return an empty thinking field; the encrypted\n\t * signature still travels back for multi-turn continuity. Use for faster\n\t * time-to-first-text-token when your UI does not surface thinking.\n\t *\n\t * Note: Anthropic's API default for Claude Opus 4.7 and Claude Mythos Preview\n\t * is \"omitted\". We default to \"summarized\" here to keep behavior consistent\n\t * with older Claude 4 models. Set this explicitly to \"omitted\" to opt in.\n\t */\n\tthinkingDisplay?: AnthropicThinkingDisplay;\n\tinterleavedThinking?: boolean;\n\ttoolChoice?: \"auto\" | \"any\" | \"none\" | { type: \"tool\"; name: string };\n\t/**\n\t * Pre-built Anthropic client instance. When provided, skips internal client\n\t * construction entirely. 
Use this to inject alternative SDK clients such as\n\t * `AnthropicVertex` that shares the same messaging API.\n\t */\n\tclient?: Anthropic;\n}\n\nfunction mergeHeaders(...headerSources: (Record<string, string> | undefined)[]): Record<string, string> {\n\tconst merged: Record<string, string> = {};\n\tfor (const headers of headerSources) {\n\t\tif (headers) {\n\t\t\tObject.assign(merged, headers);\n\t\t}\n\t}\n\treturn merged;\n}\n\nexport const streamAnthropic: StreamFunction<\"anthropic-messages\", AnthropicOptions> = (\n\tmodel: Model<\"anthropic-messages\">,\n\tcontext: Context,\n\toptions?: AnthropicOptions,\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t(async () => {\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: model.api as Api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\ttry {\n\t\t\tlet client: Anthropic;\n\t\t\tlet isOAuth: boolean;\n\n\t\t\tif (options?.client) {\n\t\t\t\tclient = options.client;\n\t\t\t\tisOAuth = false;\n\t\t\t} else {\n\t\t\t\tconst apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? \"\";\n\n\t\t\t\tlet copilotDynamicHeaders: Record<string, string> | undefined;\n\t\t\t\tif (model.provider === \"github-copilot\") {\n\t\t\t\t\tconst hasImages = hasCopilotVisionInput(context.messages);\n\t\t\t\t\tcopilotDynamicHeaders = buildCopilotDynamicHeaders({\n\t\t\t\t\t\tmessages: context.messages,\n\t\t\t\t\t\thasImages,\n\t\t\t\t\t});\n\t\t\t\t}\n\n\t\t\t\tconst created = createClient(\n\t\t\t\t\tmodel,\n\t\t\t\t\tapiKey,\n\t\t\t\t\toptions?.interleavedThinking ?? 
true,\n\t\t\t\t\toptions?.headers,\n\t\t\t\t\tcopilotDynamicHeaders,\n\t\t\t\t);\n\t\t\t\tclient = created.client;\n\t\t\t\tisOAuth = created.isOAuthToken;\n\t\t\t}\n\t\t\tlet params = buildParams(model, context, isOAuth, options);\n\t\t\tconst nextParams = await options?.onPayload?.(params, model);\n\t\t\tif (nextParams !== undefined) {\n\t\t\t\tparams = nextParams as MessageCreateParamsStreaming;\n\t\t\t}\n\t\t\tconst { data: anthropicStream, response } = await client.messages\n\t\t\t\t.stream({ ...params, stream: true }, { signal: options?.signal })\n\t\t\t\t.withResponse();\n\t\t\tawait options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);\n\t\t\tstream.push({ type: \"start\", partial: output });\n\n\t\t\ttype Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string })) & { index: number };\n\t\t\tconst blocks = output.content as Block[];\n\n\t\t\tfor await (const event of anthropicStream) {\n\t\t\t\tif (event.type === \"message_start\") {\n\t\t\t\t\toutput.responseId = event.message.id;\n\t\t\t\t\t// Capture initial token usage from message_start event\n\t\t\t\t\t// This ensures we have input token counts even if the stream is aborted early\n\t\t\t\t\toutput.usage.input = event.message.usage.input_tokens || 0;\n\t\t\t\t\toutput.usage.output = event.message.usage.output_tokens || 0;\n\t\t\t\t\toutput.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;\n\t\t\t\t\toutput.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;\n\t\t\t\t\t// Anthropic doesn't provide total_tokens, compute from components\n\t\t\t\t\toutput.usage.totalTokens =\n\t\t\t\t\t\toutput.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;\n\t\t\t\t\tcalculateCost(model, output.usage);\n\t\t\t\t} else if (event.type === \"content_block_start\") {\n\t\t\t\t\tif (event.content_block.type === \"text\") {\n\t\t\t\t\t\tconst block: Block = {\n\t\t\t\t\t\t\ttype: 
\"text\",\n\t\t\t\t\t\t\ttext: \"\",\n\t\t\t\t\t\t\tindex: event.index,\n\t\t\t\t\t\t};\n\t\t\t\t\t\toutput.content.push(block);\n\t\t\t\t\t\tstream.push({ type: \"text_start\", contentIndex: output.content.length - 1, partial: output });\n\t\t\t\t\t} else if (event.content_block.type === \"thinking\") {\n\t\t\t\t\t\tconst block: Block = {\n\t\t\t\t\t\t\ttype: \"thinking\",\n\t\t\t\t\t\t\tthinking: \"\",\n\t\t\t\t\t\t\tthinkingSignature: \"\",\n\t\t\t\t\t\t\tindex: event.index,\n\t\t\t\t\t\t};\n\t\t\t\t\t\toutput.content.push(block);\n\t\t\t\t\t\tstream.push({ type: \"thinking_start\", contentIndex: output.content.length - 1, partial: output });\n\t\t\t\t\t} else if (event.content_block.type === \"redacted_thinking\") {\n\t\t\t\t\t\tconst block: Block = {\n\t\t\t\t\t\t\ttype: \"thinking\",\n\t\t\t\t\t\t\tthinking: \"[Reasoning redacted]\",\n\t\t\t\t\t\t\tthinkingSignature: event.content_block.data,\n\t\t\t\t\t\t\tredacted: true,\n\t\t\t\t\t\t\tindex: event.index,\n\t\t\t\t\t\t};\n\t\t\t\t\t\toutput.content.push(block);\n\t\t\t\t\t\tstream.push({ type: \"thinking_start\", contentIndex: output.content.length - 1, partial: output });\n\t\t\t\t\t} else if (event.content_block.type === \"tool_use\") {\n\t\t\t\t\t\tconst block: Block = {\n\t\t\t\t\t\t\ttype: \"toolCall\",\n\t\t\t\t\t\t\tid: event.content_block.id,\n\t\t\t\t\t\t\tname: isOAuth\n\t\t\t\t\t\t\t\t? fromClaudeCodeName(event.content_block.name, context.tools)\n\t\t\t\t\t\t\t\t: event.content_block.name,\n\t\t\t\t\t\t\targuments: (event.content_block.input as Record<string, any>) ?? 
{},\n\t\t\t\t\t\t\tpartialJson: \"\",\n\t\t\t\t\t\t\tindex: event.index,\n\t\t\t\t\t\t};\n\t\t\t\t\t\toutput.content.push(block);\n\t\t\t\t\t\tstream.push({ type: \"toolcall_start\", contentIndex: output.content.length - 1, partial: output });\n\t\t\t\t\t}\n\t\t\t\t} else if (event.type === \"content_block_delta\") {\n\t\t\t\t\tif (event.delta.type === \"text_delta\") {\n\t\t\t\t\t\tconst index = blocks.findIndex((b) => b.index === event.index);\n\t\t\t\t\t\tconst block = blocks[index];\n\t\t\t\t\t\tif (block && block.type === \"text\") {\n\t\t\t\t\t\t\tblock.text += event.delta.text;\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"text_delta\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\tdelta: event.delta.text,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t} else if (event.delta.type === \"thinking_delta\") {\n\t\t\t\t\t\tconst index = blocks.findIndex((b) => b.index === event.index);\n\t\t\t\t\t\tconst block = blocks[index];\n\t\t\t\t\t\tif (block && block.type === \"thinking\") {\n\t\t\t\t\t\t\tblock.thinking += event.delta.thinking;\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"thinking_delta\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\tdelta: event.delta.thinking,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t} else if (event.delta.type === \"input_json_delta\") {\n\t\t\t\t\t\tconst index = blocks.findIndex((b) => b.index === event.index);\n\t\t\t\t\t\tconst block = blocks[index];\n\t\t\t\t\t\tif (block && block.type === \"toolCall\") {\n\t\t\t\t\t\t\tblock.partialJson += event.delta.partial_json;\n\t\t\t\t\t\t\tblock.arguments = parseStreamingJson(block.partialJson);\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"toolcall_delta\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\tdelta: event.delta.partial_json,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t} else if (event.delta.type === \"signature_delta\") 
{\n\t\t\t\t\t\tconst index = blocks.findIndex((b) => b.index === event.index);\n\t\t\t\t\t\tconst block = blocks[index];\n\t\t\t\t\t\tif (block && block.type === \"thinking\") {\n\t\t\t\t\t\t\tblock.thinkingSignature = block.thinkingSignature || \"\";\n\t\t\t\t\t\t\tblock.thinkingSignature += event.delta.signature;\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t} else if (event.type === \"content_block_stop\") {\n\t\t\t\t\tconst index = blocks.findIndex((b) => b.index === event.index);\n\t\t\t\t\tconst block = blocks[index];\n\t\t\t\t\tif (block) {\n\t\t\t\t\t\tdelete (block as any).index;\n\t\t\t\t\t\tif (block.type === \"text\") {\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\tcontent: block.text,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t} else if (block.type === \"thinking\") {\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\tcontent: block.thinking,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t} else if (block.type === \"toolCall\") {\n\t\t\t\t\t\t\tblock.arguments = parseStreamingJson(block.partialJson);\n\t\t\t\t\t\t\t// Finalize in-place and strip the scratch buffer so replay only\n\t\t\t\t\t\t\t// carries parsed arguments.\n\t\t\t\t\t\t\tdelete (block as { partialJson?: string }).partialJson;\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"toolcall_end\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\ttoolCall: block,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t} else if (event.type === \"message_delta\") {\n\t\t\t\t\tif (event.delta.stop_reason) {\n\t\t\t\t\t\toutput.stopReason = mapStopReason(event.delta.stop_reason);\n\t\t\t\t\t}\n\t\t\t\t\t// Only update usage fields if present (not null).\n\t\t\t\t\t// Preserves input_tokens from message_start when proxies omit it in message_delta.\n\t\t\t\t\tif 
(event.usage.input_tokens != null) {\n\t\t\t\t\t\toutput.usage.input = event.usage.input_tokens;\n\t\t\t\t\t}\n\t\t\t\t\tif (event.usage.output_tokens != null) {\n\t\t\t\t\t\toutput.usage.output = event.usage.output_tokens;\n\t\t\t\t\t}\n\t\t\t\t\tif (event.usage.cache_read_input_tokens != null) {\n\t\t\t\t\t\toutput.usage.cacheRead = event.usage.cache_read_input_tokens;\n\t\t\t\t\t}\n\t\t\t\t\tif (event.usage.cache_creation_input_tokens != null) {\n\t\t\t\t\t\toutput.usage.cacheWrite = event.usage.cache_creation_input_tokens;\n\t\t\t\t\t}\n\t\t\t\t\t// Anthropic doesn't provide total_tokens, compute from components\n\t\t\t\t\toutput.usage.totalTokens =\n\t\t\t\t\t\toutput.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;\n\t\t\t\t\tcalculateCost(model, output.usage);\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (options?.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"aborted\" || output.stopReason === \"error\") {\n\t\t\t\tthrow new Error(\"An unknown error occurred\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) {\n\t\t\t\tdelete (block as { index?: number }).index;\n\t\t\t\t// partialJson is only a streaming scratch buffer; never persist it.\n\t\t\t\tdelete (block as { partialJson?: string }).partialJson;\n\t\t\t}\n\t\t\toutput.stopReason = options?.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = error instanceof Error ? 
error.message : JSON.stringify(error);\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\n/**\n * Check if a model supports adaptive thinking (Opus 4.6+, Sonnet 4.6)\n */\nfunction supportsAdaptiveThinking(modelId: string): boolean {\n\t// Adaptive-thinking model IDs (with or without date suffix)\n\treturn (\n\t\tmodelId.includes(\"opus-4-6\") ||\n\t\tmodelId.includes(\"opus-4.6\") ||\n\t\tmodelId.includes(\"opus-4-7\") ||\n\t\tmodelId.includes(\"opus-4.7\") ||\n\t\tmodelId.includes(\"sonnet-4-6\") ||\n\t\tmodelId.includes(\"sonnet-4.6\")\n\t);\n}\n\n/**\n * Map ThinkingLevel to Anthropic effort levels for adaptive thinking.\n * Note: effort \"max\" is only valid on Opus 4.6, while Opus 4.7 supports \"xhigh\".\n */\nfunction mapThinkingLevelToEffort(level: SimpleStreamOptions[\"reasoning\"], modelId: string): AnthropicEffort {\n\tswitch (level) {\n\t\tcase \"minimal\":\n\t\t\treturn \"low\";\n\t\tcase \"low\":\n\t\t\treturn \"low\";\n\t\tcase \"medium\":\n\t\t\treturn \"medium\";\n\t\tcase \"high\":\n\t\t\treturn \"high\";\n\t\tcase \"xhigh\":\n\t\t\tif (modelId.includes(\"opus-4-6\") || modelId.includes(\"opus-4.6\")) {\n\t\t\t\treturn \"max\";\n\t\t\t}\n\t\t\tif (modelId.includes(\"opus-4-7\") || modelId.includes(\"opus-4.7\")) {\n\t\t\t\treturn \"xhigh\";\n\t\t\t}\n\t\t\treturn \"high\";\n\t\tdefault:\n\t\t\treturn \"high\";\n\t}\n}\n\nexport const streamSimpleAnthropic: StreamFunction<\"anthropic-messages\", SimpleStreamOptions> = (\n\tmodel: Model<\"anthropic-messages\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst apiKey = options?.apiKey || getEnvApiKey(model.provider);\n\tif (!apiKey) {\n\t\tthrow new Error(`No API key for provider: ${model.provider}`);\n\t}\n\n\tconst base = buildBaseOptions(model, options, apiKey);\n\tif (!options?.reasoning) {\n\t\treturn streamAnthropic(model, context, { ...base, 
thinkingEnabled: false } satisfies AnthropicOptions);\n\t}\n\n\t// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level\n\t// For older models: use budget-based thinking\n\tif (supportsAdaptiveThinking(model.id)) {\n\t\tconst effort = mapThinkingLevelToEffort(options.reasoning, model.id);\n\t\treturn streamAnthropic(model, context, {\n\t\t\t...base,\n\t\t\tthinkingEnabled: true,\n\t\t\teffort,\n\t\t} satisfies AnthropicOptions);\n\t}\n\n\tconst adjusted = adjustMaxTokensForThinking(\n\t\tbase.maxTokens || 0,\n\t\tmodel.maxTokens,\n\t\toptions.reasoning,\n\t\toptions.thinkingBudgets,\n\t);\n\n\treturn streamAnthropic(model, context, {\n\t\t...base,\n\t\tmaxTokens: adjusted.maxTokens,\n\t\tthinkingEnabled: true,\n\t\tthinkingBudgetTokens: adjusted.thinkingBudget,\n\t} satisfies AnthropicOptions);\n};\n\nfunction isOAuthToken(apiKey: string): boolean {\n\treturn apiKey.includes(\"sk-ant-oat\");\n}\n\nfunction createClient(\n\tmodel: Model<\"anthropic-messages\">,\n\tapiKey: string,\n\tinterleavedThinking: boolean,\n\toptionsHeaders?: Record<string, string>,\n\tdynamicHeaders?: Record<string, string>,\n): { client: Anthropic; isOAuthToken: boolean } {\n\t// Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.\n\t// The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.\n\tconst needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id);\n\n\t// Copilot: Bearer auth, selective betas (no fine-grained-tool-streaming)\n\tif (model.provider === \"github-copilot\") {\n\t\tconst betaFeatures: string[] = [];\n\t\tif (needsInterleavedBeta) {\n\t\t\tbetaFeatures.push(\"interleaved-thinking-2025-05-14\");\n\t\t}\n\n\t\tconst client = new Anthropic({\n\t\t\tapiKey: null,\n\t\t\tauthToken: apiKey,\n\t\t\tbaseURL: model.baseUrl,\n\t\t\tdangerouslyAllowBrowser: true,\n\t\t\tdefaultHeaders: mergeHeaders(\n\t\t\t\t{\n\t\t\t\t\taccept: 
\"application/json\",\n\t\t\t\t\t\"anthropic-dangerous-direct-browser-access\": \"true\",\n\t\t\t\t\t...(betaFeatures.length > 0 ? { \"anthropic-beta\": betaFeatures.join(\",\") } : {}),\n\t\t\t\t},\n\t\t\t\tmodel.headers,\n\t\t\t\tdynamicHeaders,\n\t\t\t\toptionsHeaders,\n\t\t\t),\n\t\t});\n\n\t\treturn { client, isOAuthToken: false };\n\t}\n\n\tconst betaFeatures = [\"fine-grained-tool-streaming-2025-05-14\"];\n\tif (needsInterleavedBeta) {\n\t\tbetaFeatures.push(\"interleaved-thinking-2025-05-14\");\n\t}\n\n\t// OAuth: Bearer auth, Claude Code identity headers\n\tif (isOAuthToken(apiKey)) {\n\t\tconst client = new Anthropic({\n\t\t\tapiKey: null,\n\t\t\tauthToken: apiKey,\n\t\t\tbaseURL: model.baseUrl,\n\t\t\tdangerouslyAllowBrowser: true,\n\t\t\tdefaultHeaders: mergeHeaders(\n\t\t\t\t{\n\t\t\t\t\taccept: \"application/json\",\n\t\t\t\t\t\"anthropic-dangerous-direct-browser-access\": \"true\",\n\t\t\t\t\t\"anthropic-beta\": `claude-code-20250219,oauth-2025-04-20,${betaFeatures.join(\",\")}`,\n\t\t\t\t\t\"user-agent\": `claude-cli/${claudeCodeVersion}`,\n\t\t\t\t\t\"x-app\": \"cli\",\n\t\t\t\t},\n\t\t\t\tmodel.headers,\n\t\t\t\toptionsHeaders,\n\t\t\t),\n\t\t});\n\n\t\treturn { client, isOAuthToken: true };\n\t}\n\n\t// API key auth\n\tconst client = new Anthropic({\n\t\tapiKey,\n\t\tbaseURL: model.baseUrl,\n\t\tdangerouslyAllowBrowser: true,\n\t\tdefaultHeaders: mergeHeaders(\n\t\t\t{\n\t\t\t\taccept: \"application/json\",\n\t\t\t\t\"anthropic-dangerous-direct-browser-access\": \"true\",\n\t\t\t\t\"anthropic-beta\": betaFeatures.join(\",\"),\n\t\t\t},\n\t\t\tmodel.headers,\n\t\t\toptionsHeaders,\n\t\t),\n\t});\n\n\treturn { client, isOAuthToken: false };\n}\n\nfunction buildParams(\n\tmodel: Model<\"anthropic-messages\">,\n\tcontext: Context,\n\tisOAuthToken: boolean,\n\toptions?: AnthropicOptions,\n): MessageCreateParamsStreaming {\n\tconst { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention);\n\tconst params: MessageCreateParamsStreaming 
= {\n\t\tmodel: model.id,\n\t\tmessages: convertMessages(context.messages, model, isOAuthToken, cacheControl),\n\t\tmax_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,\n\t\tstream: true,\n\t};\n\n\t// For OAuth tokens, we MUST include Claude Code identity\n\tif (isOAuthToken) {\n\t\tparams.system = [\n\t\t\t{\n\t\t\t\ttype: \"text\",\n\t\t\t\ttext: \"You are Claude Code, Anthropic's official CLI for Claude.\",\n\t\t\t\t...(cacheControl ? { cache_control: cacheControl } : {}),\n\t\t\t},\n\t\t];\n\t\tif (context.systemPrompt) {\n\t\t\tparams.system.push({\n\t\t\t\ttype: \"text\",\n\t\t\t\ttext: sanitizeSurrogates(context.systemPrompt),\n\t\t\t\t...(cacheControl ? { cache_control: cacheControl } : {}),\n\t\t\t});\n\t\t}\n\t} else if (context.systemPrompt) {\n\t\t// Add cache control to system prompt for non-OAuth tokens\n\t\tparams.system = [\n\t\t\t{\n\t\t\t\ttype: \"text\",\n\t\t\t\ttext: sanitizeSurrogates(context.systemPrompt),\n\t\t\t\t...(cacheControl ? { cache_control: cacheControl } : {}),\n\t\t\t},\n\t\t];\n\t}\n\n\t// Temperature is incompatible with extended thinking (adaptive or budget-based).\n\tif (options?.temperature !== undefined && !options?.thinkingEnabled) {\n\t\tparams.temperature = options.temperature;\n\t}\n\n\tif (context.tools) {\n\t\tparams.tools = convertTools(context.tools, isOAuthToken, cacheControl);\n\t}\n\n\t// Configure thinking mode: adaptive (Opus 4.6+ and Sonnet 4.6),\n\t// budget-based (older models), or explicitly disabled.\n\tif (model.reasoning) {\n\t\tif (options?.thinkingEnabled) {\n\t\t\t// Default to \"summarized\" so Opus 4.7 and Mythos Preview behave like\n\t\t\t// older Claude 4 models (whose API default is also \"summarized\").\n\t\t\tconst display: AnthropicThinkingDisplay = options.thinkingDisplay ?? 
\"summarized\";\n\t\t\tif (supportsAdaptiveThinking(model.id)) {\n\t\t\t\t// Adaptive thinking: Claude decides when and how much to think.\n\t\t\t\tparams.thinking = { type: \"adaptive\", display };\n\t\t\t\tif (options.effort) {\n\t\t\t\t\t// The Anthropic SDK types can lag newly supported effort values such as \"xhigh\".\n\t\t\t\t\tparams.output_config =\n\t\t\t\t\t\toptions.effort === \"xhigh\"\n\t\t\t\t\t\t\t? ({ effort: options.effort } as unknown as NonNullable<\n\t\t\t\t\t\t\t\t\tMessageCreateParamsStreaming[\"output_config\"]\n\t\t\t\t\t\t\t\t>)\n\t\t\t\t\t\t\t: { effort: options.effort };\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\t// Budget-based thinking for older models\n\t\t\t\tparams.thinking = {\n\t\t\t\t\ttype: \"enabled\",\n\t\t\t\t\tbudget_tokens: options.thinkingBudgetTokens || 1024,\n\t\t\t\t\tdisplay,\n\t\t\t\t};\n\t\t\t}\n\t\t} else if (options?.thinkingEnabled === false) {\n\t\t\tparams.thinking = { type: \"disabled\" };\n\t\t}\n\t}\n\n\tif (options?.metadata) {\n\t\tconst userId = options.metadata.user_id;\n\t\tif (typeof userId === \"string\") {\n\t\t\tparams.metadata = { user_id: userId };\n\t\t}\n\t}\n\n\tif (options?.toolChoice) {\n\t\tif (typeof options.toolChoice === \"string\") {\n\t\t\tparams.tool_choice = { type: options.toolChoice };\n\t\t} else {\n\t\t\tparams.tool_choice = options.toolChoice;\n\t\t}\n\t}\n\n\treturn params;\n}\n\n// Normalize tool call IDs to match Anthropic's required pattern and length\nfunction normalizeToolCallId(id: string): string {\n\treturn id.replace(/[^a-zA-Z0-9_-]/g, \"_\").slice(0, 64);\n}\n\nfunction convertMessages(\n\tmessages: Message[],\n\tmodel: Model<\"anthropic-messages\">,\n\tisOAuthToken: boolean,\n\tcacheControl?: CacheControlEphemeral,\n): MessageParam[] {\n\tconst params: MessageParam[] = [];\n\n\t// Transform messages for cross-provider compatibility\n\tconst transformedMessages = transformMessages(messages, model, normalizeToolCallId);\n\n\tfor (let i = 0; i < transformedMessages.length; i++) 
{\n\t\tconst msg = transformedMessages[i];\n\n\t\tif (msg.role === \"user\") {\n\t\t\tif (typeof msg.content === \"string\") {\n\t\t\t\tif (msg.content.trim().length > 0) {\n\t\t\t\t\tparams.push({\n\t\t\t\t\t\trole: \"user\",\n\t\t\t\t\t\tcontent: sanitizeSurrogates(msg.content),\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tconst blocks: ContentBlockParam[] = msg.content.map((item) => {\n\t\t\t\t\tif (item.type === \"text\") {\n\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\t\ttext: sanitizeSurrogates(item.text),\n\t\t\t\t\t\t};\n\t\t\t\t\t} else {\n\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\ttype: \"image\",\n\t\t\t\t\t\t\tsource: {\n\t\t\t\t\t\t\t\ttype: \"base64\",\n\t\t\t\t\t\t\t\tmedia_type: item.mimeType as \"image/jpeg\" | \"image/png\" | \"image/gif\" | \"image/webp\",\n\t\t\t\t\t\t\t\tdata: item.data,\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t};\n\t\t\t\t\t}\n\t\t\t\t});\n\t\t\t\tconst filteredBlocks = blocks.filter((b) => {\n\t\t\t\t\tif (b.type === \"text\") {\n\t\t\t\t\t\treturn b.text.trim().length > 0;\n\t\t\t\t\t}\n\t\t\t\t\treturn true;\n\t\t\t\t});\n\t\t\t\tif (filteredBlocks.length === 0) continue;\n\t\t\t\tparams.push({\n\t\t\t\t\trole: \"user\",\n\t\t\t\t\tcontent: filteredBlocks,\n\t\t\t\t});\n\t\t\t}\n\t\t} else if (msg.role === \"assistant\") {\n\t\t\tconst blocks: ContentBlockParam[] = [];\n\n\t\t\tfor (const block of msg.content) {\n\t\t\t\tif (block.type === \"text\") {\n\t\t\t\t\tif (block.text.trim().length === 0) continue;\n\t\t\t\t\tblocks.push({\n\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\ttext: sanitizeSurrogates(block.text),\n\t\t\t\t\t});\n\t\t\t\t} else if (block.type === \"thinking\") {\n\t\t\t\t\t// Redacted thinking: pass the opaque payload back as redacted_thinking\n\t\t\t\t\tif (block.redacted) {\n\t\t\t\t\t\tblocks.push({\n\t\t\t\t\t\t\ttype: \"redacted_thinking\",\n\t\t\t\t\t\t\tdata: block.thinkingSignature!,\n\t\t\t\t\t\t});\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t\tif (block.thinking.trim().length === 0) 
continue;\n\t\t\t\t\t// If thinking signature is missing/empty (e.g., from aborted stream),\n\t\t\t\t\t// convert to plain text block without <thinking> tags to avoid API rejection\n\t\t\t\t\t// and prevent Claude from mimicking the tags in responses\n\t\t\t\t\tif (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {\n\t\t\t\t\t\tblocks.push({\n\t\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\t\ttext: sanitizeSurrogates(block.thinking),\n\t\t\t\t\t\t});\n\t\t\t\t\t} else {\n\t\t\t\t\t\tblocks.push({\n\t\t\t\t\t\t\ttype: \"thinking\",\n\t\t\t\t\t\t\tthinking: sanitizeSurrogates(block.thinking),\n\t\t\t\t\t\t\tsignature: block.thinkingSignature,\n\t\t\t\t\t\t});\n\t\t\t\t\t}\n\t\t\t\t} else if (block.type === \"toolCall\") {\n\t\t\t\t\tblocks.push({\n\t\t\t\t\t\ttype: \"tool_use\",\n\t\t\t\t\t\tid: block.id,\n\t\t\t\t\t\tname: isOAuthToken ? toClaudeCodeName(block.name) : block.name,\n\t\t\t\t\t\tinput: block.arguments ?? {},\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t\tif (blocks.length === 0) continue;\n\t\t\tparams.push({\n\t\t\t\trole: \"assistant\",\n\t\t\t\tcontent: blocks,\n\t\t\t});\n\t\t} else if (msg.role === \"toolResult\") {\n\t\t\t// Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint\n\t\t\tconst toolResults: ContentBlockParam[] = [];\n\n\t\t\t// Add the current tool result\n\t\t\ttoolResults.push({\n\t\t\t\ttype: \"tool_result\",\n\t\t\t\ttool_use_id: msg.toolCallId,\n\t\t\t\tcontent: convertContentBlocks(msg.content),\n\t\t\t\tis_error: msg.isError,\n\t\t\t});\n\n\t\t\t// Look ahead for consecutive toolResult messages\n\t\t\tlet j = i + 1;\n\t\t\twhile (j < transformedMessages.length && transformedMessages[j].role === \"toolResult\") {\n\t\t\t\tconst nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult\n\t\t\t\ttoolResults.push({\n\t\t\t\t\ttype: \"tool_result\",\n\t\t\t\t\ttool_use_id: nextMsg.toolCallId,\n\t\t\t\t\tcontent: 
convertContentBlocks(nextMsg.content),\n\t\t\t\t\tis_error: nextMsg.isError,\n\t\t\t\t});\n\t\t\t\tj++;\n\t\t\t}\n\n\t\t\t// Skip the messages we've already processed\n\t\t\ti = j - 1;\n\n\t\t\t// Add a single user message with all tool results\n\t\t\tparams.push({\n\t\t\t\trole: \"user\",\n\t\t\t\tcontent: toolResults,\n\t\t\t});\n\t\t}\n\t}\n\n\t// Add cache_control to the last user message to cache conversation history\n\tif (cacheControl && params.length > 0) {\n\t\tconst lastMessage = params[params.length - 1];\n\t\tif (lastMessage.role === \"user\") {\n\t\t\tif (Array.isArray(lastMessage.content)) {\n\t\t\t\tconst lastBlock = lastMessage.content[lastMessage.content.length - 1];\n\t\t\t\tif (\n\t\t\t\t\tlastBlock &&\n\t\t\t\t\t(lastBlock.type === \"text\" || lastBlock.type === \"image\" || lastBlock.type === \"tool_result\")\n\t\t\t\t) {\n\t\t\t\t\t(lastBlock as any).cache_control = cacheControl;\n\t\t\t\t}\n\t\t\t} else if (typeof lastMessage.content === \"string\") {\n\t\t\t\tlastMessage.content = [\n\t\t\t\t\t{\n\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\ttext: lastMessage.content,\n\t\t\t\t\t\tcache_control: cacheControl,\n\t\t\t\t\t},\n\t\t\t\t] as any;\n\t\t\t}\n\t\t}\n\t}\n\n\treturn params;\n}\n\nfunction convertTools(\n\ttools: Tool[],\n\tisOAuthToken: boolean,\n\tcacheControl?: CacheControlEphemeral,\n): Anthropic.Messages.Tool[] {\n\tif (!tools) return [];\n\n\treturn tools.map((tool, index) => {\n\t\tconst schema = tool.parameters as { properties?: unknown; required?: string[] };\n\n\t\treturn {\n\t\t\tname: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,\n\t\t\tdescription: tool.description,\n\t\t\tinput_schema: {\n\t\t\t\ttype: \"object\",\n\t\t\t\tproperties: schema.properties ?? {},\n\t\t\t\trequired: schema.required ?? [],\n\t\t\t},\n\t\t\t...(cacheControl && index === tools.length - 1 ? 
{ cache_control: cacheControl } : {}),\n\t\t};\n\t});\n}\n\nfunction mapStopReason(reason: Anthropic.Messages.StopReason | string): StopReason {\n\tswitch (reason) {\n\t\tcase \"end_turn\":\n\t\t\treturn \"stop\";\n\t\tcase \"max_tokens\":\n\t\t\treturn \"length\";\n\t\tcase \"tool_use\":\n\t\t\treturn \"toolUse\";\n\t\tcase \"refusal\":\n\t\t\treturn \"error\";\n\t\tcase \"pause_turn\": // Stop is good enough -> resubmit\n\t\t\treturn \"stop\";\n\t\tcase \"stop_sequence\":\n\t\t\treturn \"stop\"; // We don't supply stop sequences, so this should never happen\n\t\tcase \"sensitive\": // Content flagged by safety filters (not yet in SDK types)\n\t\t\treturn \"error\";\n\t\tdefault:\n\t\t\t// Handle unknown stop reasons gracefully (API may add new values)\n\t\t\tthrow new Error(`Unhandled stop reason: ${reason}`);\n\t}\n}\n"]}
|
|
1
|
+
{"version":3,"file":"anthropic.d.ts","sourceRoot":"","sources":["../../src/providers/anthropic.ts"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAU1C,OAAO,KAAK,EAQX,mBAAmB,EAEnB,cAAc,EACd,aAAa,EAMb,MAAM,aAAa,CAAC;AAkIrB,MAAM,MAAM,eAAe,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,GAAG,KAAK,CAAC;AAE1E,MAAM,MAAM,wBAAwB,GAAG,YAAY,GAAG,SAAS,CAAC;AAEhE,MAAM,WAAW,gBAAiB,SAAQ,aAAa;IACtD;;;;OAIG;IACH,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;;;;;;;OASG;IACH,MAAM,CAAC,EAAE,eAAe,CAAC;IACzB;;;;;;;;;;OAUG;IACH,eAAe,CAAC,EAAE,wBAAwB,CAAC;IAC3C,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,UAAU,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,MAAM,GAAG;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,EAAE,MAAM,CAAA;KAAE,CAAC;IACtE;;;;OAIG;IACH,MAAM,CAAC,EAAE,SAAS,CAAC;CACnB;AAsLD,eAAO,MAAM,eAAe,EAAE,cAAc,CAAC,oBAAoB,EAAE,gBAAgB,CA2PlF,CAAC;AA4CF,eAAO,MAAM,qBAAqB,EAAE,cAAc,CAAC,oBAAoB,EAAE,mBAAmB,CAuC3F,CAAC","sourcesContent":["import Anthropic from \"@anthropic-ai/sdk\";\nimport type {\n\tCacheControlEphemeral,\n\tContentBlockParam,\n\tMessageCreateParamsStreaming,\n\tMessageParam,\n\tRawMessageStreamEvent,\n} from \"@anthropic-ai/sdk/resources/messages.js\";\nimport { getEnvApiKey } from \"../env-api-keys.js\";\nimport { calculateCost } from \"../models.js\";\nimport type {\n\tApi,\n\tAssistantMessage,\n\tCacheRetention,\n\tContext,\n\tImageContent,\n\tMessage,\n\tModel,\n\tSimpleStreamOptions,\n\tStopReason,\n\tStreamFunction,\n\tStreamOptions,\n\tTextContent,\n\tThinkingContent,\n\tTool,\n\tToolCall,\n\tToolResultMessage,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { headersToRecord } from \"../utils/headers.js\";\nimport { parseJsonWithRepair, parseStreamingJson } from \"../utils/json-parse.js\";\nimport { sanitizeSurrogates } from \"../utils/sanitize-unicode.js\";\n\nimport { buildCopilotDynamicHeaders, hasCopilotVisionInput } from \"./github-copilot-headers.js\";\nimport { adjustMaxTokensForThinking, 
buildBaseOptions } from \"./simple-options.js\";\nimport { transformMessages } from \"./transform-messages.js\";\n\n/**\n * Resolve cache retention preference.\n * Defaults to \"short\" and uses PI_CACHE_RETENTION for backward compatibility.\n */\nfunction resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {\n\tif (cacheRetention) {\n\t\treturn cacheRetention;\n\t}\n\tif (typeof process !== \"undefined\" && process.env.PI_CACHE_RETENTION === \"long\") {\n\t\treturn \"long\";\n\t}\n\treturn \"short\";\n}\n\nfunction getCacheControl(\n\tbaseUrl: string,\n\tcacheRetention?: CacheRetention,\n): { retention: CacheRetention; cacheControl?: CacheControlEphemeral } {\n\tconst retention = resolveCacheRetention(cacheRetention);\n\tif (retention === \"none\") {\n\t\treturn { retention };\n\t}\n\tconst ttl = retention === \"long\" && baseUrl.includes(\"api.anthropic.com\") ? \"1h\" : undefined;\n\treturn {\n\t\tretention,\n\t\tcacheControl: { type: \"ephemeral\", ...(ttl && { ttl }) },\n\t};\n}\n\n// Stealth mode: Mimic Claude Code's tool naming exactly\nconst claudeCodeVersion = \"2.1.75\";\n\n// Claude Code 2.x tool names (canonical casing)\n// Source: https://aery.dev/data/prompts-2.1.11.md\n// To update: https://github.com/eminent337/aery\nconst claudeCodeTools = [\n\t\"Read\",\n\t\"Write\",\n\t\"Edit\",\n\t\"Bash\",\n\t\"Grep\",\n\t\"Glob\",\n\t\"AskUserQuestion\",\n\t\"EnterPlanMode\",\n\t\"ExitPlanMode\",\n\t\"KillShell\",\n\t\"NotebookEdit\",\n\t\"Skill\",\n\t\"Task\",\n\t\"TaskOutput\",\n\t\"TodoWrite\",\n\t\"WebFetch\",\n\t\"WebSearch\",\n];\n\nconst ccToolLookup = new Map(claudeCodeTools.map((t) => [t.toLowerCase(), t]));\n\n// Convert tool name to CC canonical casing if it matches (case-insensitive)\nconst toClaudeCodeName = (name: string) => ccToolLookup.get(name.toLowerCase()) ?? 
name;\nconst fromClaudeCodeName = (name: string, tools?: Tool[]) => {\n\tif (tools && tools.length > 0) {\n\t\tconst lowerName = name.toLowerCase();\n\t\tconst matchedTool = tools.find((tool) => tool.name.toLowerCase() === lowerName);\n\t\tif (matchedTool) return matchedTool.name;\n\t}\n\treturn name;\n};\n\n/**\n * Convert content blocks to Anthropic API format\n */\nfunction convertContentBlocks(content: (TextContent | ImageContent)[]):\n\t| string\n\t| Array<\n\t\t\t| { type: \"text\"; text: string }\n\t\t\t| {\n\t\t\t\t\ttype: \"image\";\n\t\t\t\t\tsource: {\n\t\t\t\t\t\ttype: \"base64\";\n\t\t\t\t\t\tmedia_type: \"image/jpeg\" | \"image/png\" | \"image/gif\" | \"image/webp\";\n\t\t\t\t\t\tdata: string;\n\t\t\t\t\t};\n\t\t\t }\n\t > {\n\t// If only text blocks, return as concatenated string for simplicity\n\tconst hasImages = content.some((c) => c.type === \"image\");\n\tif (!hasImages) {\n\t\treturn sanitizeSurrogates(content.map((c) => (c as TextContent).text).join(\"\\n\"));\n\t}\n\n\t// If we have images, convert to content block array\n\tconst blocks = content.map((block) => {\n\t\tif (block.type === \"text\") {\n\t\t\treturn {\n\t\t\t\ttype: \"text\" as const,\n\t\t\t\ttext: sanitizeSurrogates(block.text),\n\t\t\t};\n\t\t}\n\t\treturn {\n\t\t\ttype: \"image\" as const,\n\t\t\tsource: {\n\t\t\t\ttype: \"base64\" as const,\n\t\t\t\tmedia_type: block.mimeType as \"image/jpeg\" | \"image/png\" | \"image/gif\" | \"image/webp\",\n\t\t\t\tdata: block.data,\n\t\t\t},\n\t\t};\n\t});\n\n\t// If only images (no text), add placeholder text block\n\tconst hasText = blocks.some((b) => b.type === \"text\");\n\tif (!hasText) {\n\t\tblocks.unshift({\n\t\t\ttype: \"text\" as const,\n\t\t\ttext: \"(see attached image)\",\n\t\t});\n\t}\n\n\treturn blocks;\n}\n\nexport type AnthropicEffort = \"low\" | \"medium\" | \"high\" | \"xhigh\" | \"max\";\n\nexport type AnthropicThinkingDisplay = \"summarized\" | \"omitted\";\n\nexport interface AnthropicOptions extends StreamOptions 
{\n\t/**\n\t * Enable extended thinking.\n\t * For Opus 4.6 and Sonnet 4.6: uses adaptive thinking (model decides when/how much to think).\n\t * For older models: uses budget-based thinking with thinkingBudgetTokens.\n\t */\n\tthinkingEnabled?: boolean;\n\t/**\n\t * Token budget for extended thinking (older models only).\n\t * Ignored for Opus 4.6 and Sonnet 4.6, which use adaptive thinking.\n\t */\n\tthinkingBudgetTokens?: number;\n\t/**\n\t * Effort level for adaptive thinking (Opus 4.6+ and Sonnet 4.6).\n\t * Controls how much thinking Claude allocates:\n\t * - \"max\": Always thinks with no constraints (Opus 4.6 only)\n\t * - \"xhigh\": Highest reasoning level (Opus 4.7)\n\t * - \"high\": Always thinks, deep reasoning (default)\n\t * - \"medium\": Moderate thinking, may skip for simple queries\n\t * - \"low\": Minimal thinking, skips for simple tasks\n\t * Ignored for older models.\n\t */\n\teffort?: AnthropicEffort;\n\t/**\n\t * Controls how thinking content is returned in API responses.\n\t * - \"summarized\": Thinking blocks contain summarized thinking text (default here).\n\t * - \"omitted\": Thinking blocks return an empty thinking field; the encrypted\n\t * signature still travels back for multi-turn continuity. Use for faster\n\t * time-to-first-text-token when your UI does not surface thinking.\n\t *\n\t * Note: Anthropic's API default for Claude Opus 4.7 and Claude Mythos Preview\n\t * is \"omitted\". We default to \"summarized\" here to keep behavior consistent\n\t * with older Claude 4 models. Set this explicitly to \"omitted\" to opt in.\n\t */\n\tthinkingDisplay?: AnthropicThinkingDisplay;\n\tinterleavedThinking?: boolean;\n\ttoolChoice?: \"auto\" | \"any\" | \"none\" | { type: \"tool\"; name: string };\n\t/**\n\t * Pre-built Anthropic client instance. When provided, skips internal client\n\t * construction entirely. 
Use this to inject alternative SDK clients such as\n\t * `AnthropicVertex` that shares the same messaging API.\n\t */\n\tclient?: Anthropic;\n}\n\nfunction mergeHeaders(...headerSources: (Record<string, string> | undefined)[]): Record<string, string> {\n\tconst merged: Record<string, string> = {};\n\tfor (const headers of headerSources) {\n\t\tif (headers) {\n\t\t\tObject.assign(merged, headers);\n\t\t}\n\t}\n\treturn merged;\n}\n\ninterface ServerSentEvent {\n\tevent: string | null;\n\tdata: string;\n\traw: string[];\n}\n\ninterface SseDecoderState {\n\tevent: string | null;\n\tdata: string[];\n\traw: string[];\n}\n\nfunction flushSseEvent(state: SseDecoderState): ServerSentEvent | null {\n\tif (!state.event && state.data.length === 0) {\n\t\treturn null;\n\t}\n\n\tconst event: ServerSentEvent = {\n\t\tevent: state.event,\n\t\tdata: state.data.join(\"\\n\"),\n\t\traw: [...state.raw],\n\t};\n\tstate.event = null;\n\tstate.data = [];\n\tstate.raw = [];\n\treturn event;\n}\n\nfunction decodeSseLine(line: string, state: SseDecoderState): ServerSentEvent | null {\n\tif (line === \"\") {\n\t\treturn flushSseEvent(state);\n\t}\n\n\tstate.raw.push(line);\n\tif (line.startsWith(\":\")) {\n\t\treturn null;\n\t}\n\n\tconst delimiterIndex = line.indexOf(\":\");\n\tconst fieldName = delimiterIndex === -1 ? line : line.slice(0, delimiterIndex);\n\tlet value = delimiterIndex === -1 ? 
\"\" : line.slice(delimiterIndex + 1);\n\tif (value.startsWith(\" \")) {\n\t\tvalue = value.slice(1);\n\t}\n\n\tif (fieldName === \"event\") {\n\t\tstate.event = value;\n\t} else if (fieldName === \"data\") {\n\t\tstate.data.push(value);\n\t}\n\n\treturn null;\n}\n\nfunction nextLineBreakIndex(text: string): number {\n\tconst carriageReturnIndex = text.indexOf(\"\\r\");\n\tconst newlineIndex = text.indexOf(\"\\n\");\n\tif (carriageReturnIndex === -1) {\n\t\treturn newlineIndex;\n\t}\n\tif (newlineIndex === -1) {\n\t\treturn carriageReturnIndex;\n\t}\n\treturn Math.min(carriageReturnIndex, newlineIndex);\n}\n\nfunction consumeLine(text: string): { line: string; rest: string } | null {\n\tconst lineBreakIndex = nextLineBreakIndex(text);\n\tif (lineBreakIndex === -1) {\n\t\treturn null;\n\t}\n\n\tlet nextIndex = lineBreakIndex + 1;\n\tif (text[lineBreakIndex] === \"\\r\" && text[nextIndex] === \"\\n\") {\n\t\tnextIndex += 1;\n\t}\n\n\treturn {\n\t\tline: text.slice(0, lineBreakIndex),\n\t\trest: text.slice(nextIndex),\n\t};\n}\n\nasync function* iterateSseMessages(\n\tbody: ReadableStream<Uint8Array>,\n\tsignal?: AbortSignal,\n): AsyncGenerator<ServerSentEvent> {\n\tconst reader = body.getReader();\n\tconst decoder = new TextDecoder();\n\tconst state: SseDecoderState = { event: null, data: [], raw: [] };\n\tlet buffer = \"\";\n\n\ttry {\n\t\twhile (true) {\n\t\t\tif (signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tconst { value, done } = await reader.read();\n\t\t\tif (done) {\n\t\t\t\tbreak;\n\t\t\t}\n\n\t\t\tbuffer += decoder.decode(value, { stream: true });\n\t\t\tlet consumed = consumeLine(buffer);\n\t\t\twhile (consumed) {\n\t\t\t\tbuffer = consumed.rest;\n\t\t\t\tconst event = decodeSseLine(consumed.line, state);\n\t\t\t\tif (event) {\n\t\t\t\t\tyield event;\n\t\t\t\t}\n\t\t\t\tconsumed = consumeLine(buffer);\n\t\t\t}\n\t\t}\n\n\t\tbuffer += decoder.decode();\n\t\tlet consumed = consumeLine(buffer);\n\t\twhile (consumed) 
{\n\t\t\tbuffer = consumed.rest;\n\t\t\tconst event = decodeSseLine(consumed.line, state);\n\t\t\tif (event) {\n\t\t\t\tyield event;\n\t\t\t}\n\t\t\tconsumed = consumeLine(buffer);\n\t\t}\n\n\t\tif (buffer.length > 0) {\n\t\t\tconst event = decodeSseLine(buffer, state);\n\t\t\tif (event) {\n\t\t\t\tyield event;\n\t\t\t}\n\t\t}\n\n\t\tconst trailingEvent = flushSseEvent(state);\n\t\tif (trailingEvent) {\n\t\t\tyield trailingEvent;\n\t\t}\n\t} finally {\n\t\treader.releaseLock();\n\t}\n}\n\nasync function* iterateAnthropicEvents(\n\tresponse: Response,\n\tsignal?: AbortSignal,\n): AsyncGenerator<RawMessageStreamEvent> {\n\tif (!response.body) {\n\t\tthrow new Error(\"Attempted to iterate over an Anthropic response with no body\");\n\t}\n\n\tfor await (const sse of iterateSseMessages(response.body, signal)) {\n\t\tif (!sse.event || sse.event === \"ping\") {\n\t\t\tcontinue;\n\t\t}\n\n\t\tif (sse.event === \"error\") {\n\t\t\tthrow new Error(sse.data);\n\t\t}\n\n\t\ttry {\n\t\t\tyield parseJsonWithRepair<RawMessageStreamEvent>(sse.data);\n\t\t} catch (error) {\n\t\t\tconst message = error instanceof Error ? 
error.message : String(error);\n\t\t\tthrow new Error(\n\t\t\t\t`Could not parse Anthropic SSE event ${sse.event}: ${message}; data=${sse.data}; raw=${sse.raw.join(\"\\\\n\")}`,\n\t\t\t);\n\t\t}\n\t}\n}\n\nexport const streamAnthropic: StreamFunction<\"anthropic-messages\", AnthropicOptions> = (\n\tmodel: Model<\"anthropic-messages\">,\n\tcontext: Context,\n\toptions?: AnthropicOptions,\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t(async () => {\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: model.api as Api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\ttry {\n\t\t\tlet client: Anthropic;\n\t\t\tlet isOAuth: boolean;\n\n\t\t\tif (options?.client) {\n\t\t\t\tclient = options.client;\n\t\t\t\tisOAuth = false;\n\t\t\t} else {\n\t\t\t\tconst apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? \"\";\n\n\t\t\t\tlet copilotDynamicHeaders: Record<string, string> | undefined;\n\t\t\t\tif (model.provider === \"github-copilot\") {\n\t\t\t\t\tconst hasImages = hasCopilotVisionInput(context.messages);\n\t\t\t\t\tcopilotDynamicHeaders = buildCopilotDynamicHeaders({\n\t\t\t\t\t\tmessages: context.messages,\n\t\t\t\t\t\thasImages,\n\t\t\t\t\t});\n\t\t\t\t}\n\n\t\t\t\tconst created = createClient(\n\t\t\t\t\tmodel,\n\t\t\t\t\tapiKey,\n\t\t\t\t\toptions?.interleavedThinking ?? 
true,\n\t\t\t\t\toptions?.headers,\n\t\t\t\t\tcopilotDynamicHeaders,\n\t\t\t\t);\n\t\t\t\tclient = created.client;\n\t\t\t\tisOAuth = created.isOAuthToken;\n\t\t\t}\n\t\t\tlet params = buildParams(model, context, isOAuth, options);\n\t\t\tconst nextParams = await options?.onPayload?.(params, model);\n\t\t\tif (nextParams !== undefined) {\n\t\t\t\tparams = nextParams as MessageCreateParamsStreaming;\n\t\t\t}\n\t\t\tconst response = await client.messages\n\t\t\t\t.create({ ...params, stream: true }, { signal: options?.signal })\n\t\t\t\t.asResponse();\n\t\t\tawait options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);\n\t\t\tstream.push({ type: \"start\", partial: output });\n\n\t\t\ttype Block = (ThinkingContent | TextContent | (ToolCall & { partialJson: string })) & { index: number };\n\t\t\tconst blocks = output.content as Block[];\n\n\t\t\tfor await (const event of iterateAnthropicEvents(response, options?.signal)) {\n\t\t\t\tif (event.type === \"message_start\") {\n\t\t\t\t\toutput.responseId = event.message.id;\n\t\t\t\t\t// Capture initial token usage from message_start event\n\t\t\t\t\t// This ensures we have input token counts even if the stream is aborted early\n\t\t\t\t\toutput.usage.input = event.message.usage.input_tokens || 0;\n\t\t\t\t\toutput.usage.output = event.message.usage.output_tokens || 0;\n\t\t\t\t\toutput.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;\n\t\t\t\t\toutput.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;\n\t\t\t\t\t// Anthropic doesn't provide total_tokens, compute from components\n\t\t\t\t\toutput.usage.totalTokens =\n\t\t\t\t\t\toutput.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;\n\t\t\t\t\tcalculateCost(model, output.usage);\n\t\t\t\t} else if (event.type === \"content_block_start\") {\n\t\t\t\t\tif (event.content_block.type === \"text\") {\n\t\t\t\t\t\tconst block: Block = 
{\n\t\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\t\ttext: \"\",\n\t\t\t\t\t\t\tindex: event.index,\n\t\t\t\t\t\t};\n\t\t\t\t\t\toutput.content.push(block);\n\t\t\t\t\t\tstream.push({ type: \"text_start\", contentIndex: output.content.length - 1, partial: output });\n\t\t\t\t\t} else if (event.content_block.type === \"thinking\") {\n\t\t\t\t\t\tconst block: Block = {\n\t\t\t\t\t\t\ttype: \"thinking\",\n\t\t\t\t\t\t\tthinking: \"\",\n\t\t\t\t\t\t\tthinkingSignature: \"\",\n\t\t\t\t\t\t\tindex: event.index,\n\t\t\t\t\t\t};\n\t\t\t\t\t\toutput.content.push(block);\n\t\t\t\t\t\tstream.push({ type: \"thinking_start\", contentIndex: output.content.length - 1, partial: output });\n\t\t\t\t\t} else if (event.content_block.type === \"redacted_thinking\") {\n\t\t\t\t\t\tconst block: Block = {\n\t\t\t\t\t\t\ttype: \"thinking\",\n\t\t\t\t\t\t\tthinking: \"[Reasoning redacted]\",\n\t\t\t\t\t\t\tthinkingSignature: event.content_block.data,\n\t\t\t\t\t\t\tredacted: true,\n\t\t\t\t\t\t\tindex: event.index,\n\t\t\t\t\t\t};\n\t\t\t\t\t\toutput.content.push(block);\n\t\t\t\t\t\tstream.push({ type: \"thinking_start\", contentIndex: output.content.length - 1, partial: output });\n\t\t\t\t\t} else if (event.content_block.type === \"tool_use\") {\n\t\t\t\t\t\tconst block: Block = {\n\t\t\t\t\t\t\ttype: \"toolCall\",\n\t\t\t\t\t\t\tid: event.content_block.id,\n\t\t\t\t\t\t\tname: isOAuth\n\t\t\t\t\t\t\t\t? fromClaudeCodeName(event.content_block.name, context.tools)\n\t\t\t\t\t\t\t\t: event.content_block.name,\n\t\t\t\t\t\t\targuments: (event.content_block.input as Record<string, any>) ?? 
{},\n\t\t\t\t\t\t\tpartialJson: \"\",\n\t\t\t\t\t\t\tindex: event.index,\n\t\t\t\t\t\t};\n\t\t\t\t\t\toutput.content.push(block);\n\t\t\t\t\t\tstream.push({ type: \"toolcall_start\", contentIndex: output.content.length - 1, partial: output });\n\t\t\t\t\t}\n\t\t\t\t} else if (event.type === \"content_block_delta\") {\n\t\t\t\t\tif (event.delta.type === \"text_delta\") {\n\t\t\t\t\t\tconst index = blocks.findIndex((b) => b.index === event.index);\n\t\t\t\t\t\tconst block = blocks[index];\n\t\t\t\t\t\tif (block && block.type === \"text\") {\n\t\t\t\t\t\t\tblock.text += event.delta.text;\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"text_delta\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\tdelta: event.delta.text,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t} else if (event.delta.type === \"thinking_delta\") {\n\t\t\t\t\t\tconst index = blocks.findIndex((b) => b.index === event.index);\n\t\t\t\t\t\tconst block = blocks[index];\n\t\t\t\t\t\tif (block && block.type === \"thinking\") {\n\t\t\t\t\t\t\tblock.thinking += event.delta.thinking;\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"thinking_delta\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\tdelta: event.delta.thinking,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t} else if (event.delta.type === \"input_json_delta\") {\n\t\t\t\t\t\tconst index = blocks.findIndex((b) => b.index === event.index);\n\t\t\t\t\t\tconst block = blocks[index];\n\t\t\t\t\t\tif (block && block.type === \"toolCall\") {\n\t\t\t\t\t\t\tblock.partialJson += event.delta.partial_json;\n\t\t\t\t\t\t\tblock.arguments = parseStreamingJson(block.partialJson);\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"toolcall_delta\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\tdelta: event.delta.partial_json,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t} else if (event.delta.type === \"signature_delta\") 
{\n\t\t\t\t\t\tconst index = blocks.findIndex((b) => b.index === event.index);\n\t\t\t\t\t\tconst block = blocks[index];\n\t\t\t\t\t\tif (block && block.type === \"thinking\") {\n\t\t\t\t\t\t\tblock.thinkingSignature = block.thinkingSignature || \"\";\n\t\t\t\t\t\t\tblock.thinkingSignature += event.delta.signature;\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t} else if (event.type === \"content_block_stop\") {\n\t\t\t\t\tconst index = blocks.findIndex((b) => b.index === event.index);\n\t\t\t\t\tconst block = blocks[index];\n\t\t\t\t\tif (block) {\n\t\t\t\t\t\tdelete (block as any).index;\n\t\t\t\t\t\tif (block.type === \"text\") {\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\tcontent: block.text,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t} else if (block.type === \"thinking\") {\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\tcontent: block.thinking,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t} else if (block.type === \"toolCall\") {\n\t\t\t\t\t\t\tblock.arguments = parseStreamingJson(block.partialJson);\n\t\t\t\t\t\t\t// Finalize in-place and strip the scratch buffer so replay only\n\t\t\t\t\t\t\t// carries parsed arguments.\n\t\t\t\t\t\t\tdelete (block as { partialJson?: string }).partialJson;\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"toolcall_end\",\n\t\t\t\t\t\t\t\tcontentIndex: index,\n\t\t\t\t\t\t\t\ttoolCall: block,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t} else if (event.type === \"message_delta\") {\n\t\t\t\t\tif (event.delta.stop_reason) {\n\t\t\t\t\t\toutput.stopReason = mapStopReason(event.delta.stop_reason);\n\t\t\t\t\t}\n\t\t\t\t\t// Only update usage fields if present (not null).\n\t\t\t\t\t// Preserves input_tokens from message_start when proxies omit it in message_delta.\n\t\t\t\t\tif 
(event.usage.input_tokens != null) {\n\t\t\t\t\t\toutput.usage.input = event.usage.input_tokens;\n\t\t\t\t\t}\n\t\t\t\t\tif (event.usage.output_tokens != null) {\n\t\t\t\t\t\toutput.usage.output = event.usage.output_tokens;\n\t\t\t\t\t}\n\t\t\t\t\tif (event.usage.cache_read_input_tokens != null) {\n\t\t\t\t\t\toutput.usage.cacheRead = event.usage.cache_read_input_tokens;\n\t\t\t\t\t}\n\t\t\t\t\tif (event.usage.cache_creation_input_tokens != null) {\n\t\t\t\t\t\toutput.usage.cacheWrite = event.usage.cache_creation_input_tokens;\n\t\t\t\t\t}\n\t\t\t\t\t// Anthropic doesn't provide total_tokens, compute from components\n\t\t\t\t\toutput.usage.totalTokens =\n\t\t\t\t\t\toutput.usage.input + output.usage.output + output.usage.cacheRead + output.usage.cacheWrite;\n\t\t\t\t\tcalculateCost(model, output.usage);\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (options?.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"aborted\" || output.stopReason === \"error\") {\n\t\t\t\tthrow new Error(\"An unknown error occurred\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) {\n\t\t\t\tdelete (block as { index?: number }).index;\n\t\t\t\t// partialJson is only a streaming scratch buffer; never persist it.\n\t\t\t\tdelete (block as { partialJson?: string }).partialJson;\n\t\t\t}\n\t\t\toutput.stopReason = options?.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = error instanceof Error ? 
error.message : JSON.stringify(error);\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\n/**\n * Check if a model supports adaptive thinking (Opus 4.6+, Sonnet 4.6)\n */\nfunction supportsAdaptiveThinking(modelId: string): boolean {\n\t// Adaptive-thinking model IDs (with or without date suffix)\n\treturn (\n\t\tmodelId.includes(\"opus-4-6\") ||\n\t\tmodelId.includes(\"opus-4.6\") ||\n\t\tmodelId.includes(\"opus-4-7\") ||\n\t\tmodelId.includes(\"opus-4.7\") ||\n\t\tmodelId.includes(\"sonnet-4-6\") ||\n\t\tmodelId.includes(\"sonnet-4.6\")\n\t);\n}\n\n/**\n * Map ThinkingLevel to Anthropic effort levels for adaptive thinking.\n * Note: effort \"max\" is only valid on Opus 4.6, while Opus 4.7 supports \"xhigh\".\n */\nfunction mapThinkingLevelToEffort(level: SimpleStreamOptions[\"reasoning\"], modelId: string): AnthropicEffort {\n\tswitch (level) {\n\t\tcase \"minimal\":\n\t\t\treturn \"low\";\n\t\tcase \"low\":\n\t\t\treturn \"low\";\n\t\tcase \"medium\":\n\t\t\treturn \"medium\";\n\t\tcase \"high\":\n\t\t\treturn \"high\";\n\t\tcase \"xhigh\":\n\t\t\tif (modelId.includes(\"opus-4-6\") || modelId.includes(\"opus-4.6\")) {\n\t\t\t\treturn \"max\";\n\t\t\t}\n\t\t\tif (modelId.includes(\"opus-4-7\") || modelId.includes(\"opus-4.7\")) {\n\t\t\t\treturn \"xhigh\";\n\t\t\t}\n\t\t\treturn \"high\";\n\t\tdefault:\n\t\t\treturn \"high\";\n\t}\n}\n\nexport const streamSimpleAnthropic: StreamFunction<\"anthropic-messages\", SimpleStreamOptions> = (\n\tmodel: Model<\"anthropic-messages\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst apiKey = options?.apiKey || getEnvApiKey(model.provider);\n\tif (!apiKey) {\n\t\tthrow new Error(`No API key for provider: ${model.provider}`);\n\t}\n\n\tconst base = buildBaseOptions(model, options, apiKey);\n\tif (!options?.reasoning) {\n\t\treturn streamAnthropic(model, context, { ...base, 
thinkingEnabled: false } satisfies AnthropicOptions);\n\t}\n\n\t// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level\n\t// For older models: use budget-based thinking\n\tif (supportsAdaptiveThinking(model.id)) {\n\t\tconst effort = mapThinkingLevelToEffort(options.reasoning, model.id);\n\t\treturn streamAnthropic(model, context, {\n\t\t\t...base,\n\t\t\tthinkingEnabled: true,\n\t\t\teffort,\n\t\t} satisfies AnthropicOptions);\n\t}\n\n\tconst adjusted = adjustMaxTokensForThinking(\n\t\tbase.maxTokens || 0,\n\t\tmodel.maxTokens,\n\t\toptions.reasoning,\n\t\toptions.thinkingBudgets,\n\t);\n\n\treturn streamAnthropic(model, context, {\n\t\t...base,\n\t\tmaxTokens: adjusted.maxTokens,\n\t\tthinkingEnabled: true,\n\t\tthinkingBudgetTokens: adjusted.thinkingBudget,\n\t} satisfies AnthropicOptions);\n};\n\nfunction isOAuthToken(apiKey: string): boolean {\n\treturn apiKey.includes(\"sk-ant-oat\");\n}\n\nfunction createClient(\n\tmodel: Model<\"anthropic-messages\">,\n\tapiKey: string,\n\tinterleavedThinking: boolean,\n\toptionsHeaders?: Record<string, string>,\n\tdynamicHeaders?: Record<string, string>,\n): { client: Anthropic; isOAuthToken: boolean } {\n\t// Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.\n\t// The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.\n\tconst needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id);\n\n\t// Copilot: Bearer auth, selective betas (no fine-grained-tool-streaming)\n\tif (model.provider === \"github-copilot\") {\n\t\tconst betaFeatures: string[] = [];\n\t\tif (needsInterleavedBeta) {\n\t\t\tbetaFeatures.push(\"interleaved-thinking-2025-05-14\");\n\t\t}\n\n\t\tconst client = new Anthropic({\n\t\t\tapiKey: null,\n\t\t\tauthToken: apiKey,\n\t\t\tbaseURL: model.baseUrl,\n\t\t\tdangerouslyAllowBrowser: true,\n\t\t\tdefaultHeaders: mergeHeaders(\n\t\t\t\t{\n\t\t\t\t\taccept: 
\"application/json\",\n\t\t\t\t\t\"anthropic-dangerous-direct-browser-access\": \"true\",\n\t\t\t\t\t...(betaFeatures.length > 0 ? { \"anthropic-beta\": betaFeatures.join(\",\") } : {}),\n\t\t\t\t},\n\t\t\t\tmodel.headers,\n\t\t\t\tdynamicHeaders,\n\t\t\t\toptionsHeaders,\n\t\t\t),\n\t\t});\n\n\t\treturn { client, isOAuthToken: false };\n\t}\n\n\tconst betaFeatures: string[] = [];\n\tif (needsInterleavedBeta) {\n\t\tbetaFeatures.push(\"interleaved-thinking-2025-05-14\");\n\t}\n\n\t// OAuth: Bearer auth, Claude Code identity headers\n\tif (isOAuthToken(apiKey)) {\n\t\tconst client = new Anthropic({\n\t\t\tapiKey: null,\n\t\t\tauthToken: apiKey,\n\t\t\tbaseURL: model.baseUrl,\n\t\t\tdangerouslyAllowBrowser: true,\n\t\t\tdefaultHeaders: mergeHeaders(\n\t\t\t\t{\n\t\t\t\t\taccept: \"application/json\",\n\t\t\t\t\t\"anthropic-dangerous-direct-browser-access\": \"true\",\n\t\t\t\t\t\"anthropic-beta\": [\"claude-code-20250219\", \"oauth-2025-04-20\", ...betaFeatures].join(\",\"),\n\t\t\t\t\t\"user-agent\": `claude-cli/${claudeCodeVersion}`,\n\t\t\t\t\t\"x-app\": \"cli\",\n\t\t\t\t},\n\t\t\t\tmodel.headers,\n\t\t\t\toptionsHeaders,\n\t\t\t),\n\t\t});\n\n\t\treturn { client, isOAuthToken: true };\n\t}\n\n\t// API key auth\n\tconst client = new Anthropic({\n\t\tapiKey,\n\t\tbaseURL: model.baseUrl,\n\t\tdangerouslyAllowBrowser: true,\n\t\tdefaultHeaders: mergeHeaders(\n\t\t\t{\n\t\t\t\taccept: \"application/json\",\n\t\t\t\t\"anthropic-dangerous-direct-browser-access\": \"true\",\n\t\t\t\t...(betaFeatures.length > 0 ? 
{ \"anthropic-beta\": betaFeatures.join(\",\") } : {}),\n\t\t\t},\n\t\t\tmodel.headers,\n\t\t\toptionsHeaders,\n\t\t),\n\t});\n\n\treturn { client, isOAuthToken: false };\n}\n\nfunction buildParams(\n\tmodel: Model<\"anthropic-messages\">,\n\tcontext: Context,\n\tisOAuthToken: boolean,\n\toptions?: AnthropicOptions,\n): MessageCreateParamsStreaming {\n\tconst { cacheControl } = getCacheControl(model.baseUrl, options?.cacheRetention);\n\tconst params: MessageCreateParamsStreaming = {\n\t\tmodel: model.id,\n\t\tmessages: convertMessages(context.messages, model, isOAuthToken, cacheControl),\n\t\tmax_tokens: options?.maxTokens || (model.maxTokens / 3) | 0,\n\t\tstream: true,\n\t};\n\n\t// For OAuth tokens, we MUST include Claude Code identity\n\tif (isOAuthToken) {\n\t\tparams.system = [\n\t\t\t{\n\t\t\t\ttype: \"text\",\n\t\t\t\ttext: \"You are Claude Code, Anthropic's official CLI for Claude.\",\n\t\t\t\t...(cacheControl ? { cache_control: cacheControl } : {}),\n\t\t\t},\n\t\t];\n\t\tif (context.systemPrompt) {\n\t\t\tparams.system.push({\n\t\t\t\ttype: \"text\",\n\t\t\t\ttext: sanitizeSurrogates(context.systemPrompt),\n\t\t\t\t...(cacheControl ? { cache_control: cacheControl } : {}),\n\t\t\t});\n\t\t}\n\t} else if (context.systemPrompt) {\n\t\t// Add cache control to system prompt for non-OAuth tokens\n\t\tparams.system = [\n\t\t\t{\n\t\t\t\ttype: \"text\",\n\t\t\t\ttext: sanitizeSurrogates(context.systemPrompt),\n\t\t\t\t...(cacheControl ? 
{ cache_control: cacheControl } : {}),\n\t\t\t},\n\t\t];\n\t}\n\n\t// Temperature is incompatible with extended thinking (adaptive or budget-based).\n\tif (options?.temperature !== undefined && !options?.thinkingEnabled) {\n\t\tparams.temperature = options.temperature;\n\t}\n\n\tif (context.tools) {\n\t\tparams.tools = convertTools(context.tools, isOAuthToken, cacheControl);\n\t}\n\n\t// Configure thinking mode: adaptive (Opus 4.6+ and Sonnet 4.6),\n\t// budget-based (older models), or explicitly disabled.\n\tif (model.reasoning) {\n\t\tif (options?.thinkingEnabled) {\n\t\t\t// Default to \"summarized\" so Opus 4.7 and Mythos Preview behave like\n\t\t\t// older Claude 4 models (whose API default is also \"summarized\").\n\t\t\tconst display: AnthropicThinkingDisplay = options.thinkingDisplay ?? \"summarized\";\n\t\t\tif (supportsAdaptiveThinking(model.id)) {\n\t\t\t\t// Adaptive thinking: Claude decides when and how much to think.\n\t\t\t\tparams.thinking = { type: \"adaptive\", display };\n\t\t\t\tif (options.effort) {\n\t\t\t\t\t// The Anthropic SDK types can lag newly supported effort values such as \"xhigh\".\n\t\t\t\t\tparams.output_config =\n\t\t\t\t\t\toptions.effort === \"xhigh\"\n\t\t\t\t\t\t\t? 
({ effort: options.effort } as unknown as NonNullable<\n\t\t\t\t\t\t\t\t\tMessageCreateParamsStreaming[\"output_config\"]\n\t\t\t\t\t\t\t\t>)\n\t\t\t\t\t\t\t: { effort: options.effort };\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\t// Budget-based thinking for older models\n\t\t\t\tparams.thinking = {\n\t\t\t\t\ttype: \"enabled\",\n\t\t\t\t\tbudget_tokens: options.thinkingBudgetTokens || 1024,\n\t\t\t\t\tdisplay,\n\t\t\t\t};\n\t\t\t}\n\t\t} else if (options?.thinkingEnabled === false) {\n\t\t\tparams.thinking = { type: \"disabled\" };\n\t\t}\n\t}\n\n\tif (options?.metadata) {\n\t\tconst userId = options.metadata.user_id;\n\t\tif (typeof userId === \"string\") {\n\t\t\tparams.metadata = { user_id: userId };\n\t\t}\n\t}\n\n\tif (options?.toolChoice) {\n\t\tif (typeof options.toolChoice === \"string\") {\n\t\t\tparams.tool_choice = { type: options.toolChoice };\n\t\t} else {\n\t\t\tparams.tool_choice = options.toolChoice;\n\t\t}\n\t}\n\n\treturn params;\n}\n\n// Normalize tool call IDs to match Anthropic's required pattern and length\nfunction normalizeToolCallId(id: string): string {\n\treturn id.replace(/[^a-zA-Z0-9_-]/g, \"_\").slice(0, 64);\n}\n\nfunction convertMessages(\n\tmessages: Message[],\n\tmodel: Model<\"anthropic-messages\">,\n\tisOAuthToken: boolean,\n\tcacheControl?: CacheControlEphemeral,\n): MessageParam[] {\n\tconst params: MessageParam[] = [];\n\n\t// Transform messages for cross-provider compatibility\n\tconst transformedMessages = transformMessages(messages, model, normalizeToolCallId);\n\n\tfor (let i = 0; i < transformedMessages.length; i++) {\n\t\tconst msg = transformedMessages[i];\n\n\t\tif (msg.role === \"user\") {\n\t\t\tif (typeof msg.content === \"string\") {\n\t\t\t\tif (msg.content.trim().length > 0) {\n\t\t\t\t\tparams.push({\n\t\t\t\t\t\trole: \"user\",\n\t\t\t\t\t\tcontent: sanitizeSurrogates(msg.content),\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tconst blocks: ContentBlockParam[] = msg.content.map((item) => {\n\t\t\t\t\tif 
(item.type === \"text\") {\n\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\t\ttext: sanitizeSurrogates(item.text),\n\t\t\t\t\t\t};\n\t\t\t\t\t} else {\n\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\ttype: \"image\",\n\t\t\t\t\t\t\tsource: {\n\t\t\t\t\t\t\t\ttype: \"base64\",\n\t\t\t\t\t\t\t\tmedia_type: item.mimeType as \"image/jpeg\" | \"image/png\" | \"image/gif\" | \"image/webp\",\n\t\t\t\t\t\t\t\tdata: item.data,\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t};\n\t\t\t\t\t}\n\t\t\t\t});\n\t\t\t\tconst filteredBlocks = blocks.filter((b) => {\n\t\t\t\t\tif (b.type === \"text\") {\n\t\t\t\t\t\treturn b.text.trim().length > 0;\n\t\t\t\t\t}\n\t\t\t\t\treturn true;\n\t\t\t\t});\n\t\t\t\tif (filteredBlocks.length === 0) continue;\n\t\t\t\tparams.push({\n\t\t\t\t\trole: \"user\",\n\t\t\t\t\tcontent: filteredBlocks,\n\t\t\t\t});\n\t\t\t}\n\t\t} else if (msg.role === \"assistant\") {\n\t\t\tconst blocks: ContentBlockParam[] = [];\n\n\t\t\tfor (const block of msg.content) {\n\t\t\t\tif (block.type === \"text\") {\n\t\t\t\t\tif (block.text.trim().length === 0) continue;\n\t\t\t\t\tblocks.push({\n\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\ttext: sanitizeSurrogates(block.text),\n\t\t\t\t\t});\n\t\t\t\t} else if (block.type === \"thinking\") {\n\t\t\t\t\t// Redacted thinking: pass the opaque payload back as redacted_thinking\n\t\t\t\t\tif (block.redacted) {\n\t\t\t\t\t\tblocks.push({\n\t\t\t\t\t\t\ttype: \"redacted_thinking\",\n\t\t\t\t\t\t\tdata: block.thinkingSignature!,\n\t\t\t\t\t\t});\n\t\t\t\t\t\tcontinue;\n\t\t\t\t\t}\n\t\t\t\t\tif (block.thinking.trim().length === 0) continue;\n\t\t\t\t\t// If thinking signature is missing/empty (e.g., from aborted stream),\n\t\t\t\t\t// convert to plain text block without <thinking> tags to avoid API rejection\n\t\t\t\t\t// and prevent Claude from mimicking the tags in responses\n\t\t\t\t\tif (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {\n\t\t\t\t\t\tblocks.push({\n\t\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\t\ttext: 
sanitizeSurrogates(block.thinking),\n\t\t\t\t\t\t});\n\t\t\t\t\t} else {\n\t\t\t\t\t\tblocks.push({\n\t\t\t\t\t\t\ttype: \"thinking\",\n\t\t\t\t\t\t\tthinking: sanitizeSurrogates(block.thinking),\n\t\t\t\t\t\t\tsignature: block.thinkingSignature,\n\t\t\t\t\t\t});\n\t\t\t\t\t}\n\t\t\t\t} else if (block.type === \"toolCall\") {\n\t\t\t\t\tblocks.push({\n\t\t\t\t\t\ttype: \"tool_use\",\n\t\t\t\t\t\tid: block.id,\n\t\t\t\t\t\tname: isOAuthToken ? toClaudeCodeName(block.name) : block.name,\n\t\t\t\t\t\tinput: block.arguments ?? {},\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t}\n\t\t\tif (blocks.length === 0) continue;\n\t\t\tparams.push({\n\t\t\t\trole: \"assistant\",\n\t\t\t\tcontent: blocks,\n\t\t\t});\n\t\t} else if (msg.role === \"toolResult\") {\n\t\t\t// Collect all consecutive toolResult messages, needed for z.ai Anthropic endpoint\n\t\t\tconst toolResults: ContentBlockParam[] = [];\n\n\t\t\t// Add the current tool result\n\t\t\ttoolResults.push({\n\t\t\t\ttype: \"tool_result\",\n\t\t\t\ttool_use_id: msg.toolCallId,\n\t\t\t\tcontent: convertContentBlocks(msg.content),\n\t\t\t\tis_error: msg.isError,\n\t\t\t});\n\n\t\t\t// Look ahead for consecutive toolResult messages\n\t\t\tlet j = i + 1;\n\t\t\twhile (j < transformedMessages.length && transformedMessages[j].role === \"toolResult\") {\n\t\t\t\tconst nextMsg = transformedMessages[j] as ToolResultMessage; // We know it's a toolResult\n\t\t\t\ttoolResults.push({\n\t\t\t\t\ttype: \"tool_result\",\n\t\t\t\t\ttool_use_id: nextMsg.toolCallId,\n\t\t\t\t\tcontent: convertContentBlocks(nextMsg.content),\n\t\t\t\t\tis_error: nextMsg.isError,\n\t\t\t\t});\n\t\t\t\tj++;\n\t\t\t}\n\n\t\t\t// Skip the messages we've already processed\n\t\t\ti = j - 1;\n\n\t\t\t// Add a single user message with all tool results\n\t\t\tparams.push({\n\t\t\t\trole: \"user\",\n\t\t\t\tcontent: toolResults,\n\t\t\t});\n\t\t}\n\t}\n\n\t// Add cache_control to the last user message to cache conversation history\n\tif (cacheControl && params.length > 0) 
{\n\t\tconst lastMessage = params[params.length - 1];\n\t\tif (lastMessage.role === \"user\") {\n\t\t\tif (Array.isArray(lastMessage.content)) {\n\t\t\t\tconst lastBlock = lastMessage.content[lastMessage.content.length - 1];\n\t\t\t\tif (\n\t\t\t\t\tlastBlock &&\n\t\t\t\t\t(lastBlock.type === \"text\" || lastBlock.type === \"image\" || lastBlock.type === \"tool_result\")\n\t\t\t\t) {\n\t\t\t\t\t(lastBlock as any).cache_control = cacheControl;\n\t\t\t\t}\n\t\t\t} else if (typeof lastMessage.content === \"string\") {\n\t\t\t\tlastMessage.content = [\n\t\t\t\t\t{\n\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\ttext: lastMessage.content,\n\t\t\t\t\t\tcache_control: cacheControl,\n\t\t\t\t\t},\n\t\t\t\t] as any;\n\t\t\t}\n\t\t}\n\t}\n\n\treturn params;\n}\n\nfunction convertTools(\n\ttools: Tool[],\n\tisOAuthToken: boolean,\n\tcacheControl?: CacheControlEphemeral,\n): Anthropic.Messages.Tool[] {\n\tif (!tools) return [];\n\n\treturn tools.map((tool, index) => {\n\t\tconst schema = tool.parameters as { properties?: unknown; required?: string[] };\n\n\t\treturn {\n\t\t\tname: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,\n\t\t\tdescription: tool.description,\n\t\t\teager_input_streaming: true,\n\t\t\tinput_schema: {\n\t\t\t\ttype: \"object\",\n\t\t\t\tproperties: schema.properties ?? {},\n\t\t\t\trequired: schema.required ?? [],\n\t\t\t},\n\t\t\t...(cacheControl && index === tools.length - 1 ? 
{ cache_control: cacheControl } : {}),\n\t\t};\n\t});\n}\n\nfunction mapStopReason(reason: Anthropic.Messages.StopReason | string): StopReason {\n\tswitch (reason) {\n\t\tcase \"end_turn\":\n\t\t\treturn \"stop\";\n\t\tcase \"max_tokens\":\n\t\t\treturn \"length\";\n\t\tcase \"tool_use\":\n\t\t\treturn \"toolUse\";\n\t\tcase \"refusal\":\n\t\t\treturn \"error\";\n\t\tcase \"pause_turn\": // Stop is good enough -> resubmit\n\t\t\treturn \"stop\";\n\t\tcase \"stop_sequence\":\n\t\t\treturn \"stop\"; // We don't supply stop sequences, so this should never happen\n\t\tcase \"sensitive\": // Content flagged by safety filters (not yet in SDK types)\n\t\t\treturn \"error\";\n\t\tdefault:\n\t\t\t// Handle unknown stop reasons gracefully (API may add new values)\n\t\t\tthrow new Error(`Unhandled stop reason: ${reason}`);\n\t}\n}\n"]}
|
|
@@ -3,7 +3,7 @@ import { getEnvApiKey } from "../env-api-keys.js";
|
|
|
3
3
|
import { calculateCost } from "../models.js";
|
|
4
4
|
import { AssistantMessageEventStream } from "../utils/event-stream.js";
|
|
5
5
|
import { headersToRecord } from "../utils/headers.js";
|
|
6
|
-
import { parseStreamingJson } from "../utils/json-parse.js";
|
|
6
|
+
import { parseJsonWithRepair, parseStreamingJson } from "../utils/json-parse.js";
|
|
7
7
|
import { sanitizeSurrogates } from "../utils/sanitize-unicode.js";
|
|
8
8
|
import { buildCopilotDynamicHeaders, hasCopilotVisionInput } from "./github-copilot-headers.js";
|
|
9
9
|
import { adjustMaxTokensForThinking, buildBaseOptions } from "./simple-options.js";
|
|
@@ -113,6 +113,137 @@ function mergeHeaders(...headerSources) {
|
|
|
113
113
|
}
|
|
114
114
|
return merged;
|
|
115
115
|
}
|
|
116
|
+
function flushSseEvent(state) {
|
|
117
|
+
if (!state.event && state.data.length === 0) {
|
|
118
|
+
return null;
|
|
119
|
+
}
|
|
120
|
+
const event = {
|
|
121
|
+
event: state.event,
|
|
122
|
+
data: state.data.join("\n"),
|
|
123
|
+
raw: [...state.raw],
|
|
124
|
+
};
|
|
125
|
+
state.event = null;
|
|
126
|
+
state.data = [];
|
|
127
|
+
state.raw = [];
|
|
128
|
+
return event;
|
|
129
|
+
}
|
|
130
|
+
/**
 * Feeds one line of a server-sent-event stream into the accumulator.
 *
 * An empty line terminates the current event and returns whatever
 * `flushSseEvent` produces. Every other line is recorded in `state.raw`;
 * lines starting with ":" are comments and are otherwise ignored. For
 * `field: value` lines a single leading space of the value is dropped, and
 * only the `event` and `data` fields are stored.
 *
 * @param line One line of the stream, without its line terminator.
 * @param state Mutable accumulator (`event`, `data`, `raw`).
 * @returns The completed event when `line` is empty and something was
 *   accumulated, otherwise `null`.
 */
function decodeSseLine(line, state) {
    if (line === "") {
        return flushSseEvent(state);
    }
    state.raw.push(line);
    const colonAt = line.indexOf(":");
    if (colonAt === 0) {
        // Comment line: kept in `raw` only.
        return null;
    }
    let field = line;
    let value = "";
    if (colonAt !== -1) {
        field = line.slice(0, colonAt);
        value = line.slice(colonAt + 1);
        if (value[0] === " ") {
            value = value.slice(1);
        }
    }
    switch (field) {
        case "event":
            state.event = value;
            break;
        case "data":
            state.data.push(value);
            break;
        default:
            // Other fields are not used by this parser.
            break;
    }
    return null;
}
|
|
152
|
+
/**
 * Finds the position of the first line-break character in `text`.
 *
 * @param text String to scan.
 * @returns Index of the first "\r" or "\n", or -1 when `text` contains neither.
 */
function nextLineBreakIndex(text) {
    for (let position = 0; position < text.length; position++) {
        const ch = text[position];
        if (ch === "\r" || ch === "\n") {
            return position;
        }
    }
    return -1;
}
|
|
163
|
+
/**
 * Splits the first complete line off the front of `text`.
 *
 * A line ends at the first "\r" or "\n"; a "\r" immediately followed by "\n"
 * is consumed as a single terminator.
 *
 * @param text Buffered stream text.
 * @returns `{ line, rest }` where `line` is the text before the terminator and
 *   `rest` is everything after it, or `null` when `text` has no line break.
 */
function consumeLine(text) {
    const breakAt = nextLineBreakIndex(text);
    if (breakAt < 0) {
        return null;
    }
    const isCrLf = text[breakAt] === "\r" && text[breakAt + 1] === "\n";
    const terminatorLength = isCrLf ? 2 : 1;
    return {
        line: text.slice(0, breakAt),
        rest: text.slice(breakAt + terminatorLength),
    };
}
|
|
177
|
+
/**
 * Reads a byte stream and yields the server-sent events it contains.
 *
 * Bytes are decoded incrementally as text, split into lines with
 * `consumeLine`, and fed to `decodeSseLine`; each completed event is yielded
 * as soon as its terminating blank line is seen. When the stream ends, any
 * unterminated final line and any event still being accumulated are flushed.
 *
 * @param body Stream exposing `getReader()` whose chunks are accepted by
 *   `TextDecoder.decode`.
 * @param signal Optional abort signal, checked before every read.
 * @returns Async iterator of `{ event, data, raw }` objects.
 * @throws Error("Request was aborted") when `signal` is aborted between reads.
 */
async function* iterateSseMessages(body, signal) {
    const reader = body.getReader();
    const decoder = new TextDecoder();
    const state = { event: null, data: [], raw: [] };
    let buffer = "";
    // Set when the last consumed line was terminated by a "\r" that was the
    // final buffered character. If the next chunk starts with "\n", that "\n"
    // is the second half of the same CRLF and must not count as an empty line
    // (an empty line would flush the pending event prematurely).
    let skipLeadingLf = false;
    // Becomes true only when the stream reported `done`; otherwise the body is
    // cancelled on exit so the underlying connection is not left dangling.
    let exhausted = false;
    // Yields every event completed by the lines currently held in `buffer`.
    function* drainLines() {
        if (skipLeadingLf && buffer.length > 0) {
            if (buffer[0] === "\n") {
                buffer = buffer.slice(1);
            }
            skipLeadingLf = false;
        }
        let consumed = consumeLine(buffer);
        while (consumed) {
            skipLeadingLf = consumed.rest === "" && buffer.endsWith("\r");
            buffer = consumed.rest;
            const event = decodeSseLine(consumed.line, state);
            if (event) {
                yield event;
            }
            consumed = consumeLine(buffer);
        }
    }
    try {
        while (true) {
            if (signal?.aborted) {
                throw new Error("Request was aborted");
            }
            const { value, done } = await reader.read();
            if (done) {
                exhausted = true;
                break;
            }
            buffer += decoder.decode(value, { stream: true });
            yield* drainLines();
        }
        // Flush any bytes the decoder was still holding for an incomplete character.
        buffer += decoder.decode();
        yield* drainLines();
        if (buffer.length > 0) {
            // Final line without a terminator.
            const event = decodeSseLine(buffer, state);
            if (event) {
                yield event;
            }
        }
        // Event that was never closed by a blank line.
        const trailingEvent = flushSseEvent(state);
        if (trailingEvent) {
            yield trailingEvent;
        }
    }
    finally {
        if (!exhausted) {
            try {
                await reader.cancel();
            }
            catch {
                // Best effort: the stream may already be errored or aborted.
            }
        }
        reader.releaseLock();
    }
}
|
|
227
|
+
/**
 * Yields the parsed JSON payload of every named event in a streamed response.
 *
 * Events without a name and "ping" events are skipped. An "error" event
 * aborts iteration by throwing its data as the error message. All other
 * events have their data parsed with `parseJsonWithRepair`.
 *
 * @param response Response whose `body` is the event stream.
 * @param signal Optional abort signal forwarded to `iterateSseMessages`.
 * @throws Error when the response has no body, when an "error" event is
 *   received, or when an event's data cannot be parsed (the message then
 *   includes the event name, the data and the raw lines).
 */
async function* iterateAnthropicEvents(response, signal) {
    const { body } = response;
    if (!body) {
        throw new Error("Attempted to iterate over an Anthropic response with no body");
    }
    for await (const message of iterateSseMessages(body, signal)) {
        const eventName = message.event;
        const isIgnorable = !eventName || eventName === "ping";
        if (isIgnorable) {
            continue;
        }
        if (eventName === "error") {
            throw new Error(message.data);
        }
        try {
            yield parseJsonWithRepair(message.data);
        }
        catch (error) {
            const detail = error instanceof Error ? error.message : String(error);
            const rawLines = message.raw.join("\\n");
            throw new Error(`Could not parse Anthropic SSE event ${eventName}: ${detail}; data=${message.data}; raw=${rawLines}`);
        }
    }
}
|
|
116
247
|
export const streamAnthropic = (model, context, options) => {
|
|
117
248
|
const stream = new AssistantMessageEventStream();
|
|
118
249
|
(async () => {
|
|
@@ -159,13 +290,13 @@ export const streamAnthropic = (model, context, options) => {
|
|
|
159
290
|
if (nextParams !== undefined) {
|
|
160
291
|
params = nextParams;
|
|
161
292
|
}
|
|
162
|
-
const
|
|
163
|
-
.
|
|
164
|
-
.
|
|
293
|
+
const response = await client.messages
|
|
294
|
+
.create({ ...params, stream: true }, { signal: options?.signal })
|
|
295
|
+
.asResponse();
|
|
165
296
|
await options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);
|
|
166
297
|
stream.push({ type: "start", partial: output });
|
|
167
298
|
const blocks = output.content;
|
|
168
|
-
for await (const event of
|
|
299
|
+
for await (const event of iterateAnthropicEvents(response, options?.signal)) {
|
|
169
300
|
if (event.type === "message_start") {
|
|
170
301
|
output.responseId = event.message.id;
|
|
171
302
|
// Capture initial token usage from message_start event
|
|
@@ -448,7 +579,7 @@ function createClient(model, apiKey, interleavedThinking, optionsHeaders, dynami
|
|
|
448
579
|
});
|
|
449
580
|
return { client, isOAuthToken: false };
|
|
450
581
|
}
|
|
451
|
-
const betaFeatures = [
|
|
582
|
+
const betaFeatures = [];
|
|
452
583
|
if (needsInterleavedBeta) {
|
|
453
584
|
betaFeatures.push("interleaved-thinking-2025-05-14");
|
|
454
585
|
}
|
|
@@ -462,7 +593,7 @@ function createClient(model, apiKey, interleavedThinking, optionsHeaders, dynami
|
|
|
462
593
|
defaultHeaders: mergeHeaders({
|
|
463
594
|
accept: "application/json",
|
|
464
595
|
"anthropic-dangerous-direct-browser-access": "true",
|
|
465
|
-
"anthropic-beta":
|
|
596
|
+
"anthropic-beta": ["claude-code-20250219", "oauth-2025-04-20", ...betaFeatures].join(","),
|
|
466
597
|
"user-agent": `claude-cli/${claudeCodeVersion}`,
|
|
467
598
|
"x-app": "cli",
|
|
468
599
|
}, model.headers, optionsHeaders),
|
|
@@ -477,7 +608,7 @@ function createClient(model, apiKey, interleavedThinking, optionsHeaders, dynami
|
|
|
477
608
|
defaultHeaders: mergeHeaders({
|
|
478
609
|
accept: "application/json",
|
|
479
610
|
"anthropic-dangerous-direct-browser-access": "true",
|
|
480
|
-
"anthropic-beta": betaFeatures.join(","),
|
|
611
|
+
...(betaFeatures.length > 0 ? { "anthropic-beta": betaFeatures.join(",") } : {}),
|
|
481
612
|
}, model.headers, optionsHeaders),
|
|
482
613
|
});
|
|
483
614
|
return { client, isOAuthToken: false };
|
|
@@ -741,6 +872,7 @@ function convertTools(tools, isOAuthToken, cacheControl) {
|
|
|
741
872
|
return {
|
|
742
873
|
name: isOAuthToken ? toClaudeCodeName(tool.name) : tool.name,
|
|
743
874
|
description: tool.description,
|
|
875
|
+
eager_input_streaming: true,
|
|
744
876
|
input_schema: {
|
|
745
877
|
type: "object",
|
|
746
878
|
properties: schema.properties ?? {},
|