@ai-sdk/anthropic 4.0.0-beta.5 → 4.0.0-beta.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +500 -4
- package/README.md +2 -0
- package/dist/index.d.ts +265 -68
- package/dist/index.js +2636 -1427
- package/dist/index.js.map +1 -1
- package/dist/internal/index.d.ts +234 -62
- package/dist/internal/index.js +2605 -1413
- package/dist/internal/index.js.map +1 -1
- package/docs/05-anthropic.mdx +303 -20
- package/package.json +16 -17
- package/src/{anthropic-messages-api.ts → anthropic-api.ts} +158 -17
- package/src/anthropic-error.ts +1 -1
- package/src/anthropic-files.ts +95 -0
- package/src/{anthropic-messages-options.ts → anthropic-language-model-options.ts} +104 -11
- package/src/{anthropic-messages-language-model.ts → anthropic-language-model.ts} +494 -96
- package/src/anthropic-message-metadata.ts +69 -9
- package/src/anthropic-prepare-tools.ts +31 -7
- package/src/anthropic-provider.ts +42 -13
- package/src/anthropic-tools.ts +31 -0
- package/src/convert-anthropic-usage.ts +109 -0
- package/src/{convert-to-anthropic-messages-prompt.ts → convert-to-anthropic-prompt.ts} +376 -198
- package/src/forward-anthropic-container-id-from-last-step.ts +2 -2
- package/src/get-cache-control.ts +5 -2
- package/src/index.ts +1 -1
- package/src/internal/index.ts +13 -2
- package/src/map-anthropic-stop-reason.ts +1 -1
- package/src/sanitize-json-schema.ts +203 -0
- package/src/skills/anthropic-skills-api.ts +44 -0
- package/src/skills/anthropic-skills.ts +132 -0
- package/src/tool/advisor_20260301.ts +128 -0
- package/src/tool/bash_20241022.ts +84 -13
- package/src/tool/bash_20250124.ts +84 -13
- package/src/tool/code-execution_20250522.ts +2 -2
- package/src/tool/code-execution_20250825.ts +2 -2
- package/src/tool/code-execution_20260120.ts +2 -2
- package/src/tool/computer_20241022.ts +2 -2
- package/src/tool/computer_20250124.ts +2 -2
- package/src/tool/computer_20251124.ts +2 -2
- package/src/tool/memory_20250818.ts +2 -2
- package/src/tool/text-editor_20241022.ts +2 -2
- package/src/tool/text-editor_20250124.ts +2 -2
- package/src/tool/text-editor_20250429.ts +2 -2
- package/src/tool/text-editor_20250728.ts +6 -3
- package/src/tool/tool-search-bm25_20251119.ts +2 -2
- package/src/tool/tool-search-regex_20251119.ts +2 -2
- package/src/tool/web-fetch-20250910.ts +2 -2
- package/src/tool/web-fetch-20260209.ts +2 -2
- package/src/tool/web-search_20250305.ts +2 -2
- package/src/tool/web-search_20260209.ts +2 -2
- package/dist/index.d.mts +0 -1090
- package/dist/index.mjs +0 -5244
- package/dist/index.mjs.map +0 -1
- package/dist/internal/index.d.mts +0 -969
- package/dist/internal/index.mjs +0 -5136
- package/dist/internal/index.mjs.map +0 -1
- package/src/convert-anthropic-messages-usage.ts +0 -73
|
@@ -1,12 +1,29 @@
|
|
|
1
|
-
import { JSONObject } from '@ai-sdk/provider';
|
|
1
|
+
import type { JSONObject } from '@ai-sdk/provider';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* Represents a single iteration in the usage breakdown.
|
|
5
|
-
*
|
|
6
|
-
* usage for each sampling
|
|
5
|
+
*
|
|
6
|
+
* The API returns an iterations array showing usage for each sampling
|
|
7
|
+
* iteration. Iterations can be:
|
|
8
|
+
* - `compaction`: a context compaction step (billed at executor rates).
|
|
9
|
+
* - `message`: an executor sampling iteration (billed at executor rates).
|
|
10
|
+
* - `advisor_message`: an advisor sub-inference (billed at the advisor
|
|
11
|
+
* model's rates). Advisor token usage is NOT rolled into the top-level
|
|
12
|
+
* usage totals because it bills at a different rate; inspect this array
|
|
13
|
+
* directly for advisor billing.
|
|
14
|
+
* - `fallback_message`: a server-side fallback attempt that served the turn.
|
|
15
|
+
* Inspect this array for exact per-model attribution on a turn that fell
|
|
16
|
+
* back.
|
|
7
17
|
*/
|
|
8
|
-
export interface AnthropicUsageIteration {
|
|
9
|
-
type: 'compaction' | 'message';
|
|
18
|
+
export type AnthropicUsageIteration = {
|
|
19
|
+
type: 'compaction' | 'message' | 'advisor_message' | 'fallback_message';
|
|
20
|
+
|
|
21
|
+
/**
|
|
22
|
+
* The model that produced this iteration. Populated for the per-model
|
|
23
|
+
* attribution cases (the fallback chain and advisor sub-inferences) and
|
|
24
|
+
* absent otherwise.
|
|
25
|
+
*/
|
|
26
|
+
model?: string;
|
|
10
27
|
|
|
11
28
|
/**
|
|
12
29
|
* Number of input tokens consumed in this iteration.
|
|
@@ -17,15 +34,58 @@ export interface AnthropicUsageIteration {
|
|
|
17
34
|
* Number of output tokens generated in this iteration.
|
|
18
35
|
*/
|
|
19
36
|
outputTokens: number;
|
|
20
|
-
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Number of cache-creation input tokens consumed in this iteration.
|
|
40
|
+
*/
|
|
41
|
+
cacheCreationInputTokens?: number;
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Number of cache-read input tokens consumed in this iteration.
|
|
45
|
+
*/
|
|
46
|
+
cacheReadInputTokens?: number;
|
|
47
|
+
};
|
|
21
48
|
|
|
22
49
|
export interface AnthropicMessageMetadata {
|
|
23
50
|
usage: JSONObject;
|
|
24
|
-
// TODO remove cacheCreationInputTokens in AI SDK 6
|
|
25
|
-
// (use value in usage object instead)
|
|
26
|
-
cacheCreationInputTokens: number | null;
|
|
27
51
|
stopSequence: string | null;
|
|
28
52
|
|
|
53
|
+
/**
|
|
54
|
+
* Details about why the request stopped. Present only when the API returns
|
|
55
|
+
* a `refusal` stop reason together with a `stop_details` object (a
|
|
56
|
+
* classifier block or a model refusal).
|
|
57
|
+
*
|
|
58
|
+
* Branch on the finish reason (`content-filter`), not on this object: the
|
|
59
|
+
* API may return a refusal with no details at all, so this field can be
|
|
60
|
+
* absent even on a refusal and should not be relied upon being present.
|
|
61
|
+
*/
|
|
62
|
+
stopDetails?: {
|
|
63
|
+
/**
|
|
64
|
+
* The kind of stop detail. `'refusal'` for classifier blocks and model
|
|
65
|
+
* refusals.
|
|
66
|
+
*/
|
|
67
|
+
type: string;
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* The classifier category that triggered the block, e.g. `'cyber'` or
|
|
71
|
+
* `'bio'`. Absent for model refusals and other cases.
|
|
72
|
+
*/
|
|
73
|
+
category?: string;
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* Human-readable explanation of why the request was blocked. May be
|
|
77
|
+
* absent even on a refusal.
|
|
78
|
+
*/
|
|
79
|
+
explanation?: string;
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* The canonical id of a model to retry directly. Populated only when the
|
|
83
|
+
* request included fallbacks and the fallback attempt could not be made
|
|
84
|
+
* (e.g. the fallback model was rate limited or overloaded).
|
|
85
|
+
*/
|
|
86
|
+
recommendedModel?: string;
|
|
87
|
+
};
|
|
88
|
+
|
|
29
89
|
/**
|
|
30
90
|
* Usage breakdown by iteration when compaction is triggered.
|
|
31
91
|
*
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
|
-
LanguageModelV4CallOptions,
|
|
3
|
-
SharedV4Warning,
|
|
4
2
|
UnsupportedFunctionalityError,
|
|
3
|
+
type LanguageModelV4CallOptions,
|
|
4
|
+
type SharedV4Warning,
|
|
5
5
|
} from '@ai-sdk/provider';
|
|
6
|
-
import { AnthropicTool, AnthropicToolChoice } from './anthropic-messages-api';
|
|
6
|
+
import type { AnthropicTool, AnthropicToolChoice } from './anthropic-api';
|
|
7
7
|
import { CacheControlValidator } from './get-cache-control';
|
|
8
|
+
import { advisor_20260301ArgsSchema } from './tool/advisor_20260301';
|
|
8
9
|
import { textEditor_20250728ArgsSchema } from './tool/text-editor_20250728';
|
|
9
10
|
import { webSearch_20260209ArgsSchema } from './tool/web-search_20260209';
|
|
10
11
|
import { webSearch_20250305ArgsSchema } from './tool/web-search_20250305';
|
|
@@ -27,6 +28,7 @@ export async function prepareTools({
|
|
|
27
28
|
cacheControlValidator,
|
|
28
29
|
supportsStructuredOutput,
|
|
29
30
|
supportsStrictTools,
|
|
31
|
+
defaultEagerInputStreaming = false,
|
|
30
32
|
}: {
|
|
31
33
|
tools: LanguageModelV4CallOptions['tools'];
|
|
32
34
|
toolChoice: LanguageModelV4CallOptions['toolChoice'] | undefined;
|
|
@@ -42,6 +44,12 @@ export async function prepareTools({
|
|
|
42
44
|
* Whether the model supports strict mode on tool definitions.
|
|
43
45
|
*/
|
|
44
46
|
supportsStrictTools: boolean;
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Default for `eager_input_streaming` on function tools that do not set
|
|
50
|
+
* it explicitly. Driven by the model-level `toolStreaming` option.
|
|
51
|
+
*/
|
|
52
|
+
defaultEagerInputStreaming?: boolean;
|
|
45
53
|
}): Promise<{
|
|
46
54
|
tools: Array<AnthropicTool> | undefined;
|
|
47
55
|
toolChoice: AnthropicToolChoice | undefined;
|
|
@@ -73,8 +81,10 @@ export async function prepareTools({
|
|
|
73
81
|
const anthropicOptions = tool.providerOptions?.anthropic as
|
|
74
82
|
| AnthropicToolOptions
|
|
75
83
|
| undefined;
|
|
76
|
-
// eager_input_streaming is only supported on custom (function) tools
|
|
77
|
-
|
|
84
|
+
// eager_input_streaming is only supported on custom (function) tools.
|
|
85
|
+
// Fall back to the model-level default when the tool doesn't set it.
|
|
86
|
+
const eagerInputStreaming =
|
|
87
|
+
anthropicOptions?.eagerInputStreaming ?? defaultEagerInputStreaming;
|
|
78
88
|
const deferLoading = anthropicOptions?.deferLoading;
|
|
79
89
|
const allowedCallers = anthropicOptions?.allowedCallers;
|
|
80
90
|
|
|
@@ -322,7 +332,6 @@ export async function prepareTools({
|
|
|
322
332
|
}
|
|
323
333
|
|
|
324
334
|
case 'anthropic.tool_search_regex_20251119': {
|
|
325
|
-
betas.add('advanced-tool-use-2025-11-20');
|
|
326
335
|
anthropicTools.push({
|
|
327
336
|
type: 'tool_search_tool_regex_20251119',
|
|
328
337
|
name: 'tool_search_tool_regex',
|
|
@@ -331,7 +340,6 @@ export async function prepareTools({
|
|
|
331
340
|
}
|
|
332
341
|
|
|
333
342
|
case 'anthropic.tool_search_bm25_20251119': {
|
|
334
|
-
betas.add('advanced-tool-use-2025-11-20');
|
|
335
343
|
anthropicTools.push({
|
|
336
344
|
type: 'tool_search_tool_bm25_20251119',
|
|
337
345
|
name: 'tool_search_tool_bm25',
|
|
@@ -339,6 +347,22 @@ export async function prepareTools({
|
|
|
339
347
|
break;
|
|
340
348
|
}
|
|
341
349
|
|
|
350
|
+
case 'anthropic.advisor_20260301': {
|
|
351
|
+
betas.add('advisor-tool-2026-03-01');
|
|
352
|
+
const args = await validateTypes({
|
|
353
|
+
value: tool.args,
|
|
354
|
+
schema: advisor_20260301ArgsSchema,
|
|
355
|
+
});
|
|
356
|
+
anthropicTools.push({
|
|
357
|
+
type: 'advisor_20260301',
|
|
358
|
+
name: 'advisor',
|
|
359
|
+
model: args.model,
|
|
360
|
+
...(args.maxUses !== undefined && { max_uses: args.maxUses }),
|
|
361
|
+
...(args.caching !== undefined && { caching: args.caching }),
|
|
362
|
+
});
|
|
363
|
+
break;
|
|
364
|
+
}
|
|
365
|
+
|
|
342
366
|
default: {
|
|
343
367
|
toolWarnings.push({
|
|
344
368
|
type: 'unsupported',
|
|
@@ -1,42 +1,53 @@
|
|
|
1
1
|
import {
|
|
2
2
|
InvalidArgumentError,
|
|
3
|
-
LanguageModelV4,
|
|
4
3
|
NoSuchModelError,
|
|
5
|
-
|
|
4
|
+
type FilesV4,
|
|
5
|
+
type LanguageModelV4,
|
|
6
|
+
type ProviderV4,
|
|
7
|
+
type SkillsV4,
|
|
6
8
|
} from '@ai-sdk/provider';
|
|
7
9
|
import {
|
|
8
|
-
FetchFunction,
|
|
9
10
|
generateId,
|
|
10
11
|
loadApiKey,
|
|
11
12
|
loadOptionalSetting,
|
|
12
13
|
withoutTrailingSlash,
|
|
13
14
|
withUserAgentSuffix,
|
|
15
|
+
type FetchFunction,
|
|
14
16
|
} from '@ai-sdk/provider-utils';
|
|
15
|
-
import {
|
|
16
|
-
import {
|
|
17
|
-
import {
|
|
17
|
+
import { AnthropicFiles } from './anthropic-files';
|
|
18
|
+
import { AnthropicLanguageModel } from './anthropic-language-model';
|
|
19
|
+
import type { AnthropicModelId } from './anthropic-language-model-options';
|
|
18
20
|
import { anthropicTools } from './anthropic-tools';
|
|
21
|
+
import { AnthropicSkills } from './skills/anthropic-skills';
|
|
22
|
+
import { VERSION } from './version';
|
|
19
23
|
|
|
20
24
|
export interface AnthropicProvider extends ProviderV4 {
|
|
21
25
|
/**
|
|
22
26
|
* Creates a model for text generation.
|
|
23
27
|
*/
|
|
24
|
-
(modelId:
|
|
28
|
+
(modelId: AnthropicModelId): LanguageModelV4;
|
|
25
29
|
|
|
26
30
|
/**
|
|
27
31
|
* Creates a model for text generation.
|
|
28
32
|
*/
|
|
29
|
-
languageModel(modelId:
|
|
33
|
+
languageModel(modelId: AnthropicModelId): LanguageModelV4;
|
|
30
34
|
|
|
31
|
-
chat(modelId:
|
|
35
|
+
chat(modelId: AnthropicModelId): LanguageModelV4;
|
|
32
36
|
|
|
33
|
-
messages(modelId:
|
|
37
|
+
messages(modelId: AnthropicModelId): LanguageModelV4;
|
|
34
38
|
|
|
35
39
|
/**
|
|
36
40
|
* @deprecated Use `embeddingModel` instead.
|
|
37
41
|
*/
|
|
38
42
|
textEmbeddingModel(modelId: string): never;
|
|
39
43
|
|
|
44
|
+
files(): FilesV4;
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Returns a SkillsV4 interface for uploading skills to Anthropic.
|
|
48
|
+
*/
|
|
49
|
+
skills(): SkillsV4;
|
|
50
|
+
|
|
40
51
|
/**
|
|
41
52
|
* Anthropic-specific computer use tool.
|
|
42
53
|
*/
|
|
@@ -130,8 +141,8 @@ export function createAnthropic(
|
|
|
130
141
|
);
|
|
131
142
|
};
|
|
132
143
|
|
|
133
|
-
const createChatModel = (modelId:
|
|
134
|
-
new
|
|
144
|
+
const createChatModel = (modelId: AnthropicModelId) =>
|
|
145
|
+
new AnthropicLanguageModel(modelId, {
|
|
135
146
|
provider: providerName,
|
|
136
147
|
baseURL,
|
|
137
148
|
headers: getHeaders,
|
|
@@ -143,7 +154,15 @@ export function createAnthropic(
|
|
|
143
154
|
}),
|
|
144
155
|
});
|
|
145
156
|
|
|
146
|
-
const
|
|
157
|
+
const createSkills = () =>
|
|
158
|
+
new AnthropicSkills({
|
|
159
|
+
provider: `${providerName.replace('.messages', '')}.skills`,
|
|
160
|
+
baseURL,
|
|
161
|
+
headers: getHeaders,
|
|
162
|
+
fetch: options.fetch,
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
const provider = function (modelId: AnthropicModelId) {
|
|
147
166
|
if (new.target) {
|
|
148
167
|
throw new Error(
|
|
149
168
|
'The Anthropic model function cannot be called with the new keyword.',
|
|
@@ -166,6 +185,16 @@ export function createAnthropic(
|
|
|
166
185
|
throw new NoSuchModelError({ modelId, modelType: 'imageModel' });
|
|
167
186
|
};
|
|
168
187
|
|
|
188
|
+
provider.files = () =>
|
|
189
|
+
new AnthropicFiles({
|
|
190
|
+
provider: providerName,
|
|
191
|
+
baseURL,
|
|
192
|
+
headers: getHeaders,
|
|
193
|
+
fetch: options.fetch,
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
provider.skills = createSkills;
|
|
197
|
+
|
|
169
198
|
provider.tools = anthropicTools;
|
|
170
199
|
|
|
171
200
|
return provider;
|
package/src/anthropic-tools.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { advisor_20260301 } from './tool/advisor_20260301';
|
|
1
2
|
import { bash_20241022 } from './tool/bash_20241022';
|
|
2
3
|
import { bash_20250124 } from './tool/bash_20250124';
|
|
3
4
|
import { codeExecution_20250522 } from './tool/code-execution_20250522';
|
|
@@ -19,6 +20,36 @@ import { webSearch_20260209 } from './tool/web-search_20260209';
|
|
|
19
20
|
import { webSearch_20250305 } from './tool/web-search_20250305';
|
|
20
21
|
|
|
21
22
|
export const anthropicTools = {
|
|
23
|
+
/**
|
|
24
|
+
* Pairs a faster executor model with a higher-intelligence advisor model
|
|
25
|
+
* that provides strategic guidance mid-generation.
|
|
26
|
+
*
|
|
27
|
+
* The advisor lets a faster, lower-cost executor model consult a
|
|
28
|
+
* higher-intelligence advisor model server-side. The advisor reads the
|
|
29
|
+
* executor's full transcript and produces a plan or course correction;
|
|
30
|
+
* the executor continues with the task, informed by the advice. All of
|
|
31
|
+
* this happens inside a single `/v1/messages` request.
|
|
32
|
+
*
|
|
33
|
+
* Beta header `advisor-tool-2026-03-01` is added automatically when this
|
|
34
|
+
* tool is included.
|
|
35
|
+
*
|
|
36
|
+
* Multi-turn conversations: pass the full assistant content (including
|
|
37
|
+
* `advisor_tool_result` blocks) back to the API on subsequent turns. If
|
|
38
|
+
* you omit the advisor tool from `tools` on a follow-up turn while the
|
|
39
|
+
* message history still contains `advisor_tool_result` blocks, the API
|
|
40
|
+
* returns a `400 invalid_request_error`.
|
|
41
|
+
*
|
|
42
|
+
* Supported executor models: Claude Haiku 4.5, Sonnet 4.6, Opus 4.6,
|
|
43
|
+
* Opus 4.7. The advisor must be at least as capable as the executor.
|
|
44
|
+
*
|
|
45
|
+
* @param model - The advisor model ID (required), e.g. `"claude-opus-4-8"`.
|
|
46
|
+
* @param maxUses - Maximum advisor calls per request (per-request cap).
|
|
47
|
+
* @param caching - Enables prompt caching for the advisor's transcript
|
|
48
|
+
* across calls within a conversation. Worthwhile from ~3 advisor calls
|
|
49
|
+
* per conversation.
|
|
50
|
+
*/
|
|
51
|
+
advisor_20260301,
|
|
52
|
+
|
|
22
53
|
/**
|
|
23
54
|
* The bash tool enables Claude to execute shell commands in a persistent bash session,
|
|
24
55
|
* allowing system operations, script execution, and command-line automation.
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import type { JSONObject, LanguageModelV4Usage } from '@ai-sdk/provider';
|
|
2
|
+
|
|
3
|
+
/**
 * A single entry of the per-iteration usage breakdown, in the API's
 * snake_case wire format.
 *
 * Iteration kinds:
 * - `compaction` / `message`: executor iterations, billed at executor rates.
 * - `advisor_message`: an advisor sub-inference, billed at the advisor
 *   model's rates. Advisor tokens are NOT rolled into the top-level totals
 *   because they bill at a different rate; inspect this array for advisor
 *   cost tracking.
 * - `fallback_message`: a server-side fallback attempt that served the turn.
 *   When present, the top-level usage already reflects the served answer, so
 *   it is used as-is.
 */
export type AnthropicUsageIteration = {
  /** Which kind of sampling iteration this entry describes. */
  type: 'compaction' | 'message' | 'advisor_message' | 'fallback_message';

  /**
   * The model that produced the iteration. The API populates it for the
   * per-model attribution cases (the fallback chain and advisor
   * sub-inferences) and omits it otherwise.
   */
  model?: string | null;

  /** Input tokens consumed in this iteration. */
  input_tokens: number;

  /** Output tokens generated in this iteration. */
  output_tokens: number;

  /** Cache-creation input tokens consumed in this iteration, if reported. */
  cache_creation_input_tokens?: number | null;

  /** Cache-read input tokens consumed in this iteration, if reported. */
  cache_read_input_tokens?: number | null;
};

/**
 * The `usage` object of a response, in the API's snake_case wire format.
 */
export type AnthropicUsage = {
  /** Top-level input token count. */
  input_tokens: number;

  /** Top-level output token count. */
  output_tokens: number;

  /** Top-level cache-creation input token count, if reported. */
  cache_creation_input_tokens?: number | null;

  /** Top-level cache-read input token count, if reported. */
  cache_read_input_tokens?: number | null;

  /**
   * Usage for each sampling iteration. Present when compaction is triggered,
   * the advisor tool is invoked, or a server-side fallback served the turn.
   *
   * Top-level `input_tokens` and `output_tokens` exclude compaction iteration
   * usage, and the advisor sub-inference is also not rolled into the
   * top-level totals because it bills at a different rate. Use this array
   * for per-iteration cost tracking.
   */
  iterations?: AnthropicUsageIteration[] | null;
};

/**
 * Converts an Anthropic `usage` object into the SDK's usage shape.
 *
 * Token totals are chosen as follows:
 * - No iterations (absent, `null`, or empty): the top-level `input_tokens`
 *   and `output_tokens` are used.
 * - Iterations present and none is a `fallback_message`: `compaction` and
 *   `message` iterations are summed to obtain the executor totals, because
 *   the top-level counts exclude compaction usage. `advisor_message`
 *   iterations are left out since they bill at the advisor model's rates.
 *   If there are no executor iterations at all, the top-level counts are used.
 * - Any `fallback_message` iteration present: the served answer comes from
 *   the fallback model and the executor `message` iteration is the blocked
 *   primary attempt, so the top-level counts (which already reflect the
 *   fallback answer) are used directly.
 *
 * Cache token counts are always read from the top-level usage object and
 * default to 0 when missing or `null`.
 *
 * @param usage - The usage object to convert.
 * @param rawUsage - Optional raw usage payload to expose as `raw`; when
 *   omitted, `usage` itself is exposed.
 * @returns The converted usage with input totals (overall, non-cached,
 *   cache-read, cache-write), the output total, and the raw payload.
 */
export function convertAnthropicUsage({
  usage,
  rawUsage,
}: {
  usage: AnthropicUsage;
  rawUsage?: JSONObject;
}): LanguageModelV4Usage {
  const cacheCreationTokens = usage.cache_creation_input_tokens ?? 0;
  const cacheReadTokens = usage.cache_read_input_tokens ?? 0;

  const iterations = usage.iterations ?? [];
  const servedByFallback = iterations.some(
    iter => iter.type === 'fallback_message',
  );

  // Only executor iterations count towards the totals, and only when the turn
  // was not served by a fallback model.
  const executorIterations = servedByFallback
    ? []
    : iterations.filter(
        iter => iter.type === 'compaction' || iter.type === 'message',
      );

  // Default to the top-level counts; override only when there is an executor
  // breakdown to sum.
  let inputTokens = usage.input_tokens;
  let outputTokens = usage.output_tokens;

  if (executorIterations.length > 0) {
    inputTokens = 0;
    outputTokens = 0;
    for (const iter of executorIterations) {
      inputTokens += iter.input_tokens;
      outputTokens += iter.output_tokens;
    }
  }

  return {
    inputTokens: {
      total: inputTokens + cacheCreationTokens + cacheReadTokens,
      noCache: inputTokens,
      cacheRead: cacheReadTokens,
      cacheWrite: cacheCreationTokens,
    },
    outputTokens: {
      total: outputTokens,
      text: undefined,
      reasoning: undefined,
    },
    raw: rawUsage ?? usage,
  };
}
|