@ai-sdk/anthropic 3.0.75 → 3.0.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,14 +2,29 @@ import type { JSONObject, LanguageModelV3Usage } from '@ai-sdk/provider';
2
2
 
3
3
  /**
4
4
  * Represents a single iteration in the usage breakdown.
5
- * When compaction occurs, the API returns an iterations array showing
6
- * usage for each sampling iteration (compaction + message).
5
+ *
6
+ * - `compaction` / `message`: executor iterations, billed at executor rates.
7
+ * - `advisor_message`: advisor sub-inference, billed at the advisor model's
8
+ * rates. The `model` field carries the advisor model ID. Advisor tokens
9
+ * are NOT rolled into the top-level totals because they bill at a
10
+ * different rate; inspect this array for advisor cost tracking.
7
11
  */
8
- export type AnthropicUsageIteration = {
9
- type: 'compaction' | 'message';
10
- input_tokens: number;
11
- output_tokens: number;
12
- };
12
export type AnthropicUsageIteration =
  // Executor iterations: identified by `type` alone.
  | (AnthropicIterationTokenCounts & {
      type: 'compaction' | 'message';
    })
  // Advisor iterations additionally carry the `model` string.
  | (AnthropicIterationTokenCounts & {
      type: 'advisor_message';
      model: string;
    });

/**
 * Token counters shared by every usage iteration variant. The two cache
 * counters may be omitted or `null`.
 */
type AnthropicIterationTokenCounts = {
  input_tokens: number;
  output_tokens: number;
  cache_creation_input_tokens?: number | null;
  cache_read_input_tokens?: number | null;
};
13
28
 
14
29
  export type AnthropicMessagesUsage = {
15
30
  input_tokens: number;
@@ -17,10 +32,12 @@ export type AnthropicMessagesUsage = {
17
32
  cache_creation_input_tokens?: number | null;
18
33
  cache_read_input_tokens?: number | null;
19
34
  /**
20
- * When compaction is triggered, this array contains usage for each
21
- * sampling iteration. The top-level input_tokens and output_tokens
22
- * do NOT include compaction iteration usage - to get total tokens
23
- * consumed and billed, sum across all entries in this array.
35
+ * When compaction is triggered or the advisor tool is invoked, this
36
+ * array contains usage for each sampling iteration. Top-level
37
+ * input_tokens and output_tokens exclude compaction iteration usage,
38
+ * and the advisor sub-inference is also not rolled into the top-level
39
+ * totals because it bills at a different rate. Use this array for
40
+ * per-iteration cost tracking.
24
41
  */
25
42
  iterations?: AnthropicUsageIteration[] | null;
26
43
  };
@@ -35,22 +52,33 @@ export function convertAnthropicMessagesUsage({
35
52
  const cacheCreationTokens = usage.cache_creation_input_tokens ?? 0;
36
53
  const cacheReadTokens = usage.cache_read_input_tokens ?? 0;
37
54
 
38
- // When iterations is present (compaction occurred), sum across all iterations
39
- // to get the true total tokens consumed/billed. The top-level input_tokens
40
- // and output_tokens exclude compaction iteration usage.
55
+ // When iterations is present (compaction or advisor), sum across executor
56
+ // iterations to get the true executor totals. The top-level input_tokens
57
+ // and output_tokens exclude compaction usage. Advisor (`advisor_message`)
58
+ // iterations are filtered out: they bill at the advisor model's rates,
59
+ // not the executor's, so they don't belong in the top-level totals.
41
60
  let inputTokens: number;
42
61
  let outputTokens: number;
43
62
 
44
63
  if (usage.iterations && usage.iterations.length > 0) {
45
- const totals = usage.iterations.reduce(
46
- (acc, iter) => ({
47
- input: acc.input + iter.input_tokens,
48
- output: acc.output + iter.output_tokens,
49
- }),
50
- { input: 0, output: 0 },
64
+ const executorIterations = usage.iterations.filter(
65
+ iter => iter.type === 'compaction' || iter.type === 'message',
51
66
  );
52
- inputTokens = totals.input;
53
- outputTokens = totals.output;
67
+
68
+ if (executorIterations.length > 0) {
69
+ const totals = executorIterations.reduce(
70
+ (acc, iter) => ({
71
+ input: acc.input + iter.input_tokens,
72
+ output: acc.output + iter.output_tokens,
73
+ }),
74
+ { input: 0, output: 0 },
75
+ );
76
+ inputTokens = totals.input;
77
+ outputTokens = totals.output;
78
+ } else {
79
+ inputTokens = usage.input_tokens;
80
+ outputTokens = usage.output_tokens;
81
+ }
54
82
  } else {
55
83
  inputTokens = usage.input_tokens;
56
84
  outputTokens = usage.output_tokens;
@@ -24,6 +24,7 @@ import {
24
24
  } from './anthropic-messages-api';
25
25
  import { anthropicFilePartProviderOptions } from './anthropic-messages-options';
26
26
  import { CacheControlValidator } from './get-cache-control';
27
+ import { advisor_20260301OutputSchema } from './tool/advisor_20260301';
27
28
  import { codeExecution_20250522OutputSchema } from './tool/code-execution_20250522';
28
29
  import { codeExecution_20250825OutputSchema } from './tool/code-execution_20250825';
29
30
  import { codeExecution_20260120OutputSchema } from './tool/code-execution_20260120';
@@ -634,6 +635,15 @@ export async function convertToAnthropicMessagesPrompt({
634
635
  input: part.input,
635
636
  cache_control: cacheControl,
636
637
  });
638
+ } else if (providerToolName === 'advisor') {
639
+ // The advisor server_tool_use.input is always {}.
640
+ anthropicContent.push({
641
+ type: 'server_tool_use',
642
+ id: part.toolCallId,
643
+ name: 'advisor',
644
+ input: {},
645
+ cache_control: cacheControl,
646
+ });
637
647
  } else {
638
648
  warnings.push({
639
649
  type: 'other',
@@ -1020,6 +1030,58 @@ export async function convertToAnthropicMessagesPrompt({
1020
1030
  break;
1021
1031
  }
1022
1032
 
1033
+ if (providerToolName === 'advisor') {
1034
+ const output = part.output;
1035
+
1036
+ if (output.type !== 'json' && output.type !== 'error-json') {
1037
+ warnings.push({
1038
+ type: 'other',
1039
+ message: `provider executed tool result output type ${output.type} for tool ${part.toolName} is not supported`,
1040
+ });
1041
+
1042
+ break;
1043
+ }
1044
+
1045
+ const advisorOutput = await validateTypes({
1046
+ value: output.value,
1047
+ schema: advisor_20260301OutputSchema,
1048
+ });
1049
+
1050
+ if (advisorOutput.type === 'advisor_result') {
1051
+ anthropicContent.push({
1052
+ type: 'advisor_tool_result',
1053
+ tool_use_id: part.toolCallId,
1054
+ content: {
1055
+ type: 'advisor_result',
1056
+ text: advisorOutput.text,
1057
+ },
1058
+ cache_control: cacheControl,
1059
+ });
1060
+ } else if (advisorOutput.type === 'advisor_redacted_result') {
1061
+ anthropicContent.push({
1062
+ type: 'advisor_tool_result',
1063
+ tool_use_id: part.toolCallId,
1064
+ content: {
1065
+ type: 'advisor_redacted_result',
1066
+ encrypted_content: advisorOutput.encryptedContent,
1067
+ },
1068
+ cache_control: cacheControl,
1069
+ });
1070
+ } else {
1071
+ anthropicContent.push({
1072
+ type: 'advisor_tool_result',
1073
+ tool_use_id: part.toolCallId,
1074
+ content: {
1075
+ type: 'advisor_tool_result_error',
1076
+ error_code: advisorOutput.errorCode,
1077
+ },
1078
+ cache_control: cacheControl,
1079
+ });
1080
+ }
1081
+
1082
+ break;
1083
+ }
1084
+
1023
1085
  warnings.push({
1024
1086
  type: 'other',
1025
1087
  message: `provider executed tool result for tool ${part.toolName} is not supported`,
@@ -0,0 +1,128 @@
1
+ import {
2
+ createProviderToolFactoryWithOutputSchema,
3
+ lazySchema,
4
+ zodSchema,
5
+ } from '@ai-sdk/provider-utils';
6
+ import { z } from 'zod/v4';
7
+
8
/**
 * Lazily constructed schema for the advisor tool's configuration arguments.
 *
 * Accepted shape:
 * - `model`: required string.
 * - `maxUses`: optional number.
 * - `caching`: optional `{ type: 'ephemeral', ttl: '5m' | '1h' }`.
 */
export const advisor_20260301ArgsSchema = lazySchema(() => {
  // Nested caching switch; attached as an optional field below.
  const cachingSchema = z.object({
    type: z.literal('ephemeral'),
    ttl: z.union([z.literal('5m'), z.literal('1h')]),
  });

  const argsSchema = z.object({
    model: z.string(),
    maxUses: z.number().optional(),
    caching: cachingSchema.optional(),
  });

  return zodSchema(argsSchema);
});
22
+
23
/**
 * Lazily constructed schema for the advisor tool's output, a union
 * discriminated on `type`:
 * - `advisor_result` carries `text`.
 * - `advisor_redacted_result` carries `encryptedContent`.
 * - `advisor_tool_result_error` carries `errorCode`.
 */
export const advisor_20260301OutputSchema = lazySchema(() => {
  const plainResult = z.object({
    type: z.literal('advisor_result'),
    text: z.string(),
  });

  const redactedResult = z.object({
    type: z.literal('advisor_redacted_result'),
    encryptedContent: z.string(),
  });

  const errorResult = z.object({
    type: z.literal('advisor_tool_result_error'),
    errorCode: z.string(),
  });

  // Member order matches the original declaration.
  return zodSchema(
    z.discriminatedUnion('type', [plainResult, redactedResult, errorResult]),
  );
});
41
+
42
/**
 * Lazily constructed schema for the advisor tool call input: a strict,
 * empty object (no properties are declared).
 */
const advisor_20260301InputSchema = lazySchema(() => {
  const emptyInput = z.object({}).strict();
  return zodSchema(emptyInput);
});
45
+
46
/**
 * Output variants of the advisor tool, discriminated on `type`.
 */
type Advisor20260301Output =
  | {
      type: 'advisor_result';

      /**
       * Plaintext advice from the advisor model.
       */
      text: string;
    }
  | {
      type: 'advisor_redacted_result';

      /**
       * Opaque, encrypted advice. Must be round-tripped verbatim on subsequent
       * turns; the server decrypts it server-side when rendering the advisor's
       * advice into the executor's prompt.
       */
      encryptedContent: string;
    }
  | {
      type: 'advisor_tool_result_error';

      /**
       * Available options: `max_uses_exceeded`, `too_many_requests`,
       * `overloaded`, `prompt_too_long`, `execution_time_exceeded`,
       * `unavailable`.
       */
      errorCode: string;
    };

/**
 * Configuration arguments accepted when creating the advisor tool.
 */
type Advisor20260301Args = {
  /**
   * The advisor model ID, such as `"claude-opus-4-7"`. Billed at this
   * model's rates for the sub-inference.
   *
   * The advisor must be at least as capable as the executor; an invalid
   * pair returns a `400 invalid_request_error` from the API.
   */
  model: string;

  /**
   * Maximum number of advisor calls allowed in a single request. Once the
   * executor reaches this cap, further advisor calls return an
   * `advisor_tool_result_error` with `error_code: "max_uses_exceeded"` and
   * the executor continues without further advice.
   *
   * This is a per-request cap, not a per-conversation cap. To enforce
   * conversation-level limits, count advisor calls client-side; when you
   * hit your cap, remove the advisor tool from `tools` AND strip all
   * `advisor_tool_result` blocks from your message history (otherwise the
   * API returns `400 invalid_request_error`).
   */
  maxUses?: number;

  /**
   * Enables prompt caching for the advisor's own transcript across calls
   * within a conversation. Unlike `cache_control` on content blocks, this
   * is not a breakpoint marker; it is an on/off switch. The server decides
   * where cache boundaries go.
   *
   * The cache write costs more than the reads save when the advisor is
   * called two or fewer times per conversation; caching breaks even at
   * roughly three advisor calls. Enable it for long agent loops; keep it
   * off for short tasks. Keep it consistent across a conversation —
   * toggling causes cache misses.
   */
  caching?: {
    type: 'ephemeral';
    ttl: '5m' | '1h';
  };
};

/**
 * Provider tool factory for `anthropic.advisor_20260301`, wired to the
 * input and output schemas declared in this module.
 */
const factory = createProviderToolFactoryWithOutputSchema<
  // Input is always empty: the executor emits server_tool_use with empty input
  // and the server constructs the advisor's view from the full transcript.
  {},
  Advisor20260301Output,
  Advisor20260301Args
>({
  id: 'anthropic.advisor_20260301',
  inputSchema: advisor_20260301InputSchema,
  outputSchema: advisor_20260301OutputSchema,
  supportsDeferredResults: true,
});
125
+
126
/**
 * Creates the advisor provider tool by forwarding `args` unchanged to the
 * module-level factory.
 *
 * @param args - The factory's first parameter (tool configuration).
 * @returns Whatever the factory returns for those arguments.
 */
export const advisor_20260301 = (args: Parameters<typeof factory>[0]) =>
  factory(args);