@ai-sdk/anthropic 3.0.24 → 3.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/index.d.mts +31 -21
- package/dist/index.d.ts +31 -21
- package/dist/index.js +153 -79
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +154 -76
- package/dist/index.mjs.map +1 -1
- package/dist/internal/index.d.mts +39 -21
- package/dist/internal/index.d.ts +39 -21
- package/dist/internal/index.js +146 -72
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/index.mjs +153 -75
- package/dist/internal/index.mjs.map +1 -1
- package/docs/05-anthropic.mdx +22 -5
- package/package.json +1 -1
- package/src/anthropic-messages-api.ts +9 -0
- package/src/anthropic-prepare-tools.ts +13 -0
- package/src/anthropic-tools.ts +18 -0
- package/src/tool/computer_20251124.ts +151 -0
package/docs/05-anthropic.mdx
CHANGED
|
@@ -510,12 +510,13 @@ Parameters:
|
|
|
510
510
|
The Computer Tool enables control of keyboard and mouse actions on a computer:
|
|
511
511
|
|
|
512
512
|
```ts
|
|
513
|
-
const computerTool = anthropic.tools.computer_20241022({
|
|
513
|
+
const computerTool = anthropic.tools.computer_20251124({
|
|
514
514
|
displayWidthPx: 1920,
|
|
515
515
|
displayHeightPx: 1080,
|
|
516
516
|
displayNumber: 0, // Optional, for X11 environments
|
|
517
|
+
enableZoom: true, // Optional, enables the zoom action
|
|
517
518
|
|
|
518
|
-
execute: async ({ action, coordinate, text }) => {
|
|
519
|
+
execute: async ({ action, coordinate, text, region }) => {
|
|
519
520
|
// Implement your computer control logic here
|
|
520
521
|
// Return the result of the action
|
|
521
522
|
|
|
@@ -530,6 +531,13 @@ const computerTool = anthropic.tools.computer_20241022({
|
|
|
530
531
|
.toString('base64'),
|
|
531
532
|
};
|
|
532
533
|
}
|
|
534
|
+
case 'zoom': {
|
|
535
|
+
// region is [x1, y1, x2, y2] defining the area to zoom into
|
|
536
|
+
return {
|
|
537
|
+
type: 'image',
|
|
538
|
+
data: fs.readFileSync('./data/zoomed-region.png').toString('base64'),
|
|
539
|
+
};
|
|
540
|
+
}
|
|
533
541
|
default: {
|
|
534
542
|
console.log('Action:', action);
|
|
535
543
|
console.log('Coordinate:', coordinate);
|
|
@@ -548,13 +556,22 @@ const computerTool = anthropic.tools.computer_20241022({
|
|
|
548
556
|
});
|
|
549
557
|
```
|
|
550
558
|
|
|
559
|
+
<Note>
|
|
560
|
+
Use `computer_20251124` for Claude Opus 4.5 which supports the zoom action.
|
|
561
|
+
Use `computer_20250124` for Claude Sonnet 4.5, Haiku 4.5, Opus 4.1, Sonnet 4,
|
|
562
|
+
Opus 4, and Sonnet 3.7.
|
|
563
|
+
</Note>
|
|
564
|
+
|
|
551
565
|
Parameters:
|
|
552
566
|
|
|
553
|
-
- `action` ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
|
|
567
|
+
- `action` ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position' | 'zoom'): The action to perform. The `zoom` action is only available with `computer_20251124`.
|
|
554
568
|
- `coordinate` (number[], optional): Required for `mouse_move` and `left_click_drag` actions. Specifies the (x, y) coordinates.
|
|
555
569
|
- `text` (string, optional): Required for `type` and `key` actions.
|
|
556
|
-
|
|
557
|
-
|
|
570
|
+
- `region` (number[], optional): Required for `zoom` action. Specifies `[x1, y1, x2, y2]` coordinates for the area to inspect.
|
|
571
|
+
- `displayWidthPx` (number): The width of the display in pixels.
|
|
572
|
+
- `displayHeightPx` (number): The height of the display in pixels.
|
|
573
|
+
- `displayNumber` (number, optional): The display number for X11 environments.
|
|
574
|
+
- `enableZoom` (boolean, optional): Enable the zoom action. Only available with `computer_20251124`. Default: `false`.
|
|
558
575
|
|
|
559
576
|
### Web Search Tool
|
|
560
577
|
|
package/package.json
CHANGED
package/src/anthropic-messages-api.ts
CHANGED
|
@@ -360,6 +360,15 @@ export type AnthropicTool =
|
|
|
360
360
|
display_number: number;
|
|
361
361
|
cache_control: AnthropicCacheControl | undefined;
|
|
362
362
|
}
|
|
363
|
+
| {
|
|
364
|
+
name: string;
|
|
365
|
+
type: 'computer_20251124';
|
|
366
|
+
display_width_px: number;
|
|
367
|
+
display_height_px: number;
|
|
368
|
+
display_number: number;
|
|
369
|
+
enable_zoom?: boolean;
|
|
370
|
+
cache_control: AnthropicCacheControl | undefined;
|
|
371
|
+
}
|
|
363
372
|
| {
|
|
364
373
|
name: string;
|
|
365
374
|
type:
|
package/src/anthropic-prepare-tools.ts
CHANGED
|
@@ -131,6 +131,19 @@ export async function prepareTools({
|
|
|
131
131
|
});
|
|
132
132
|
break;
|
|
133
133
|
}
|
|
134
|
+
case 'anthropic.computer_20251124': {
|
|
135
|
+
betas.add('computer-use-2025-11-24');
|
|
136
|
+
anthropicTools.push({
|
|
137
|
+
name: 'computer',
|
|
138
|
+
type: 'computer_20251124',
|
|
139
|
+
display_width_px: tool.args.displayWidthPx as number,
|
|
140
|
+
display_height_px: tool.args.displayHeightPx as number,
|
|
141
|
+
display_number: tool.args.displayNumber as number,
|
|
142
|
+
enable_zoom: tool.args.enableZoom as boolean,
|
|
143
|
+
cache_control: undefined,
|
|
144
|
+
});
|
|
145
|
+
break;
|
|
146
|
+
}
|
|
134
147
|
case 'anthropic.computer_20241022': {
|
|
135
148
|
betas.add('computer-use-2024-10-22');
|
|
136
149
|
anthropicTools.push({
|
package/src/anthropic-tools.ts
CHANGED
|
@@ -4,6 +4,7 @@ import { codeExecution_20250522 } from './tool/code-execution_20250522';
|
|
|
4
4
|
import { codeExecution_20250825 } from './tool/code-execution_20250825';
|
|
5
5
|
import { computer_20241022 } from './tool/computer_20241022';
|
|
6
6
|
import { computer_20250124 } from './tool/computer_20250124';
|
|
7
|
+
import { computer_20251124 } from './tool/computer_20251124';
|
|
7
8
|
import { memory_20250818 } from './tool/memory_20250818';
|
|
8
9
|
import { textEditor_20241022 } from './tool/text-editor_20241022';
|
|
9
10
|
import { textEditor_20250124 } from './tool/text-editor_20250124';
|
|
@@ -77,6 +78,23 @@ export const anthropicTools = {
|
|
|
77
78
|
*/
|
|
78
79
|
computer_20250124,
|
|
79
80
|
|
|
81
|
+
/**
|
|
82
|
+
* Claude can interact with computer environments through the computer use tool, which
|
|
83
|
+
* provides screenshot capabilities and mouse/keyboard control for autonomous desktop interaction.
|
|
84
|
+
*
|
|
85
|
+
* This version adds the zoom action for detailed screen region inspection.
|
|
86
|
+
*
|
|
87
|
+
* Image results are supported.
|
|
88
|
+
*
|
|
89
|
+
* Supported models: Claude Opus 4.5
|
|
90
|
+
*
|
|
91
|
+
* @param displayWidthPx - The width of the display being controlled by the model in pixels.
|
|
92
|
+
* @param displayHeightPx - The height of the display being controlled by the model in pixels.
|
|
93
|
+
* @param displayNumber - The display number to control (only relevant for X11 environments). If specified, the tool will be provided a display number in the tool definition.
|
|
94
|
+
* @param enableZoom - Enable zoom action. Set to true to allow Claude to zoom into specific screen regions. Default: false.
|
|
95
|
+
*/
|
|
96
|
+
computer_20251124,
|
|
97
|
+
|
|
80
98
|
/**
|
|
81
99
|
* The memory tool enables Claude to store and retrieve information across conversations through a memory file directory.
|
|
82
100
|
* Claude can create, read, update, and delete files that persist between sessions,
|
package/src/tool/computer_20251124.ts
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import {
|
|
2
|
+
createProviderToolFactory,
|
|
3
|
+
lazySchema,
|
|
4
|
+
zodSchema,
|
|
5
|
+
} from '@ai-sdk/provider-utils';
|
|
6
|
+
import { z } from 'zod/v4';
|
|
7
|
+
|
|
8
|
+
const computer_20251124InputSchema = lazySchema(() =>
|
|
9
|
+
zodSchema(
|
|
10
|
+
z.object({
|
|
11
|
+
action: z.enum([
|
|
12
|
+
'key',
|
|
13
|
+
'hold_key',
|
|
14
|
+
'type',
|
|
15
|
+
'cursor_position',
|
|
16
|
+
'mouse_move',
|
|
17
|
+
'left_mouse_down',
|
|
18
|
+
'left_mouse_up',
|
|
19
|
+
'left_click',
|
|
20
|
+
'left_click_drag',
|
|
21
|
+
'right_click',
|
|
22
|
+
'middle_click',
|
|
23
|
+
'double_click',
|
|
24
|
+
'triple_click',
|
|
25
|
+
'scroll',
|
|
26
|
+
'wait',
|
|
27
|
+
'screenshot',
|
|
28
|
+
'zoom',
|
|
29
|
+
]),
|
|
30
|
+
coordinate: z.tuple([z.number().int(), z.number().int()]).optional(),
|
|
31
|
+
duration: z.number().optional(),
|
|
32
|
+
region: z
|
|
33
|
+
.tuple([
|
|
34
|
+
z.number().int(),
|
|
35
|
+
z.number().int(),
|
|
36
|
+
z.number().int(),
|
|
37
|
+
z.number().int(),
|
|
38
|
+
])
|
|
39
|
+
.optional(),
|
|
40
|
+
scroll_amount: z.number().optional(),
|
|
41
|
+
scroll_direction: z.enum(['up', 'down', 'left', 'right']).optional(),
|
|
42
|
+
start_coordinate: z
|
|
43
|
+
.tuple([z.number().int(), z.number().int()])
|
|
44
|
+
.optional(),
|
|
45
|
+
text: z.string().optional(),
|
|
46
|
+
}),
|
|
47
|
+
),
|
|
48
|
+
);
|
|
49
|
+
|
|
50
|
+
export const computer_20251124 = createProviderToolFactory<
|
|
51
|
+
{
|
|
52
|
+
/**
|
|
53
|
+
* - `key`: Press a key or key-combination on the keyboard.
|
|
54
|
+
* - This supports xdotool's `key` syntax.
|
|
55
|
+
* - Examples: "a", "Return", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key).
|
|
56
|
+
* - `hold_key`: Hold down a key or multiple keys for a specified duration (in seconds). Supports the same syntax as `key`.
|
|
57
|
+
* - `type`: Type a string of text on the keyboard.
|
|
58
|
+
* - `cursor_position`: Get the current (x, y) pixel coordinate of the cursor on the screen.
|
|
59
|
+
* - `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.
|
|
60
|
+
* - `left_mouse_down`: Press the left mouse button.
|
|
61
|
+
* - `left_mouse_up`: Release the left mouse button.
|
|
62
|
+
* - `left_click`: Click the left mouse button at the specified (x, y) pixel coordinate on the screen. You can also include a key combination to hold down while clicking using the `text` parameter.
|
|
63
|
+
* - `left_click_drag`: Click and drag the cursor from `start_coordinate` to a specified (x, y) pixel coordinate on the screen.
|
|
64
|
+
* - `right_click`: Click the right mouse button at the specified (x, y) pixel coordinate on the screen.
|
|
65
|
+
* - `middle_click`: Click the middle mouse button at the specified (x, y) pixel coordinate on the screen.
|
|
66
|
+
* - `double_click`: Double-click the left mouse button at the specified (x, y) pixel coordinate on the screen.
|
|
67
|
+
* - `triple_click`: Triple-click the left mouse button at the specified (x, y) pixel coordinate on the screen.
|
|
68
|
+
* - `scroll`: Scroll the screen in a specified direction by a specified amount of clicks of the scroll wheel, at the specified (x, y) pixel coordinate. DO NOT use PageUp/PageDown to scroll.
|
|
69
|
+
* - `wait`: Wait for a specified duration (in seconds).
|
|
70
|
+
* - `screenshot`: Take a screenshot of the screen.
|
|
71
|
+
* - `zoom`: View a specific region of the screen at full resolution. Requires `enableZoom: true` in tool definition. Takes a `region` parameter with coordinates `[x1, y1, x2, y2]` defining top-left and bottom-right corners of the area to inspect.
|
|
72
|
+
*/
|
|
73
|
+
action:
|
|
74
|
+
| 'key'
|
|
75
|
+
| 'hold_key'
|
|
76
|
+
| 'type'
|
|
77
|
+
| 'cursor_position'
|
|
78
|
+
| 'mouse_move'
|
|
79
|
+
| 'left_mouse_down'
|
|
80
|
+
| 'left_mouse_up'
|
|
81
|
+
| 'left_click'
|
|
82
|
+
| 'left_click_drag'
|
|
83
|
+
| 'right_click'
|
|
84
|
+
| 'middle_click'
|
|
85
|
+
| 'double_click'
|
|
86
|
+
| 'triple_click'
|
|
87
|
+
| 'scroll'
|
|
88
|
+
| 'wait'
|
|
89
|
+
| 'screenshot'
|
|
90
|
+
| 'zoom';
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* (x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates to move the mouse to. Required only by `action=mouse_move` and `action=left_click_drag`.
|
|
94
|
+
*/
|
|
95
|
+
coordinate?: [number, number];
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* The duration to hold the key down for. Required only by `action=hold_key` and `action=wait`.
|
|
99
|
+
*/
|
|
100
|
+
duration?: number;
|
|
101
|
+
|
|
102
|
+
/**
|
|
103
|
+
* [x1, y1, x2, y2]: The coordinates defining the region to zoom into. x1, y1 is the top-left corner and x2, y2 is the bottom-right corner. Required only by `action=zoom`.
|
|
104
|
+
*/
|
|
105
|
+
region?: [number, number, number, number];
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* The number of 'clicks' to scroll. Required only by `action=scroll`.
|
|
109
|
+
*/
|
|
110
|
+
scroll_amount?: number;
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* The direction to scroll the screen. Required only by `action=scroll`.
|
|
114
|
+
*/
|
|
115
|
+
scroll_direction?: 'up' | 'down' | 'left' | 'right';
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* (x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates to start the drag from. Required only by `action=left_click_drag`.
|
|
119
|
+
*/
|
|
120
|
+
start_coordinate?: [number, number];
|
|
121
|
+
|
|
122
|
+
/**
|
|
123
|
+
* Required only by `action=type`, `action=key`, and `action=hold_key`. Can also be used by click or scroll actions to hold down keys while clicking or scrolling.
|
|
124
|
+
*/
|
|
125
|
+
text?: string;
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
/**
|
|
129
|
+
* The width of the display being controlled by the model in pixels.
|
|
130
|
+
*/
|
|
131
|
+
displayWidthPx: number;
|
|
132
|
+
|
|
133
|
+
/**
|
|
134
|
+
* The height of the display being controlled by the model in pixels.
|
|
135
|
+
*/
|
|
136
|
+
displayHeightPx: number;
|
|
137
|
+
|
|
138
|
+
/**
|
|
139
|
+
* The display number to control (only relevant for X11 environments). If specified, the tool will be provided a display number in the tool definition.
|
|
140
|
+
*/
|
|
141
|
+
displayNumber?: number;
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Enable zoom action. Set to true to allow Claude to zoom into specific screen regions. Default: false.
|
|
145
|
+
*/
|
|
146
|
+
enableZoom?: boolean;
|
|
147
|
+
}
|
|
148
|
+
>({
|
|
149
|
+
id: 'anthropic.computer_20251124',
|
|
150
|
+
inputSchema: computer_20251124InputSchema,
|
|
151
|
+
});
|