@ai-sdk/anthropic 3.0.24 → 3.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -510,12 +510,13 @@ Parameters:
510
510
  The Computer Tool enables control of keyboard and mouse actions on a computer:
511
511
 
512
512
  ```ts
513
- const computerTool = anthropic.tools.computer_20241022({
513
+ const computerTool = anthropic.tools.computer_20251124({
514
514
  displayWidthPx: 1920,
515
515
  displayHeightPx: 1080,
516
516
  displayNumber: 0, // Optional, for X11 environments
517
+ enableZoom: true, // Optional, enables the zoom action
517
518
 
518
- execute: async ({ action, coordinate, text }) => {
519
+ execute: async ({ action, coordinate, text, region }) => {
519
520
  // Implement your computer control logic here
520
521
  // Return the result of the action
521
522
 
@@ -530,6 +531,13 @@ const computerTool = anthropic.tools.computer_20241022({
530
531
  .toString('base64'),
531
532
  };
532
533
  }
534
+ case 'zoom': {
535
+ // region is [x1, y1, x2, y2] defining the area to zoom into
536
+ return {
537
+ type: 'image',
538
+ data: fs.readFileSync('./data/zoomed-region.png').toString('base64'),
539
+ };
540
+ }
533
541
  default: {
534
542
  console.log('Action:', action);
535
543
  console.log('Coordinate:', coordinate);
@@ -548,13 +556,22 @@ const computerTool = anthropic.tools.computer_20241022({
548
556
  });
549
557
  ```
550
558
 
559
+ <Note>
560
+ Use `computer_20251124` for Claude Opus 4.5, which supports the zoom action.
561
+ Use `computer_20250124` for Claude Sonnet 4.5, Haiku 4.5, Opus 4.1, Sonnet 4,
562
+ Opus 4, and Sonnet 3.7.
563
+ </Note>
564
+
551
565
  Parameters:
552
566
 
553
- - `action` ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position'): The action to perform.
567
+ - `action` ('key' | 'type' | 'mouse_move' | 'left_click' | 'left_click_drag' | 'right_click' | 'middle_click' | 'double_click' | 'screenshot' | 'cursor_position' | 'zoom'): The action to perform. The `zoom` action is only available with `computer_20251124`.
554
568
  - `coordinate` (number[], optional): Required for `mouse_move` and `left_click_drag` actions. Specifies the (x, y) coordinates.
555
569
  - `text` (string, optional): Required for `type` and `key` actions.
556
-
557
- These tools can be used in conjunction with the `sonnet-3-5-sonnet-20240620` model to enable more complex interactions and tasks.
570
+ - `region` (number[], optional): Required for the `zoom` action. Specifies `[x1, y1, x2, y2]` coordinates for the area to inspect.
571
+ - `displayWidthPx` (number): The width of the display in pixels.
572
+ - `displayHeightPx` (number): The height of the display in pixels.
573
+ - `displayNumber` (number, optional): The display number for X11 environments.
574
+ - `enableZoom` (boolean, optional): Enable the zoom action. Only available with `computer_20251124`. Default: `false`.
558
575
 
559
576
  ### Web Search Tool
560
577
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ai-sdk/anthropic",
3
- "version": "3.0.24",
3
+ "version": "3.0.25",
4
4
  "license": "Apache-2.0",
5
5
  "sideEffects": false,
6
6
  "main": "./dist/index.js",
@@ -360,6 +360,15 @@ export type AnthropicTool =
360
360
  display_number: number;
361
361
  cache_control: AnthropicCacheControl | undefined;
362
362
  }
363
+ | {
364
+ name: string;
365
+ type: 'computer_20251124';
366
+ display_width_px: number;
367
+ display_height_px: number;
368
+ display_number: number;
369
+ enable_zoom?: boolean;
370
+ cache_control: AnthropicCacheControl | undefined;
371
+ }
363
372
  | {
364
373
  name: string;
365
374
  type:
@@ -131,6 +131,19 @@ export async function prepareTools({
131
131
  });
132
132
  break;
133
133
  }
134
+ case 'anthropic.computer_20251124': {
135
+ betas.add('computer-use-2025-11-24');
136
+ anthropicTools.push({
137
+ name: 'computer',
138
+ type: 'computer_20251124',
139
+ display_width_px: tool.args.displayWidthPx as number,
140
+ display_height_px: tool.args.displayHeightPx as number,
141
+ display_number: tool.args.displayNumber as number,
142
+ enable_zoom: tool.args.enableZoom as boolean,
143
+ cache_control: undefined,
144
+ });
145
+ break;
146
+ }
134
147
  case 'anthropic.computer_20241022': {
135
148
  betas.add('computer-use-2024-10-22');
136
149
  anthropicTools.push({
@@ -4,6 +4,7 @@ import { codeExecution_20250522 } from './tool/code-execution_20250522';
4
4
  import { codeExecution_20250825 } from './tool/code-execution_20250825';
5
5
  import { computer_20241022 } from './tool/computer_20241022';
6
6
  import { computer_20250124 } from './tool/computer_20250124';
7
+ import { computer_20251124 } from './tool/computer_20251124';
7
8
  import { memory_20250818 } from './tool/memory_20250818';
8
9
  import { textEditor_20241022 } from './tool/text-editor_20241022';
9
10
  import { textEditor_20250124 } from './tool/text-editor_20250124';
@@ -77,6 +78,23 @@ export const anthropicTools = {
77
78
  */
78
79
  computer_20250124,
79
80
 
81
+ /**
82
+ * Claude can interact with computer environments through the computer use tool, which
83
+ * provides screenshot capabilities and mouse/keyboard control for autonomous desktop interaction.
84
+ *
85
+ * This version adds the zoom action for detailed screen region inspection.
86
+ *
87
+ * Image results are supported.
88
+ *
89
+ * Supported models: Claude Opus 4.5
90
+ *
91
+ * @param displayWidthPx - The width of the display being controlled by the model in pixels.
92
+ * @param displayHeightPx - The height of the display being controlled by the model in pixels.
93
+ * @param displayNumber - The display number to control (only relevant for X11 environments). If specified, the tool will be provided a display number in the tool definition.
94
+ * @param enableZoom - Enable zoom action. Set to true to allow Claude to zoom into specific screen regions. Default: false.
95
+ */
96
+ computer_20251124,
97
+
80
98
  /**
81
99
  * The memory tool enables Claude to store and retrieve information across conversations through a memory file directory.
82
100
  * Claude can create, read, update, and delete files that persist between sessions,
@@ -0,0 +1,151 @@
1
+ import {
2
+ createProviderToolFactory,
3
+ lazySchema,
4
+ zodSchema,
5
+ } from '@ai-sdk/provider-utils';
6
+ import { z } from 'zod/v4';
7
+
8
+ const computer_20251124InputSchema = lazySchema(() =>
9
+ zodSchema(
10
+ z.object({
11
+ action: z.enum([
12
+ 'key',
13
+ 'hold_key',
14
+ 'type',
15
+ 'cursor_position',
16
+ 'mouse_move',
17
+ 'left_mouse_down',
18
+ 'left_mouse_up',
19
+ 'left_click',
20
+ 'left_click_drag',
21
+ 'right_click',
22
+ 'middle_click',
23
+ 'double_click',
24
+ 'triple_click',
25
+ 'scroll',
26
+ 'wait',
27
+ 'screenshot',
28
+ 'zoom',
29
+ ]),
30
+ coordinate: z.tuple([z.number().int(), z.number().int()]).optional(),
31
+ duration: z.number().optional(),
32
+ region: z
33
+ .tuple([
34
+ z.number().int(),
35
+ z.number().int(),
36
+ z.number().int(),
37
+ z.number().int(),
38
+ ])
39
+ .optional(),
40
+ scroll_amount: z.number().optional(),
41
+ scroll_direction: z.enum(['up', 'down', 'left', 'right']).optional(),
42
+ start_coordinate: z
43
+ .tuple([z.number().int(), z.number().int()])
44
+ .optional(),
45
+ text: z.string().optional(),
46
+ }),
47
+ ),
48
+ );
49
+
50
+ export const computer_20251124 = createProviderToolFactory<
51
+ {
52
+ /**
53
+ * - `key`: Press a key or key-combination on the keyboard.
54
+ * - This supports xdotool's `key` syntax.
55
+ * - Examples: "a", "Return", "alt+Tab", "ctrl+s", "Up", "KP_0" (for the numpad 0 key).
56
+ * - `hold_key`: Hold down a key or multiple keys for a specified duration (in seconds). Supports the same syntax as `key`.
57
+ * - `type`: Type a string of text on the keyboard.
58
+ * - `cursor_position`: Get the current (x, y) pixel coordinate of the cursor on the screen.
59
+ * - `mouse_move`: Move the cursor to a specified (x, y) pixel coordinate on the screen.
60
+ * - `left_mouse_down`: Press the left mouse button.
61
+ * - `left_mouse_up`: Release the left mouse button.
62
+ * - `left_click`: Click the left mouse button at the specified (x, y) pixel coordinate on the screen. You can also include a key combination to hold down while clicking using the `text` parameter.
63
+ * - `left_click_drag`: Click and drag the cursor from `start_coordinate` to a specified (x, y) pixel coordinate on the screen.
64
+ * - `right_click`: Click the right mouse button at the specified (x, y) pixel coordinate on the screen.
65
+ * - `middle_click`: Click the middle mouse button at the specified (x, y) pixel coordinate on the screen.
66
+ * - `double_click`: Double-click the left mouse button at the specified (x, y) pixel coordinate on the screen.
67
+ * - `triple_click`: Triple-click the left mouse button at the specified (x, y) pixel coordinate on the screen.
68
+ * - `scroll`: Scroll the screen in a specified direction by a specified amount of clicks of the scroll wheel, at the specified (x, y) pixel coordinate. DO NOT use PageUp/PageDown to scroll.
69
+ * - `wait`: Wait for a specified duration (in seconds).
70
+ * - `screenshot`: Take a screenshot of the screen.
71
+ * - `zoom`: View a specific region of the screen at full resolution. Requires `enableZoom: true` in tool definition. Takes a `region` parameter with coordinates `[x1, y1, x2, y2]` defining top-left and bottom-right corners of the area to inspect.
72
+ */
73
+ action:
74
+ | 'key'
75
+ | 'hold_key'
76
+ | 'type'
77
+ | 'cursor_position'
78
+ | 'mouse_move'
79
+ | 'left_mouse_down'
80
+ | 'left_mouse_up'
81
+ | 'left_click'
82
+ | 'left_click_drag'
83
+ | 'right_click'
84
+ | 'middle_click'
85
+ | 'double_click'
86
+ | 'triple_click'
87
+ | 'scroll'
88
+ | 'wait'
89
+ | 'screenshot'
90
+ | 'zoom';
91
+
92
+ /**
93
+ * (x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates to move the mouse to. Required only by `action=mouse_move` and `action=left_click_drag`.
94
+ */
95
+ coordinate?: [number, number];
96
+
97
+ /**
98
+ * The duration in seconds: how long to hold the key down (`action=hold_key`) or how long to wait (`action=wait`). Required only by `action=hold_key` and `action=wait`.
99
+ */
100
+ duration?: number;
101
+
102
+ /**
103
+ * [x1, y1, x2, y2]: The coordinates defining the region to zoom into. x1, y1 is the top-left corner and x2, y2 is the bottom-right corner. Required only by `action=zoom`.
104
+ */
105
+ region?: [number, number, number, number];
106
+
107
+ /**
108
+ * The number of 'clicks' to scroll. Required only by `action=scroll`.
109
+ */
110
+ scroll_amount?: number;
111
+
112
+ /**
113
+ * The direction to scroll the screen. Required only by `action=scroll`.
114
+ */
115
+ scroll_direction?: 'up' | 'down' | 'left' | 'right';
116
+
117
+ /**
118
+ * (x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates to start the drag from. Required only by `action=left_click_drag`.
119
+ */
120
+ start_coordinate?: [number, number];
121
+
122
+ /**
123
+ * Required only by `action=type`, `action=key`, and `action=hold_key`. Can also be used by click or scroll actions to hold down keys while clicking or scrolling.
124
+ */
125
+ text?: string;
126
+ },
127
+ {
128
+ /**
129
+ * The width of the display being controlled by the model in pixels.
130
+ */
131
+ displayWidthPx: number;
132
+
133
+ /**
134
+ * The height of the display being controlled by the model in pixels.
135
+ */
136
+ displayHeightPx: number;
137
+
138
+ /**
139
+ * The display number to control (only relevant for X11 environments). If specified, the tool will be provided a display number in the tool definition.
140
+ */
141
+ displayNumber?: number;
142
+
143
+ /**
144
+ * Enable zoom action. Set to true to allow Claude to zoom into specific screen regions. Default: false.
145
+ */
146
+ enableZoom?: boolean;
147
+ }
148
+ >({
149
+ id: 'anthropic.computer_20251124',
150
+ inputSchema: computer_20251124InputSchema,
151
+ });