askui 0.25.1 → 0.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. package/dist/cjs/core/models/anthropic/askui-agent.js +20 -32
  2. package/dist/cjs/core/models/anthropic/claude-agent.d.ts +16 -4
  3. package/dist/cjs/core/models/anthropic/claude-agent.js +43 -5
  4. package/dist/cjs/core/models/anthropic/index.d.ts +1 -2
  5. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +59 -5
  6. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +340 -75
  7. package/dist/cjs/core/ui-control-commands/input-event.d.ts +3 -1
  8. package/dist/cjs/core/ui-control-commands/input-event.js +2 -0
  9. package/dist/cjs/execution/execution-runtime.d.ts +4 -0
  10. package/dist/cjs/execution/inference-client.d.ts +4 -0
  11. package/dist/cjs/execution/ui-control-client.d.ts +84 -34
  12. package/dist/cjs/execution/ui-control-client.js +4 -48
  13. package/dist/esm/core/models/anthropic/askui-agent.js +21 -33
  14. package/dist/esm/core/models/anthropic/claude-agent.d.ts +16 -4
  15. package/dist/esm/core/models/anthropic/claude-agent.js +43 -5
  16. package/dist/esm/core/models/anthropic/index.d.ts +1 -2
  17. package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +59 -5
  18. package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +332 -72
  19. package/dist/esm/core/ui-control-commands/input-event.d.ts +3 -1
  20. package/dist/esm/core/ui-control-commands/input-event.js +2 -0
  21. package/dist/esm/execution/execution-runtime.d.ts +4 -0
  22. package/dist/esm/execution/inference-client.d.ts +4 -0
  23. package/dist/esm/execution/ui-control-client.d.ts +84 -34
  24. package/dist/esm/execution/ui-control-client.js +4 -48
  25. package/package.json +2 -2
@@ -15,9 +15,40 @@ import { AgentError } from './agent-errors';
15
15
  export class OsAgentHandler {
16
16
  constructor(AgentOsClient, screenDimensions) {
17
17
  this.AgentOsClient = AgentOsClient;
18
- this.TargetResolution = { width: 1280, height: 800 };
18
+ this.targetResolution = { width: 1280, height: 800 };
19
+ this.paddingInfo = null;
19
20
  this.screenDimensions = screenDimensions;
21
+ this.updatePaddingInfo();
22
+ }
23
+ updatePaddingInfo() {
24
+ const targetAspectRatio = this.targetResolution.width / this.targetResolution.height;
25
+ const screenAspectRatio = this.screenDimensions.width / this.screenDimensions.height;
26
+ let scaledWidth;
27
+ let scaledHeight;
28
+ let scaleFactor;
29
+ let padLeft = 0;
30
+ let padTop = 0;
31
+ if (targetAspectRatio > screenAspectRatio) {
32
+ scaleFactor = this.targetResolution.height / this.screenDimensions.height;
33
+ scaledWidth = Math.floor(this.screenDimensions.width * scaleFactor);
34
+ scaledHeight = this.targetResolution.height;
35
+ padLeft = Math.floor((this.targetResolution.width - scaledWidth) / 2);
36
+ }
37
+ else {
38
+ scaleFactor = this.targetResolution.width / this.screenDimensions.width;
39
+ scaledWidth = this.targetResolution.width;
40
+ scaledHeight = Math.floor(this.screenDimensions.height * scaleFactor);
41
+ padTop = Math.floor((this.targetResolution.height - scaledHeight) / 2);
42
+ }
43
+ this.paddingInfo = {
44
+ scaleFactor,
45
+ scaledWidth,
46
+ scaledHeight,
47
+ padLeft,
48
+ padTop
49
+ };
20
50
  }
51
+ // Add image support to act, and check for function overload in TypeScript.
21
52
  static createInstance(AgentOsClient) {
22
53
  return __awaiter(this, void 0, void 0, function* () {
23
54
  const base64ImageString = yield AgentOsClient.getScreenshot();
@@ -29,10 +60,14 @@ export class OsAgentHandler {
29
60
  });
30
61
  }
31
62
  getTargetResolution() {
32
- return this.TargetResolution;
63
+ return this.targetResolution;
64
+ }
65
+ getScreenDimensions() {
66
+ return this.screenDimensions;
33
67
  }
34
68
  setTargetResolution(width, height) {
35
- this.TargetResolution = { width, height };
69
+ this.targetResolution = { width, height };
70
+ this.updatePaddingInfo();
36
71
  }
37
72
  takeScreenshot() {
38
73
  return __awaiter(this, void 0, void 0, function* () {
@@ -43,36 +78,130 @@ export class OsAgentHandler {
43
78
  width: image_info.width,
44
79
  height: image_info.height,
45
80
  };
46
- const resized_image = yield base64Image.resizeWithSameAspectRatio(this.TargetResolution.width, this.TargetResolution.height);
81
+ this.updatePaddingInfo();
82
+ const resized_image = yield base64Image.resizeWithSameAspectRatio(this.targetResolution.width, this.targetResolution.height);
47
83
  return resized_image.toString(false);
48
84
  });
49
85
  }
50
86
  scaleCoordinates(source, x, y) {
51
- const xScalingFactor = this.TargetResolution.width / this.screenDimensions.width;
52
- const yScalingFactor = this.TargetResolution.height / this.screenDimensions.height;
87
+ if (!this.paddingInfo) {
88
+ throw new ToolError('Padding information not initialized');
89
+ }
90
+ const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop } = this.paddingInfo;
53
91
  if (source === 'api') {
54
- if (x > this.TargetResolution.width || y > this.TargetResolution.height || x < 0 || y < 0) {
92
+ if (x > this.targetResolution.width || y > this.targetResolution.height || x < 0 || y < 0) {
55
93
  throw new ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
56
- + `(${this.TargetResolution.width}x${this.TargetResolution.height})`);
94
+ + `(${this.targetResolution.width}x${this.targetResolution.height})`);
95
+ }
96
+ const adjustedX = x - padLeft;
97
+ const adjustedY = y - padTop;
98
+ if (adjustedX < 0 || adjustedX > scaledWidth || adjustedY < 0 || adjustedY > scaledHeight) {
99
+ throw new ToolError(`Coordinates ${x}, ${y} are outside the scaled image area `
100
+ + `(${scaledWidth}x${scaledHeight} with padding ${padLeft},${padTop})`);
57
101
  }
58
102
  return [
59
- Math.round(x / xScalingFactor),
60
- Math.round(y / yScalingFactor),
103
+ Math.round(adjustedX / scaleFactor),
104
+ Math.round(adjustedY / scaleFactor),
61
105
  ];
62
106
  }
63
- return [
64
- Math.round(x * xScalingFactor),
65
- Math.round(y * yScalingFactor),
66
- ];
107
+ const apiX = Math.round(x * scaleFactor) + padLeft;
108
+ const apiY = Math.round(y * scaleFactor) + padTop;
109
+ return [apiX, apiY];
67
110
  }
68
111
  requestControl(controlCommand) {
69
112
  return __awaiter(this, void 0, void 0, function* () {
70
113
  for (const action of controlCommand.actions) {
71
- [action.position.x, action.position.y] = this.scaleCoordinates('api', action.position.x, action.position.y);
114
+ if (action.inputEvent === InputEvent.MOUSE_MOVE || action.inputEvent === InputEvent.MOUSE_SCROLL) {
115
+ [action.position.x, action.position.y] = this.scaleCoordinates('api', action.position.x, action.position.y);
116
+ }
72
117
  }
73
118
  yield this.AgentOsClient.requestControl(controlCommand);
74
119
  });
75
120
  }
121
+ mouseMove(x, y) {
122
+ return __awaiter(this, void 0, void 0, function* () {
123
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_MOVE, { x, y }, '', {})]);
124
+ yield this.requestControl(controlCommand);
125
+ });
126
+ }
127
+ mouseClick(button, doubleClick) {
128
+ return __awaiter(this, void 0, void 0, function* () {
129
+ let action = InputEvent.MOUSE_CLICK_LEFT;
130
+ if (doubleClick) {
131
+ if (button === "left") {
132
+ action = InputEvent.MOUSE_CLICK_DOUBLE_LEFT;
133
+ }
134
+ else if (button === "right") {
135
+ action = InputEvent.MOUSE_CLICK_DOUBLE_RIGHT;
136
+ }
137
+ else if (button === "middle") {
138
+ action = InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE;
139
+ }
140
+ }
141
+ else {
142
+ if (button === "right") {
143
+ action = InputEvent.MOUSE_CLICK_RIGHT;
144
+ }
145
+ else if (button === "middle") {
146
+ action = InputEvent.MOUSE_CLICK_MIDDLE;
147
+ }
148
+ }
149
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(action, { x: 0, y: 0 }, '', {})]);
150
+ yield this.requestControl(controlCommand);
151
+ });
152
+ }
153
+ mouseScroll(dx, dy) {
154
+ return __awaiter(this, void 0, void 0, function* () {
155
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_SCROLL, { x: dx, y: dy }, '', {})]);
156
+ yield this.requestControl(controlCommand);
157
+ });
158
+ }
159
+ mouseHoldLeftButtonDown() {
160
+ return __awaiter(this, void 0, void 0, function* () {
161
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_DOWN, { x: 0, y: 0 }, '', {})]);
162
+ yield this.requestControl(controlCommand);
163
+ });
164
+ }
165
+ mouseReleaseLeftButton() {
166
+ return __awaiter(this, void 0, void 0, function* () {
167
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_UP, { x: 0, y: 0 }, '', {})]);
168
+ yield this.requestControl(controlCommand);
169
+ });
170
+ }
171
+ desktopKeyPressAndRelease(key_1) {
172
+ return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
173
+ let keyString = key;
174
+ if (modifiers.length > 0) {
175
+ keyString = `${modifiers.join('+')}+${key}`;
176
+ }
177
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, keyString, {})]);
178
+ yield this.requestControl(controlCommand);
179
+ });
180
+ }
181
+ desktopKeyHoldDown(key_1) {
182
+ return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
183
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.KEY_PRESS, { x: 0, y: 0 }, '', {
184
+ key: key,
185
+ modifiers: modifiers,
186
+ })]);
187
+ yield this.requestControl(controlCommand);
188
+ });
189
+ }
190
+ desktopKeyRelease(key_1) {
191
+ return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
192
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.KEY_RELEASE, { x: 0, y: 0 }, '', {
193
+ key: key,
194
+ modifiers: modifiers,
195
+ })]);
196
+ yield this.requestControl(controlCommand);
197
+ });
198
+ }
199
+ typeText(text) {
200
+ return __awaiter(this, void 0, void 0, function* () {
201
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.TYPE, { x: 0, y: 0 }, text, {})]);
202
+ yield this.requestControl(controlCommand);
203
+ });
204
+ }
76
205
  }
77
206
  export class ScreenShotTool extends BaseAgentTool {
78
207
  constructor(osAgentHandler) {
@@ -91,7 +220,7 @@ export class ScreenShotTool extends BaseAgentTool {
91
220
  toParams() {
92
221
  return {
93
222
  name: 'screenshot_tool',
94
- description: 'Takes a screenshot of the current screen and returns it as a base64 image',
223
+ description: 'Takes a screenshot of the current screen and returns it as a base64 image.',
95
224
  input_schema: { type: 'object', properties: {}, required: [] },
96
225
  };
97
226
  }
@@ -103,8 +232,7 @@ export class MouseMoveTool extends BaseAgentTool {
103
232
  }
104
233
  execute(command) {
105
234
  return __awaiter(this, void 0, void 0, function* () {
106
- const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_MOVE, { x: command.x, y: command.y }, '', {})]);
107
- yield this.osAgentHandler.requestControl(controlCommand);
235
+ yield this.osAgentHandler.mouseMove(command.x, command.y);
108
236
  return {
109
237
  output: `Moved mouse to (${command.x}, ${command.y})`,
110
238
  };
@@ -119,11 +247,11 @@ export class MouseMoveTool extends BaseAgentTool {
119
247
  properties: {
120
248
  x: {
121
249
  type: 'number',
122
- description: 'The x coordinate of the element to click on',
250
+ description: 'The x (pixels from the left edge) coordinate to move the mouse to',
123
251
  },
124
252
  y: {
125
253
  type: 'number',
126
- description: 'The y coordinate of the element to click on',
254
+ description: 'The y (pixels from the top edge) coordinate to move the mouse to',
127
255
  },
128
256
  },
129
257
  },
@@ -137,33 +265,7 @@ export class MouseClickTool extends BaseAgentTool {
137
265
  }
138
266
  execute(command) {
139
267
  return __awaiter(this, void 0, void 0, function* () {
140
- let controlCommand;
141
- if (command.doubleClick) {
142
- if (command.button === 'left') {
143
- controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_DOUBLE_LEFT, { x: 0, y: 0 }, '', {})]);
144
- }
145
- if (command.button === 'right') {
146
- controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_DOUBLE_RIGHT, { x: 0, y: 0 }, '', {})]);
147
- }
148
- if (command.button === 'middle') {
149
- controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE, { x: 0, y: 0 }, '', {})]);
150
- }
151
- }
152
- else {
153
- if (command.button === 'left') {
154
- controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_LEFT, { x: 0, y: 0 }, '', {})]);
155
- }
156
- if (command.button === 'right') {
157
- controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_RIGHT, { x: 0, y: 0 }, '', {})]);
158
- }
159
- if (command.button === 'middle') {
160
- controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_MIDDLE, { x: 0, y: 0 }, '', {})]);
161
- }
162
- }
163
- if (!controlCommand) {
164
- throw new ToolError('Invalid input parameter for mouse click tool');
165
- }
166
- yield this.osAgentHandler.requestControl(controlCommand);
268
+ yield this.osAgentHandler.mouseClick(command.button, command.doubleClick);
167
269
  const returnedMessage = command.doubleClick ? `Double clicked ${command.button} button` : `Clicked ${command.button} button`;
168
270
  return {
169
271
  output: returnedMessage,
@@ -199,8 +301,7 @@ export class MouseScrollTool extends BaseAgentTool {
199
301
  }
200
302
  execute(command) {
201
303
  return __awaiter(this, void 0, void 0, function* () {
202
- const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_SCROLL, { x: command.dx, y: command.dy }, '', {})]);
203
- yield this.osAgentHandler.requestControl(controlCommand);
304
+ yield this.osAgentHandler.mouseScroll(command.dx, command.dy);
204
305
  return {
205
306
  output: `Scrolled by (${command.dx}, ${command.dy})`,
206
307
  };
@@ -215,11 +316,11 @@ export class MouseScrollTool extends BaseAgentTool {
215
316
  properties: {
216
317
  dx: {
217
318
  type: 'number',
218
- description: 'The amount to scroll horizontally',
319
+ description: 'The amount to scroll horizontally (positive is right, negative is left)',
219
320
  },
220
321
  dy: {
221
322
  type: 'number',
222
- description: 'The amount to scroll vertically',
323
+ description: 'The amount to scroll vertically (positive is down, negative is up)',
223
324
  },
224
325
  },
225
326
  required: ['dx', 'dy'],
@@ -227,20 +328,104 @@ export class MouseScrollTool extends BaseAgentTool {
227
328
  };
228
329
  }
229
330
  }
230
- export class DesktopKeyPressSequenceTool extends BaseAgentTool {
331
+ export class MouseDragAndDropTool extends BaseAgentTool {
231
332
  constructor(osAgentHandler) {
232
333
  super();
233
334
  this.osAgentHandler = osAgentHandler;
234
335
  }
235
336
  execute(command) {
236
337
  return __awaiter(this, void 0, void 0, function* () {
237
- const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, {
238
- firstModifier: command.firstModifier || '',
239
- secondModifier: command.secondModifier || '',
240
- })]);
241
- yield this.osAgentHandler.requestControl(controlCommand);
338
+ yield this.osAgentHandler.mouseMove(command.startX, command.startY);
339
+ yield this.osAgentHandler.mouseHoldLeftButtonDown();
340
+ yield this.osAgentHandler.mouseMove(command.endX, command.endY);
341
+ yield this.osAgentHandler.mouseReleaseLeftButton();
242
342
  return {
243
- output: `Pressed key ${command.key} with modifiers ${command.firstModifier || ''} ${command.secondModifier || ''}`,
343
+ output: `Dragged from (${command.startX}, ${command.startY}) to (${command.endX}, ${command.endY})`,
344
+ };
345
+ });
346
+ }
347
+ toParams() {
348
+ return {
349
+ name: 'mouse_drag_and_drop_tool',
350
+ description: 'Drags the mouse from the specified start coordinates to the specified end coordinates. The top left corner of the screen is (0,0)',
351
+ input_schema: {
352
+ type: 'object',
353
+ properties: {
354
+ startX: {
355
+ type: 'number',
356
+ description: 'The x (pixels from the left edge) coordinate of the start position',
357
+ },
358
+ startY: {
359
+ type: 'number',
360
+ description: 'The y (pixels from the top edge) coordinate of the start position',
361
+ },
362
+ endX: {
363
+ type: 'number',
364
+ description: 'The x (pixels from the left edge) coordinate of the end position',
365
+ },
366
+ endY: {
367
+ type: 'number',
368
+ description: 'The y (pixels from the top edge) coordinate of the end position',
369
+ },
370
+ },
371
+ required: ['startX', 'startY', 'endX', 'endY'],
372
+ },
373
+ };
374
+ }
375
+ }
376
+ export class MouseHoldLeftButtonDownTool extends BaseAgentTool {
377
+ constructor(osAgentHandler) {
378
+ super();
379
+ this.osAgentHandler = osAgentHandler;
380
+ }
381
+ execute() {
382
+ return __awaiter(this, void 0, void 0, function* () {
383
+ yield this.osAgentHandler.mouseHoldLeftButtonDown();
384
+ return {
385
+ output: 'Holding down left mouse button',
386
+ };
387
+ });
388
+ }
389
+ toParams() {
390
+ return {
391
+ name: 'mouse_hold_left_button_down_tool',
392
+ description: 'Hold down the left mouse button at the current position.',
393
+ input_schema: { type: 'object', properties: {}, required: [] },
394
+ };
395
+ }
396
+ }
397
+ export class MouseReleaseLeftButtonTool extends BaseAgentTool {
398
+ constructor(osAgentHandler) {
399
+ super();
400
+ this.osAgentHandler = osAgentHandler;
401
+ }
402
+ execute() {
403
+ return __awaiter(this, void 0, void 0, function* () {
404
+ yield this.osAgentHandler.mouseReleaseLeftButton();
405
+ return {
406
+ output: 'Released left mouse button',
407
+ };
408
+ });
409
+ }
410
+ toParams() {
411
+ return {
412
+ name: 'mouse_release_left_button_tool',
413
+ description: 'Release the left mouse button at the current position.',
414
+ input_schema: { type: 'object', properties: {}, required: [] },
415
+ };
416
+ }
417
+ }
418
+ export class DesktopPressAndReleaseKeysTool extends BaseAgentTool {
419
+ constructor(osAgentHandler) {
420
+ super();
421
+ this.osAgentHandler = osAgentHandler;
422
+ }
423
+ execute(command) {
424
+ return __awaiter(this, void 0, void 0, function* () {
425
+ const modifiers = command.modifiers || [];
426
+ yield this.osAgentHandler.desktopKeyPressAndRelease(command.key, modifiers);
427
+ return {
428
+ output: `Pressed key ${command.key} with modifiers ${modifiers.join(' ')}`,
244
429
  };
245
430
  });
246
431
  }
@@ -256,15 +441,53 @@ export class DesktopKeyPressSequenceTool extends BaseAgentTool {
256
441
  enum: PC_KEY_VALUES,
257
442
  description: 'The key to press',
258
443
  },
259
- firstModifier: {
260
- type: 'string',
261
- enum: MODIFIER_KEY_VALUES,
262
- description: 'The first modifier key',
444
+ modifiers: {
445
+ type: 'array',
446
+ items: {
447
+ type: 'string',
448
+ enum: MODIFIER_KEY_VALUES,
449
+ },
450
+ description: 'The modifiers to press',
263
451
  },
264
- secondModifier: {
452
+ },
453
+ required: ['key'],
454
+ },
455
+ };
456
+ }
457
+ }
458
+ export class DesktopKeyHoldDownTool extends BaseAgentTool {
459
+ constructor(osAgentHandler) {
460
+ super();
461
+ this.osAgentHandler = osAgentHandler;
462
+ }
463
+ execute(command) {
464
+ return __awaiter(this, void 0, void 0, function* () {
465
+ const modifiers = command.modifiers || [];
466
+ yield this.osAgentHandler.desktopKeyHoldDown(command.key, modifiers);
467
+ return {
468
+ output: `Holding down key ${command.key} with modifiers ${modifiers.join(' ')}`,
469
+ };
470
+ });
471
+ }
472
+ toParams() {
473
+ return {
474
+ name: 'desktop_key_hold_down_tool',
475
+ description: 'Hold down a key and optional modifiers. Keys will be still pressed after the tool is finished.',
476
+ input_schema: {
477
+ type: 'object',
478
+ properties: {
479
+ key: {
265
480
  type: 'string',
266
- enum: MODIFIER_KEY_VALUES,
267
- description: 'The second modifier key',
481
+ enum: [...PC_KEY_VALUES, ...MODIFIER_KEY_VALUES],
482
+ description: 'The key to hold down',
483
+ },
484
+ modifiers: {
485
+ type: 'array',
486
+ items: {
487
+ type: 'string',
488
+ enum: MODIFIER_KEY_VALUES,
489
+ },
490
+ description: 'The modifiers to hold down',
268
491
  },
269
492
  },
270
493
  required: ['key'],
@@ -272,31 +495,39 @@ export class DesktopKeyPressSequenceTool extends BaseAgentTool {
272
495
  };
273
496
  }
274
497
  }
275
- export class DesktopSingleKeyPressTool extends BaseAgentTool {
498
+ export class DesktopKeyReleaseTool extends BaseAgentTool {
276
499
  constructor(osAgentHandler) {
277
500
  super();
278
501
  this.osAgentHandler = osAgentHandler;
279
502
  }
280
503
  execute(command) {
281
504
  return __awaiter(this, void 0, void 0, function* () {
282
- const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, {})]);
283
- yield this.osAgentHandler.requestControl(controlCommand);
505
+ const modifiers = command.modifiers || [];
506
+ yield this.osAgentHandler.desktopKeyRelease(command.key, modifiers);
284
507
  return {
285
- output: `Pressed key ${command.key}`,
508
+ output: `Released key ${command.key} with modifiers ${modifiers.join(' ')}`,
286
509
  };
287
510
  });
288
511
  }
289
512
  toParams() {
290
513
  return {
291
- name: 'desktop_single_key_press_tool',
292
- description: 'Presses a single key',
514
+ name: 'desktop_key_release_tool',
515
+ description: 'Releases a key and optional modifiers. This can be used after keys were held down with the desktop_key_hold_down_tool',
293
516
  input_schema: {
294
517
  type: 'object',
295
518
  properties: {
296
519
  key: {
297
520
  type: 'string',
298
521
  enum: [...PC_KEY_VALUES, ...MODIFIER_KEY_VALUES],
299
- description: 'The key to press',
522
+ description: 'The key to release',
523
+ },
524
+ modifiers: {
525
+ type: 'array',
526
+ items: {
527
+ type: 'string',
528
+ enum: MODIFIER_KEY_VALUES,
529
+ },
530
+ description: 'The modifiers to release',
300
531
  },
301
532
  },
302
533
  required: ['key'],
@@ -459,3 +690,32 @@ export class ExecuteShellCommandTool extends BaseAgentTool {
459
690
  };
460
691
  }
461
692
  }
693
+ export class WaitTool extends BaseAgentTool {
694
+ constructor() {
695
+ super();
696
+ }
697
+ execute(command) {
698
+ return __awaiter(this, void 0, void 0, function* () {
699
+ yield new Promise(resolve => setTimeout(resolve, command.milliseconds));
700
+ return {
701
+ output: `Waited for ${command.milliseconds} milliseconds`,
702
+ };
703
+ });
704
+ }
705
+ toParams() {
706
+ return {
707
+ name: 'wait_tool',
708
+ description: 'Waits for a specified number of milliseconds',
709
+ input_schema: {
710
+ type: 'object',
711
+ properties: {
712
+ milliseconds: {
713
+ type: 'number',
714
+ description: 'The number of milliseconds to wait',
715
+ },
716
+ },
717
+ required: ['milliseconds'],
718
+ },
719
+ };
720
+ }
721
+ }
@@ -16,5 +16,7 @@ export declare enum InputEvent {
16
16
  MOUSE_MOVE = "MOUSE_MOVE",
17
17
  MOUSE_DOWN = "MOUSE_DOWN",
18
18
  MOUSE_UP = "MOUSE_UP",
19
- EXECUTE_COMMAND = "EXECUTE_COMMAND"
19
+ EXECUTE_COMMAND = "EXECUTE_COMMAND",
20
+ KEY_PRESS = "KEY_PRESS",
21
+ KEY_RELEASE = "KEY_RELEASE"
20
22
  }
@@ -18,4 +18,6 @@ export var InputEvent;
18
18
  InputEvent["MOUSE_DOWN"] = "MOUSE_DOWN";
19
19
  InputEvent["MOUSE_UP"] = "MOUSE_UP";
20
20
  InputEvent["EXECUTE_COMMAND"] = "EXECUTE_COMMAND";
21
+ InputEvent["KEY_PRESS"] = "KEY_PRESS";
22
+ InputEvent["KEY_RELEASE"] = "KEY_RELEASE";
21
23
  })(InputEvent || (InputEvent = {}));
@@ -47,5 +47,9 @@ export declare class ExecutionRuntime {
47
47
  system?: string;
48
48
  tools?: object[];
49
49
  betas?: string[];
50
+ tool_choice?: {
51
+ type: 'tool' | 'any' | 'auto';
52
+ name?: string;
53
+ };
50
54
  }): Promise<BetaMessage>;
51
55
  }
@@ -26,6 +26,10 @@ export declare class InferenceClient {
26
26
  predictVQAAnswer(prompt: string, image: string, config?: object): Promise<any>;
27
27
  predictActResponse(params: {
28
28
  max_tokens: number;
29
+ tool_choice?: {
30
+ type: 'tool' | 'any' | 'auto';
31
+ name?: string;
32
+ };
29
33
  messages: BetaMessageParam[];
30
34
  model: string;
31
35
  system?: string;