askui 0.25.1 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/dist/cjs/core/ai-element/ai-element-collection.d.ts +1 -0
  2. package/dist/cjs/core/ai-element/ai-element-collection.js +3 -0
  3. package/dist/cjs/core/models/anthropic/askui-agent.d.ts +2 -0
  4. package/dist/cjs/core/models/anthropic/askui-agent.js +27 -32
  5. package/dist/cjs/core/models/anthropic/claude-agent.d.ts +16 -4
  6. package/dist/cjs/core/models/anthropic/claude-agent.js +43 -5
  7. package/dist/cjs/core/models/anthropic/index.d.ts +2 -3
  8. package/dist/cjs/core/models/anthropic/tools/askui-api-tools.d.ts +19 -0
  9. package/dist/cjs/core/models/anthropic/tools/askui-api-tools.js +81 -0
  10. package/dist/cjs/core/models/anthropic/tools/base.d.ts +2 -0
  11. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +70 -6
  12. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +393 -84
  13. package/dist/cjs/core/ui-control-commands/input-event.d.ts +3 -1
  14. package/dist/cjs/core/ui-control-commands/input-event.js +2 -0
  15. package/dist/cjs/execution/execution-runtime.d.ts +4 -0
  16. package/dist/cjs/execution/inference-client.d.ts +4 -0
  17. package/dist/cjs/execution/ui-control-client.d.ts +112 -33
  18. package/dist/cjs/execution/ui-control-client.js +105 -45
  19. package/dist/cjs/lib/interactive_cli/create-example-project.js +1 -1
  20. package/dist/cjs/main.d.ts +1 -1
  21. package/dist/esm/core/ai-element/ai-element-collection.d.ts +1 -0
  22. package/dist/esm/core/ai-element/ai-element-collection.js +3 -0
  23. package/dist/esm/core/models/anthropic/askui-agent.d.ts +2 -0
  24. package/dist/esm/core/models/anthropic/askui-agent.js +28 -33
  25. package/dist/esm/core/models/anthropic/claude-agent.d.ts +16 -4
  26. package/dist/esm/core/models/anthropic/claude-agent.js +43 -5
  27. package/dist/esm/core/models/anthropic/index.d.ts +2 -3
  28. package/dist/esm/core/models/anthropic/index.js +1 -1
  29. package/dist/esm/core/models/anthropic/tools/askui-api-tools.d.ts +19 -0
  30. package/dist/esm/core/models/anthropic/tools/askui-api-tools.js +76 -0
  31. package/dist/esm/core/models/anthropic/tools/base.d.ts +2 -0
  32. package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +70 -6
  33. package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +384 -81
  34. package/dist/esm/core/ui-control-commands/input-event.d.ts +3 -1
  35. package/dist/esm/core/ui-control-commands/input-event.js +2 -0
  36. package/dist/esm/execution/execution-runtime.d.ts +4 -0
  37. package/dist/esm/execution/inference-client.d.ts +4 -0
  38. package/dist/esm/execution/ui-control-client.d.ts +112 -33
  39. package/dist/esm/execution/ui-control-client.js +105 -45
  40. package/dist/esm/lib/interactive_cli/create-example-project.js +1 -1
  41. package/dist/esm/main.d.ts +1 -1
  42. package/dist/esm/main.js +1 -1
  43. package/package.json +2 -2
@@ -9,7 +9,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
9
9
  });
10
10
  };
11
11
  Object.defineProperty(exports, "__esModule", { value: true });
12
- exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopSingleKeyPressTool = exports.DesktopKeyPressSequenceTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
12
+ exports.PrintTool = exports.WaitTool = exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopKeyReleaseTool = exports.DesktopKeyHoldDownTool = exports.DesktopPressAndReleaseKeysTool = exports.MouseReleaseLeftButtonTool = exports.MouseHoldLeftButtonDownTool = exports.MouseDragAndDropTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
13
13
  const dsl_1 = require("../../../../execution/dsl");
14
14
  const base_1 = require("./base");
15
15
  const ui_control_commands_1 = require("../../../ui-control-commands");
@@ -18,9 +18,40 @@ const agent_errors_1 = require("./agent-errors");
18
18
  class OsAgentHandler {
19
19
  constructor(AgentOsClient, screenDimensions) {
20
20
  this.AgentOsClient = AgentOsClient;
21
- this.TargetResolution = { width: 1280, height: 800 };
21
+ this.targetResolution = { width: 1280, height: 800 };
22
+ this.paddingInfo = null;
22
23
  this.screenDimensions = screenDimensions;
24
+ this.updatePaddingInfo();
25
+ }
26
+ updatePaddingInfo() {
27
+ const targetAspectRatio = this.targetResolution.width / this.targetResolution.height;
28
+ const screenAspectRatio = this.screenDimensions.width / this.screenDimensions.height;
29
+ let scaledWidth;
30
+ let scaledHeight;
31
+ let scaleFactor;
32
+ let padLeft = 0;
33
+ let padTop = 0;
34
+ if (targetAspectRatio > screenAspectRatio) {
35
+ scaleFactor = this.targetResolution.height / this.screenDimensions.height;
36
+ scaledWidth = Math.floor(this.screenDimensions.width * scaleFactor);
37
+ scaledHeight = this.targetResolution.height;
38
+ padLeft = Math.floor((this.targetResolution.width - scaledWidth) / 2);
39
+ }
40
+ else {
41
+ scaleFactor = this.targetResolution.width / this.screenDimensions.width;
42
+ scaledWidth = this.targetResolution.width;
43
+ scaledHeight = Math.floor(this.screenDimensions.height * scaleFactor);
44
+ padTop = Math.floor((this.targetResolution.height - scaledHeight) / 2);
45
+ }
46
+ this.paddingInfo = {
47
+ scaleFactor,
48
+ scaledWidth,
49
+ scaledHeight,
50
+ padLeft,
51
+ padTop
52
+ };
23
53
  }
54
+ // Add image support to act, and check for function overload in TypeScript.
24
55
  static createInstance(AgentOsClient) {
25
56
  return __awaiter(this, void 0, void 0, function* () {
26
57
  const base64ImageString = yield AgentOsClient.getScreenshot();
@@ -32,10 +63,14 @@ class OsAgentHandler {
32
63
  });
33
64
  }
34
65
  getTargetResolution() {
35
- return this.TargetResolution;
66
+ return this.targetResolution;
67
+ }
68
+ getScreenDimensions() {
69
+ return this.screenDimensions;
36
70
  }
37
71
  setTargetResolution(width, height) {
38
- this.TargetResolution = { width, height };
72
+ this.targetResolution = { width, height };
73
+ this.updatePaddingInfo();
39
74
  }
40
75
  takeScreenshot() {
41
76
  return __awaiter(this, void 0, void 0, function* () {
@@ -46,36 +81,148 @@ class OsAgentHandler {
46
81
  width: image_info.width,
47
82
  height: image_info.height,
48
83
  };
49
- const resized_image = yield base64Image.resizeWithSameAspectRatio(this.TargetResolution.width, this.TargetResolution.height);
84
+ this.updatePaddingInfo();
85
+ const resized_image = yield base64Image.resizeWithSameAspectRatio(this.targetResolution.width, this.targetResolution.height);
50
86
  return resized_image.toString(false);
51
87
  });
52
88
  }
53
89
  scaleCoordinates(source, x, y) {
54
- const xScalingFactor = this.TargetResolution.width / this.screenDimensions.width;
55
- const yScalingFactor = this.TargetResolution.height / this.screenDimensions.height;
90
+ if (!this.paddingInfo) {
91
+ throw new base_1.ToolError('Padding information not initialized');
92
+ }
93
+ const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop } = this.paddingInfo;
56
94
  if (source === 'api') {
57
- if (x > this.TargetResolution.width || y > this.TargetResolution.height || x < 0 || y < 0) {
95
+ if (x > this.targetResolution.width || y > this.targetResolution.height || x < 0 || y < 0) {
58
96
  throw new base_1.ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
59
- + `(${this.TargetResolution.width}x${this.TargetResolution.height})`);
97
+ + `(${this.targetResolution.width}x${this.targetResolution.height})`);
98
+ }
99
+ const adjustedX = x - padLeft;
100
+ const adjustedY = y - padTop;
101
+ if (adjustedX < 0 || adjustedX > scaledWidth || adjustedY < 0 || adjustedY > scaledHeight) {
102
+ throw new base_1.ToolError(`Coordinates ${x}, ${y} are outside the scaled image area `
103
+ + `(${scaledWidth}x${scaledHeight} with padding ${padLeft},${padTop})`);
60
104
  }
61
105
  return [
62
- Math.round(x / xScalingFactor),
63
- Math.round(y / yScalingFactor),
106
+ Math.round(adjustedX / scaleFactor),
107
+ Math.round(adjustedY / scaleFactor),
64
108
  ];
65
109
  }
66
- return [
67
- Math.round(x * xScalingFactor),
68
- Math.round(y * yScalingFactor),
69
- ];
110
+ const apiX = Math.round(x * scaleFactor) + padLeft;
111
+ const apiY = Math.round(y * scaleFactor) + padTop;
112
+ return [apiX, apiY];
70
113
  }
71
114
  requestControl(controlCommand) {
72
115
  return __awaiter(this, void 0, void 0, function* () {
73
116
  for (const action of controlCommand.actions) {
74
- [action.position.x, action.position.y] = this.scaleCoordinates('api', action.position.x, action.position.y);
117
+ if (action.inputEvent === ui_control_commands_1.InputEvent.MOUSE_MOVE || action.inputEvent === ui_control_commands_1.InputEvent.MOUSE_SCROLL) {
118
+ [action.position.x, action.position.y] = this.scaleCoordinates('api', action.position.x, action.position.y);
119
+ }
75
120
  }
76
121
  yield this.AgentOsClient.requestControl(controlCommand);
77
122
  });
78
123
  }
124
+ mouseMove(x, y) {
125
+ return __awaiter(this, void 0, void 0, function* () {
126
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_MOVE, { x, y }, '', {})]);
127
+ yield this.requestControl(controlCommand);
128
+ });
129
+ }
130
+ mouseClick(button, doubleClick) {
131
+ return __awaiter(this, void 0, void 0, function* () {
132
+ let action = ui_control_commands_1.InputEvent.MOUSE_CLICK_LEFT;
133
+ if (doubleClick) {
134
+ if (button === "left") {
135
+ action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_LEFT;
136
+ }
137
+ else if (button === "right") {
138
+ action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_RIGHT;
139
+ }
140
+ else if (button === "middle") {
141
+ action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE;
142
+ }
143
+ }
144
+ else {
145
+ if (button === "right") {
146
+ action = ui_control_commands_1.InputEvent.MOUSE_CLICK_RIGHT;
147
+ }
148
+ else if (button === "middle") {
149
+ action = ui_control_commands_1.InputEvent.MOUSE_CLICK_MIDDLE;
150
+ }
151
+ }
152
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(action, { x: 0, y: 0 }, '', {})]);
153
+ yield this.requestControl(controlCommand);
154
+ });
155
+ }
156
+ mouseScroll(dx, dy) {
157
+ return __awaiter(this, void 0, void 0, function* () {
158
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_SCROLL, { x: dx, y: dy }, '', {})]);
159
+ yield this.requestControl(controlCommand);
160
+ });
161
+ }
162
+ mouseHoldLeftButtonDown() {
163
+ return __awaiter(this, void 0, void 0, function* () {
164
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_DOWN, { x: 0, y: 0 }, '', {})]);
165
+ yield this.requestControl(controlCommand);
166
+ });
167
+ }
168
+ mouseReleaseLeftButton() {
169
+ return __awaiter(this, void 0, void 0, function* () {
170
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_UP, { x: 0, y: 0 }, '', {})]);
171
+ yield this.requestControl(controlCommand);
172
+ });
173
+ }
174
+ desktopKeyPressAndRelease(key_1) {
175
+ return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
176
+ let keyString = key;
177
+ if (modifiers.length > 0) {
178
+ keyString = `${modifiers.join('+')}+${key}`;
179
+ }
180
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, keyString, {})]);
181
+ yield this.requestControl(controlCommand);
182
+ });
183
+ }
184
+ desktopKeyHoldDown(key_1) {
185
+ return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
186
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.KEY_PRESS, { x: 0, y: 0 }, '', {
187
+ key: key,
188
+ modifiers: modifiers,
189
+ })]);
190
+ yield this.requestControl(controlCommand);
191
+ });
192
+ }
193
+ desktopKeyRelease(key_1) {
194
+ return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
195
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.KEY_RELEASE, { x: 0, y: 0 }, '', {
196
+ key: key,
197
+ modifiers: modifiers,
198
+ })]);
199
+ yield this.requestControl(controlCommand);
200
+ });
201
+ }
202
+ typeText(text) {
203
+ return __awaiter(this, void 0, void 0, function* () {
204
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.TYPE, { x: 0, y: 0 }, text, {})]);
205
+ yield this.requestControl(controlCommand);
206
+ });
207
+ }
208
+ androidKeyPress(key) {
209
+ return __awaiter(this, void 0, void 0, function* () {
210
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.PRESS_ANDROID_SINGLE_KEY, { x: 0, y: 0 }, key, {})]);
211
+ yield this.requestControl(controlCommand);
212
+ });
213
+ }
214
+ androidKeySequencePress(keys) {
215
+ return __awaiter(this, void 0, void 0, function* () {
216
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.PRESS_ANDROID_KEY_SEQUENCE, { x: 0, y: 0 }, keys.join(' '), {})]);
217
+ yield this.requestControl(controlCommand);
218
+ });
219
+ }
220
+ executeShellCommand(command) {
221
+ return __awaiter(this, void 0, void 0, function* () {
222
+ const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.EXECUTE_COMMAND, { x: 0, y: 0 }, command, {})]);
223
+ yield this.requestControl(controlCommand);
224
+ });
225
+ }
79
226
  }
80
227
  exports.OsAgentHandler = OsAgentHandler;
81
228
  class ScreenShotTool extends base_1.BaseAgentTool {
@@ -95,7 +242,7 @@ class ScreenShotTool extends base_1.BaseAgentTool {
95
242
  toParams() {
96
243
  return {
97
244
  name: 'screenshot_tool',
98
- description: 'Takes a screenshot of the current screen and returns it as a base64 image',
245
+ description: 'Takes a screenshot of the current screen and returns it as a base64 image.',
99
246
  input_schema: { type: 'object', properties: {}, required: [] },
100
247
  };
101
248
  }
@@ -108,8 +255,7 @@ class MouseMoveTool extends base_1.BaseAgentTool {
108
255
  }
109
256
  execute(command) {
110
257
  return __awaiter(this, void 0, void 0, function* () {
111
- const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_MOVE, { x: command.x, y: command.y }, '', {})]);
112
- yield this.osAgentHandler.requestControl(controlCommand);
258
+ yield this.osAgentHandler.mouseMove(command.x, command.y);
113
259
  return {
114
260
  output: `Moved mouse to (${command.x}, ${command.y})`,
115
261
  };
@@ -124,11 +270,11 @@ class MouseMoveTool extends base_1.BaseAgentTool {
124
270
  properties: {
125
271
  x: {
126
272
  type: 'number',
127
- description: 'The x coordinate of the element to click on',
273
+ description: 'The x (pixels from the left edge) coordinate to move the mouse to',
128
274
  },
129
275
  y: {
130
276
  type: 'number',
131
- description: 'The y coordinate of the element to click on',
277
+ description: 'The y (pixels from the top edge) coordinate to move the mouse to',
132
278
  },
133
279
  },
134
280
  },
@@ -143,33 +289,7 @@ class MouseClickTool extends base_1.BaseAgentTool {
143
289
  }
144
290
  execute(command) {
145
291
  return __awaiter(this, void 0, void 0, function* () {
146
- let controlCommand;
147
- if (command.doubleClick) {
148
- if (command.button === 'left') {
149
- controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_LEFT, { x: 0, y: 0 }, '', {})]);
150
- }
151
- if (command.button === 'right') {
152
- controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_RIGHT, { x: 0, y: 0 }, '', {})]);
153
- }
154
- if (command.button === 'middle') {
155
- controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE, { x: 0, y: 0 }, '', {})]);
156
- }
157
- }
158
- else {
159
- if (command.button === 'left') {
160
- controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_LEFT, { x: 0, y: 0 }, '', {})]);
161
- }
162
- if (command.button === 'right') {
163
- controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_RIGHT, { x: 0, y: 0 }, '', {})]);
164
- }
165
- if (command.button === 'middle') {
166
- controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_MIDDLE, { x: 0, y: 0 }, '', {})]);
167
- }
168
- }
169
- if (!controlCommand) {
170
- throw new base_1.ToolError('Invalid input parameter for mouse click tool');
171
- }
172
- yield this.osAgentHandler.requestControl(controlCommand);
292
+ yield this.osAgentHandler.mouseClick(command.button, command.doubleClick);
173
293
  const returnedMessage = command.doubleClick ? `Double clicked ${command.button} button` : `Clicked ${command.button} button`;
174
294
  return {
175
295
  output: returnedMessage,
@@ -206,8 +326,7 @@ class MouseScrollTool extends base_1.BaseAgentTool {
206
326
  }
207
327
  execute(command) {
208
328
  return __awaiter(this, void 0, void 0, function* () {
209
- const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_SCROLL, { x: command.dx, y: command.dy }, '', {})]);
210
- yield this.osAgentHandler.requestControl(controlCommand);
329
+ yield this.osAgentHandler.mouseScroll(command.dx, command.dy);
211
330
  return {
212
331
  output: `Scrolled by (${command.dx}, ${command.dy})`,
213
332
  };
@@ -222,11 +341,11 @@ class MouseScrollTool extends base_1.BaseAgentTool {
222
341
  properties: {
223
342
  dx: {
224
343
  type: 'number',
225
- description: 'The amount to scroll horizontally',
344
+ description: 'The amount to scroll horizontally (positive is right, negative is left)',
226
345
  },
227
346
  dy: {
228
347
  type: 'number',
229
- description: 'The amount to scroll vertically',
348
+ description: 'The amount to scroll vertically (positive is down, negative is up)',
230
349
  },
231
350
  },
232
351
  required: ['dx', 'dy'],
@@ -235,20 +354,107 @@ class MouseScrollTool extends base_1.BaseAgentTool {
235
354
  }
236
355
  }
237
356
  exports.MouseScrollTool = MouseScrollTool;
238
- class DesktopKeyPressSequenceTool extends base_1.BaseAgentTool {
357
+ class MouseDragAndDropTool extends base_1.BaseAgentTool {
239
358
  constructor(osAgentHandler) {
240
359
  super();
241
360
  this.osAgentHandler = osAgentHandler;
242
361
  }
243
362
  execute(command) {
244
363
  return __awaiter(this, void 0, void 0, function* () {
245
- const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, {
246
- firstModifier: command.firstModifier || '',
247
- secondModifier: command.secondModifier || '',
248
- })]);
249
- yield this.osAgentHandler.requestControl(controlCommand);
364
+ yield this.osAgentHandler.mouseMove(command.startX, command.startY);
365
+ yield this.osAgentHandler.mouseHoldLeftButtonDown();
366
+ yield this.osAgentHandler.mouseMove(command.endX, command.endY);
367
+ yield this.osAgentHandler.mouseReleaseLeftButton();
250
368
  return {
251
- output: `Pressed key ${command.key} with modifiers ${command.firstModifier || ''} ${command.secondModifier || ''}`,
369
+ output: `Dragged from (${command.startX}, ${command.startY}) to (${command.endX}, ${command.endY})`,
370
+ };
371
+ });
372
+ }
373
+ toParams() {
374
+ return {
375
+ name: 'mouse_drag_and_drop_tool',
376
+ description: 'Drags the mouse from the specified start coordinates to the specified end coordinates. The top left corner of the screen is (0,0)',
377
+ input_schema: {
378
+ type: 'object',
379
+ properties: {
380
+ startX: {
381
+ type: 'number',
382
+ description: 'The x (pixels from the left edge) coordinate of the start position',
383
+ },
384
+ startY: {
385
+ type: 'number',
386
+ description: 'The y (pixels from the top edge) coordinate of the start position',
387
+ },
388
+ endX: {
389
+ type: 'number',
390
+ description: 'The x (pixels from the left edge) coordinate of the end position',
391
+ },
392
+ endY: {
393
+ type: 'number',
394
+ description: 'The y (pixels from the top edge) coordinate of the end position',
395
+ },
396
+ },
397
+ required: ['startX', 'startY', 'endX', 'endY'],
398
+ },
399
+ };
400
+ }
401
+ }
402
+ exports.MouseDragAndDropTool = MouseDragAndDropTool;
403
+ class MouseHoldLeftButtonDownTool extends base_1.BaseAgentTool {
404
+ constructor(osAgentHandler) {
405
+ super();
406
+ this.osAgentHandler = osAgentHandler;
407
+ }
408
+ execute() {
409
+ return __awaiter(this, void 0, void 0, function* () {
410
+ yield this.osAgentHandler.mouseHoldLeftButtonDown();
411
+ return {
412
+ output: 'Holding down left mouse button',
413
+ };
414
+ });
415
+ }
416
+ toParams() {
417
+ return {
418
+ name: 'mouse_hold_left_button_down_tool',
419
+ description: 'Hold down the left mouse button at the current position.',
420
+ input_schema: { type: 'object', properties: {}, required: [] },
421
+ };
422
+ }
423
+ }
424
+ exports.MouseHoldLeftButtonDownTool = MouseHoldLeftButtonDownTool;
425
+ class MouseReleaseLeftButtonTool extends base_1.BaseAgentTool {
426
+ constructor(osAgentHandler) {
427
+ super();
428
+ this.osAgentHandler = osAgentHandler;
429
+ }
430
+ execute() {
431
+ return __awaiter(this, void 0, void 0, function* () {
432
+ yield this.osAgentHandler.mouseReleaseLeftButton();
433
+ return {
434
+ output: 'Released left mouse button',
435
+ };
436
+ });
437
+ }
438
+ toParams() {
439
+ return {
440
+ name: 'mouse_release_left_button_tool',
441
+ description: 'Release the left mouse button at the current position.',
442
+ input_schema: { type: 'object', properties: {}, required: [] },
443
+ };
444
+ }
445
+ }
446
+ exports.MouseReleaseLeftButtonTool = MouseReleaseLeftButtonTool;
447
+ class DesktopPressAndReleaseKeysTool extends base_1.BaseAgentTool {
448
+ constructor(osAgentHandler) {
449
+ super();
450
+ this.osAgentHandler = osAgentHandler;
451
+ }
452
+ execute(command) {
453
+ return __awaiter(this, void 0, void 0, function* () {
454
+ const modifiers = command.modifiers || [];
455
+ yield this.osAgentHandler.desktopKeyPressAndRelease(command.key, modifiers);
456
+ return {
457
+ output: `Pressed key ${command.key} with modifiers ${modifiers.join(' ')}`,
252
458
  };
253
459
  });
254
460
  }
@@ -264,15 +470,54 @@ class DesktopKeyPressSequenceTool extends base_1.BaseAgentTool {
264
470
  enum: dsl_1.PC_KEY_VALUES,
265
471
  description: 'The key to press',
266
472
  },
267
- firstModifier: {
268
- type: 'string',
269
- enum: dsl_1.MODIFIER_KEY_VALUES,
270
- description: 'The first modifier key',
473
+ modifiers: {
474
+ type: 'array',
475
+ items: {
476
+ type: 'string',
477
+ enum: dsl_1.MODIFIER_KEY_VALUES,
478
+ },
479
+ description: 'The modifiers to press',
271
480
  },
272
- secondModifier: {
481
+ },
482
+ required: ['key'],
483
+ },
484
+ };
485
+ }
486
+ }
487
+ exports.DesktopPressAndReleaseKeysTool = DesktopPressAndReleaseKeysTool;
488
+ class DesktopKeyHoldDownTool extends base_1.BaseAgentTool {
489
+ constructor(osAgentHandler) {
490
+ super();
491
+ this.osAgentHandler = osAgentHandler;
492
+ }
493
+ execute(command) {
494
+ return __awaiter(this, void 0, void 0, function* () {
495
+ const modifiers = command.modifiers || [];
496
+ yield this.osAgentHandler.desktopKeyHoldDown(command.key, modifiers);
497
+ return {
498
+ output: `Holding down key ${command.key} with modifiers ${modifiers.join(' ')}`,
499
+ };
500
+ });
501
+ }
502
+ toParams() {
503
+ return {
504
+ name: 'desktop_key_hold_down_tool',
505
+ description: 'Hold down a key and optional modifiers. Keys will be still pressed after the tool is finished.',
506
+ input_schema: {
507
+ type: 'object',
508
+ properties: {
509
+ key: {
273
510
  type: 'string',
274
- enum: dsl_1.MODIFIER_KEY_VALUES,
275
- description: 'The second modifier key',
511
+ enum: [...dsl_1.PC_KEY_VALUES, ...dsl_1.MODIFIER_KEY_VALUES],
512
+ description: 'The key to hold down',
513
+ },
514
+ modifiers: {
515
+ type: 'array',
516
+ items: {
517
+ type: 'string',
518
+ enum: dsl_1.MODIFIER_KEY_VALUES,
519
+ },
520
+ description: 'The modifiers to hold down',
276
521
  },
277
522
  },
278
523
  required: ['key'],
@@ -280,32 +525,40 @@ class DesktopKeyPressSequenceTool extends base_1.BaseAgentTool {
280
525
  };
281
526
  }
282
527
  }
283
- exports.DesktopKeyPressSequenceTool = DesktopKeyPressSequenceTool;
284
- class DesktopSingleKeyPressTool extends base_1.BaseAgentTool {
528
+ exports.DesktopKeyHoldDownTool = DesktopKeyHoldDownTool;
529
+ class DesktopKeyReleaseTool extends base_1.BaseAgentTool {
285
530
  constructor(osAgentHandler) {
286
531
  super();
287
532
  this.osAgentHandler = osAgentHandler;
288
533
  }
289
534
  execute(command) {
290
535
  return __awaiter(this, void 0, void 0, function* () {
291
- const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, {})]);
292
- yield this.osAgentHandler.requestControl(controlCommand);
536
+ const modifiers = command.modifiers || [];
537
+ yield this.osAgentHandler.desktopKeyRelease(command.key, modifiers);
293
538
  return {
294
- output: `Pressed key ${command.key}`,
539
+ output: `Released key ${command.key} with modifiers ${modifiers.join(' ')}`,
295
540
  };
296
541
  });
297
542
  }
298
543
  toParams() {
299
544
  return {
300
- name: 'desktop_single_key_press_tool',
301
- description: 'Presses a single key',
545
+ name: 'desktop_key_release_tool',
546
+ description: 'Releases a key and optional modifiers. This can be used after keys were held down with the desktop_key_hold_down_tool',
302
547
  input_schema: {
303
548
  type: 'object',
304
549
  properties: {
305
550
  key: {
306
551
  type: 'string',
307
552
  enum: [...dsl_1.PC_KEY_VALUES, ...dsl_1.MODIFIER_KEY_VALUES],
308
- description: 'The key to press',
553
+ description: 'The key to release',
554
+ },
555
+ modifiers: {
556
+ type: 'array',
557
+ items: {
558
+ type: 'string',
559
+ enum: dsl_1.MODIFIER_KEY_VALUES,
560
+ },
561
+ description: 'The modifiers to release',
309
562
  },
310
563
  },
311
564
  required: ['key'],
@@ -313,7 +566,7 @@ class DesktopSingleKeyPressTool extends base_1.BaseAgentTool {
313
566
  };
314
567
  }
315
568
  }
316
- exports.DesktopSingleKeyPressTool = DesktopSingleKeyPressTool;
569
+ exports.DesktopKeyReleaseTool = DesktopKeyReleaseTool;
317
570
  class TypeTool extends base_1.BaseAgentTool {
318
571
  constructor(osAgentHandler) {
319
572
  super();
@@ -321,8 +574,7 @@ class TypeTool extends base_1.BaseAgentTool {
321
574
  }
322
575
  execute(command) {
323
576
  return __awaiter(this, void 0, void 0, function* () {
324
- const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.TYPE, { x: 0, y: 0 }, command.text, {})]);
325
- yield this.osAgentHandler.requestControl(controlCommand);
577
+ yield this.osAgentHandler.typeText(command.text);
326
578
  return {
327
579
  output: `Typed text: ${command.text}`,
328
580
  };
@@ -353,8 +605,7 @@ class AndroidSingleKeyPressTool extends base_1.BaseAgentTool {
353
605
  }
354
606
  execute(command) {
355
607
  return __awaiter(this, void 0, void 0, function* () {
356
- const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, {})]);
357
- yield this.osAgentHandler.requestControl(controlCommand);
608
+ yield this.osAgentHandler.androidKeyPress(command.key);
358
609
  return {
359
610
  output: `Pressed Android key ${command.key}`,
360
611
  };
@@ -386,8 +637,7 @@ class AndroidSequenceKeyPressTool extends base_1.BaseAgentTool {
386
637
  }
387
638
  execute(command) {
388
639
  return __awaiter(this, void 0, void 0, function* () {
389
- const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.keys.join(' '), {})]);
390
- yield this.osAgentHandler.requestControl(controlCommand);
640
+ yield this.osAgentHandler.androidKeySequencePress(command.keys);
391
641
  return {
392
642
  output: `Pressed Android keys: ${command.keys.join(', ')}`,
393
643
  };
@@ -427,7 +677,7 @@ class AgentErrorTool extends base_1.BaseAgentTool {
427
677
  toParams() {
428
678
  return {
429
679
  name: 'agent_error_tool',
430
- description: 'Raises an error in the agent',
680
+ description: 'Intentionally raises an error to signal that the agent cannot proceed with the current task. Use this when the agent encounters an unsolvable problem, gets stuck in a loop, or needs to communicate a critical failure that prevents further automation.',
431
681
  input_schema: {
432
682
  type: 'object',
433
683
  properties: {
@@ -449,8 +699,7 @@ class ExecuteShellCommandTool extends base_1.BaseAgentTool {
449
699
  }
450
700
  execute(command) {
451
701
  return __awaiter(this, void 0, void 0, function* () {
452
- const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.EXECUTE_COMMAND, { x: 0, y: 0 }, command.command, {})]);
453
- yield this.osAgentHandler.requestControl(controlCommand);
702
+ yield this.osAgentHandler.executeShellCommand(command.command);
454
703
  return {
455
704
  output: `Executed shell command: ${command.command}`,
456
705
  };
@@ -474,3 +723,63 @@ class ExecuteShellCommandTool extends base_1.BaseAgentTool {
474
723
  }
475
724
  }
476
725
  exports.ExecuteShellCommandTool = ExecuteShellCommandTool;
726
+ class WaitTool extends base_1.BaseAgentTool {
727
+ constructor() {
728
+ super();
729
+ }
730
+ execute(command) {
731
+ return __awaiter(this, void 0, void 0, function* () {
732
+ yield new Promise(resolve => setTimeout(resolve, command.milliseconds));
733
+ return {
734
+ output: `Waited for ${command.milliseconds} milliseconds`,
735
+ };
736
+ });
737
+ }
738
+ toParams() {
739
+ return {
740
+ name: 'wait_tool',
741
+ description: 'Waits for a specified number of milliseconds',
742
+ input_schema: {
743
+ type: 'object',
744
+ properties: {
745
+ milliseconds: {
746
+ type: 'number',
747
+ description: 'The number of milliseconds to wait',
748
+ },
749
+ },
750
+ required: ['milliseconds'],
751
+ },
752
+ };
753
+ }
754
+ }
755
+ exports.WaitTool = WaitTool;
756
+ class PrintTool extends base_1.BaseAgentTool {
757
+ constructor() {
758
+ super();
759
+ }
760
+ execute(command) {
761
+ return __awaiter(this, void 0, void 0, function* () {
762
+ console.log(command.text);
763
+ return {
764
+ output: `Printed text: ${command.text}`,
765
+ };
766
+ });
767
+ }
768
+ toParams() {
769
+ return {
770
+ name: 'print_tool',
771
+ description: 'Outputs text to the console for debugging, status updates, or user communication. Useful for providing feedback about automation progress, errors, or important information during test execution.',
772
+ input_schema: {
773
+ type: 'object',
774
+ properties: {
775
+ text: {
776
+ type: 'string',
777
+ description: 'The text to output to the console.',
778
+ },
779
+ },
780
+ required: ['text'],
781
+ },
782
+ };
783
+ }
784
+ }
785
+ exports.PrintTool = PrintTool;