askui 0.29.0 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/dist/cjs/core/cache/cache-config.d.ts +5 -0
  2. package/dist/cjs/core/cache/cache-config.js +2 -0
  3. package/dist/cjs/core/cache/cache-entry-reference.d.ts +7 -0
  4. package/dist/cjs/core/cache/cache-entry-reference.js +20 -0
  5. package/dist/cjs/core/cache/cache-entry.d.ts +11 -0
  6. package/dist/cjs/core/cache/cache-entry.js +47 -0
  7. package/dist/cjs/core/cache/cache-interface.d.ts +11 -0
  8. package/dist/cjs/core/cache/cache-interface.js +2 -0
  9. package/dist/cjs/core/cache/cache-manager.d.ts +24 -0
  10. package/dist/cjs/core/cache/cache-manager.js +145 -0
  11. package/dist/cjs/core/cache/cahe-file.d.ts +19 -0
  12. package/dist/cjs/core/cache/cahe-file.js +128 -0
  13. package/dist/cjs/core/cache/dummy-cache-manager.d.ts +12 -0
  14. package/dist/cjs/core/cache/dummy-cache-manager.js +27 -0
  15. package/dist/cjs/core/cache/image-reference.d.ts +10 -0
  16. package/dist/cjs/core/cache/image-reference.js +40 -0
  17. package/dist/cjs/core/cache/index.d.ts +7 -0
  18. package/dist/cjs/core/cache/index.js +13 -0
  19. package/dist/cjs/core/model/annotation-result/boundary-box.js +1 -1
  20. package/dist/cjs/core/model/custom-element.d.ts +1 -0
  21. package/dist/cjs/core/model/custom-element.js +3 -0
  22. package/dist/cjs/core/models/anthropic/askui-agent.d.ts +2 -2
  23. package/dist/cjs/core/models/anthropic/askui-agent.js +33 -33
  24. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +47 -3
  25. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +220 -23
  26. package/dist/cjs/core/ui-control-commands/action.d.ts +1 -0
  27. package/dist/cjs/core/ui-control-commands/action.js +10 -2
  28. package/dist/cjs/core/ui-control-commands/control-command.d.ts +1 -0
  29. package/dist/cjs/core/ui-control-commands/control-command.js +7 -0
  30. package/dist/cjs/execution/dsl.d.ts +12 -5
  31. package/dist/cjs/execution/dsl.js +30 -15
  32. package/dist/cjs/execution/execution-runtime.d.ts +1 -1
  33. package/dist/cjs/execution/execution-runtime.js +21 -17
  34. package/dist/cjs/execution/inference-client.d.ts +5 -3
  35. package/dist/cjs/execution/inference-client.js +22 -3
  36. package/dist/cjs/execution/ui-control-client-dependency-builder.js +6 -1
  37. package/dist/cjs/execution/ui-control-client.d.ts +2 -2
  38. package/dist/cjs/execution/ui-control-client.js +29 -12
  39. package/dist/cjs/execution/ui-controller-client-interface.d.ts +2 -0
  40. package/dist/cjs/utils/base_64_image/base-64-image.d.ts +2 -0
  41. package/dist/cjs/utils/base_64_image/base-64-image.js +27 -4
  42. package/dist/esm/core/cache/cache-config.d.ts +5 -0
  43. package/dist/esm/core/cache/cache-config.js +1 -0
  44. package/dist/esm/core/cache/cache-entry-reference.d.ts +7 -0
  45. package/dist/esm/core/cache/cache-entry-reference.js +16 -0
  46. package/dist/esm/core/cache/cache-entry.d.ts +11 -0
  47. package/dist/esm/core/cache/cache-entry.js +43 -0
  48. package/dist/esm/core/cache/cache-interface.d.ts +11 -0
  49. package/dist/esm/core/cache/cache-interface.js +1 -0
  50. package/dist/esm/core/cache/cache-manager.d.ts +24 -0
  51. package/dist/esm/core/cache/cache-manager.js +141 -0
  52. package/dist/esm/core/cache/cahe-file.d.ts +19 -0
  53. package/dist/esm/core/cache/cahe-file.js +121 -0
  54. package/dist/esm/core/cache/dummy-cache-manager.d.ts +12 -0
  55. package/dist/esm/core/cache/dummy-cache-manager.js +23 -0
  56. package/dist/esm/core/cache/image-reference.d.ts +10 -0
  57. package/dist/esm/core/cache/image-reference.js +36 -0
  58. package/dist/esm/core/cache/index.d.ts +7 -0
  59. package/dist/esm/core/cache/index.js +5 -0
  60. package/dist/esm/core/model/annotation-result/boundary-box.js +1 -1
  61. package/dist/esm/core/model/custom-element.d.ts +1 -0
  62. package/dist/esm/core/model/custom-element.js +3 -0
  63. package/dist/esm/core/models/anthropic/askui-agent.d.ts +2 -2
  64. package/dist/esm/core/models/anthropic/askui-agent.js +34 -34
  65. package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +47 -3
  66. package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +215 -22
  67. package/dist/esm/core/ui-control-commands/action.d.ts +1 -0
  68. package/dist/esm/core/ui-control-commands/action.js +10 -2
  69. package/dist/esm/core/ui-control-commands/control-command.d.ts +1 -0
  70. package/dist/esm/core/ui-control-commands/control-command.js +7 -0
  71. package/dist/esm/execution/dsl.d.ts +12 -5
  72. package/dist/esm/execution/dsl.js +30 -15
  73. package/dist/esm/execution/execution-runtime.d.ts +1 -1
  74. package/dist/esm/execution/execution-runtime.js +21 -17
  75. package/dist/esm/execution/inference-client.d.ts +5 -3
  76. package/dist/esm/execution/inference-client.js +22 -3
  77. package/dist/esm/execution/ui-control-client-dependency-builder.js +6 -1
  78. package/dist/esm/execution/ui-control-client.d.ts +2 -2
  79. package/dist/esm/execution/ui-control-client.js +29 -12
  80. package/dist/esm/execution/ui-controller-client-interface.d.ts +2 -0
  81. package/dist/esm/utils/base_64_image/base-64-image.d.ts +2 -0
  82. package/dist/esm/utils/base_64_image/base-64-image.js +27 -4
  83. package/package.json +1 -1
  84. package/dist/example_projects_templates/templates/askui-helper-windows.nj +0 -32
@@ -16,6 +16,7 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
16
16
  constructor(executionRuntime) {
17
17
  super((params) => executionRuntime.predictActResponse(params));
18
18
  this.osAgentHandler = undefined;
19
+ this.runtime = 'desktop';
19
20
  this.executionRuntime = executionRuntime;
20
21
  }
21
22
  isConnected() {
@@ -24,6 +25,7 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
24
25
  initializeOsAgentHandler() {
25
26
  return __awaiter(this, void 0, void 0, function* () {
26
27
  this.osAgentHandler = yield os_agent_tools_1.OsAgentHandler.createInstance(this.executionRuntime);
28
+ this.runtime = this.osAgentHandler.runtime;
27
29
  });
28
30
  }
29
31
  getOsAgentHandler() {
@@ -32,50 +34,48 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
32
34
  }
33
35
  return this.osAgentHandler;
34
36
  }
35
- configureAsDesktopAgent() {
37
+ configureAgent() {
36
38
  return __awaiter(this, void 0, void 0, function* () {
37
39
  if (!this.osAgentHandler) {
38
40
  throw new Error('Agent OS client is not connected');
39
41
  }
40
- const tools = [
42
+ let systemPrompt = AskUIAgent.DesktopSystemPrompt;
43
+ let tools = [
41
44
  new os_agent_tools_1.AgentErrorTool(),
42
45
  new os_agent_tools_1.PrintTool(),
43
- new os_agent_tools_1.ScreenShotTool(this.osAgentHandler),
44
- new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
45
- new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
46
- new os_agent_tools_1.MouseScrollTool(this.osAgentHandler),
47
- new os_agent_tools_1.TypeTool(this.osAgentHandler),
48
- new os_agent_tools_1.DesktopPressAndReleaseKeysTool(this.osAgentHandler),
49
- new os_agent_tools_1.DesktopKeyHoldDownTool(this.osAgentHandler),
50
- new os_agent_tools_1.DesktopKeyReleaseTool(this.osAgentHandler),
51
- new os_agent_tools_1.MouseHoldLeftButtonDownTool(this.osAgentHandler),
52
- new os_agent_tools_1.MouseReleaseLeftButtonTool(this.osAgentHandler),
53
- new os_agent_tools_1.MouseDragAndDropTool(this.osAgentHandler),
54
46
  new os_agent_tools_1.WaitTool(),
55
- ];
56
- this.setTools(tools);
57
- this.setSystemPrompt(AskUIAgent.DesktopSystemPrompt);
58
- });
59
- }
60
- configureAsAndroidAgent() {
61
- return __awaiter(this, void 0, void 0, function* () {
62
- if (!this.osAgentHandler) {
63
- throw new Error('Agent OS client is not connected');
64
- }
65
- const tools = [
66
- new os_agent_tools_1.AgentErrorTool(),
67
47
  new os_agent_tools_1.ScreenShotTool(this.osAgentHandler),
68
- new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
69
- new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
70
- new os_agent_tools_1.MouseScrollTool(this.osAgentHandler),
71
- new os_agent_tools_1.AndroidSingleKeyPressTool(this.osAgentHandler),
72
- new os_agent_tools_1.AndroidSequenceKeyPressTool(this.osAgentHandler),
73
48
  new os_agent_tools_1.TypeTool(this.osAgentHandler),
74
- new os_agent_tools_1.ExecuteShellCommandTool(this.osAgentHandler),
75
- new os_agent_tools_1.WaitTool(),
76
49
  ];
50
+ if (this.runtime === 'desktop') {
51
+ tools = [
52
+ ...tools,
53
+ new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
54
+ new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
55
+ new os_agent_tools_1.MouseScrollTool(this.osAgentHandler),
56
+ new os_agent_tools_1.DesktopPressAndReleaseKeysTool(this.osAgentHandler),
57
+ new os_agent_tools_1.DesktopKeyHoldDownTool(this.osAgentHandler),
58
+ new os_agent_tools_1.DesktopKeyReleaseTool(this.osAgentHandler),
59
+ new os_agent_tools_1.MouseHoldLeftButtonDownTool(this.osAgentHandler),
60
+ new os_agent_tools_1.MouseReleaseLeftButtonTool(this.osAgentHandler),
61
+ new os_agent_tools_1.MouseDragAndDropTool(this.osAgentHandler),
62
+ new os_agent_tools_1.ExecuteShellCommandTool(this.osAgentHandler),
63
+ ];
64
+ }
65
+ if (this.runtime === 'android') {
66
+ tools = [
67
+ ...tools,
68
+ new os_agent_tools_1.AndroidSingleKeyPressTool(this.osAgentHandler),
69
+ new os_agent_tools_1.AndroidSequenceKeyPressTool(this.osAgentHandler),
70
+ new os_agent_tools_1.AndroidSwipeTool(this.osAgentHandler),
71
+ new os_agent_tools_1.AndroidDragAndDropTool(this.osAgentHandler),
72
+ new os_agent_tools_1.AndroidTapTool(this.osAgentHandler),
73
+ new os_agent_tools_1.AndroidShellCommandTool(this.osAgentHandler),
74
+ ];
75
+ systemPrompt = AskUIAgent.AndroidSystemPrompt;
76
+ }
77
77
  this.setTools(tools);
78
- this.setSystemPrompt(AskUIAgent.AndroidSystemPrompt);
78
+ this.setSystemPrompt(systemPrompt);
79
79
  });
80
80
  }
81
81
  }
@@ -5,13 +5,14 @@ import { ExecutionRuntime } from '../../../../execution/execution-runtime';
5
5
  import { ControlCommand } from '../../../ui-control-commands';
6
6
  export declare class OsAgentHandler {
7
7
  private AgentOsClient;
8
- private targetResolution;
9
8
  private screenDimensions;
9
+ runtime: 'android' | 'desktop';
10
+ private targetResolution;
10
11
  private paddingInfo;
11
12
  constructor(AgentOsClient: ExecutionRuntime, screenDimensions: {
12
13
  width: number;
13
14
  height: number;
14
- });
15
+ }, runtime: 'android' | 'desktop');
15
16
  private updatePaddingInfo;
16
17
  static createInstance(AgentOsClient: ExecutionRuntime): Promise<OsAgentHandler>;
17
18
  getTargetResolution(): {
@@ -27,7 +28,7 @@ export declare class OsAgentHandler {
27
28
  scaleCoordinates(source: 'api' | 'computer', x: number, y: number): [number, number];
28
29
  requestControl(controlCommand: ControlCommand): Promise<void>;
29
30
  mouseMove(x: number, y: number): Promise<void>;
30
- mouseClick(button: "left" | "right" | "middle", doubleClick: boolean): Promise<void>;
31
+ mouseClick(button: 'left' | 'right' | 'middle', doubleClick: boolean): Promise<void>;
31
32
  mouseScroll(dx: number, dy: number): Promise<void>;
32
33
  mouseHoldLeftButtonDown(): Promise<void>;
33
34
  mouseReleaseLeftButton(): Promise<void>;
@@ -38,6 +39,10 @@ export declare class OsAgentHandler {
38
39
  androidKeyPress(key: ANDROID_KEY): Promise<void>;
39
40
  androidKeySequencePress(keys: ANDROID_KEY[]): Promise<void>;
40
41
  executeShellCommand(command: string): Promise<void>;
42
+ AndroidSwipeTool(startX: number, startY: number, endX: number, endY: number): Promise<void>;
43
+ AndroidDragAndDropTool(startX: number, startY: number, endX: number, endY: number): Promise<void>;
44
+ AndroidTapTool(x: number, y: number): Promise<void>;
45
+ executeAndroidShellCommand(command: string): Promise<void>;
41
46
  }
42
47
  export declare class ScreenShotTool extends BaseAgentTool {
43
48
  private osAgentHandler;
@@ -175,3 +180,42 @@ export declare class PrintTool extends BaseAgentTool {
175
180
  }): Promise<ToolResult>;
176
181
  toParams(): BetaTool;
177
182
  }
183
+ export declare class AndroidSwipeTool extends BaseAgentTool {
184
+ private osAgentHandler;
185
+ constructor(osAgentHandler: OsAgentHandler);
186
+ execute(command: {
187
+ startX: number;
188
+ startY: number;
189
+ endX: number;
190
+ endY: number;
191
+ }): Promise<ToolResult>;
192
+ toParams(): BetaTool;
193
+ }
194
+ export declare class AndroidDragAndDropTool extends BaseAgentTool {
195
+ private osAgentHandler;
196
+ constructor(osAgentHandler: OsAgentHandler);
197
+ execute(command: {
198
+ startX: number;
199
+ startY: number;
200
+ endX: number;
201
+ endY: number;
202
+ }): Promise<ToolResult>;
203
+ toParams(): BetaTool;
204
+ }
205
+ export declare class AndroidTapTool extends BaseAgentTool {
206
+ private osAgentHandler;
207
+ constructor(osAgentHandler: OsAgentHandler);
208
+ execute(command: {
209
+ x: number;
210
+ y: number;
211
+ }): Promise<ToolResult>;
212
+ toParams(): BetaTool;
213
+ }
214
+ export declare class AndroidShellCommandTool extends BaseAgentTool {
215
+ private osAgentHandler;
216
+ constructor(osAgentHandler: OsAgentHandler);
217
+ execute(command: {
218
+ command: string;
219
+ }): Promise<ToolResult>;
220
+ toParams(): BetaTool;
221
+ }
@@ -9,15 +9,17 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
9
9
  });
10
10
  };
11
11
  Object.defineProperty(exports, "__esModule", { value: true });
12
- exports.PrintTool = exports.WaitTool = exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopKeyReleaseTool = exports.DesktopKeyHoldDownTool = exports.DesktopPressAndReleaseKeysTool = exports.MouseReleaseLeftButtonTool = exports.MouseHoldLeftButtonDownTool = exports.MouseDragAndDropTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
12
+ exports.AndroidShellCommandTool = exports.AndroidTapTool = exports.AndroidDragAndDropTool = exports.AndroidSwipeTool = exports.PrintTool = exports.WaitTool = exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopKeyReleaseTool = exports.DesktopKeyHoldDownTool = exports.DesktopPressAndReleaseKeysTool = exports.MouseReleaseLeftButtonTool = exports.MouseHoldLeftButtonDownTool = exports.MouseDragAndDropTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
13
13
  const dsl_1 = require("../../../../execution/dsl");
14
14
  const base_1 = require("./base");
15
15
  const ui_control_commands_1 = require("../../../ui-control-commands");
16
16
  const base_64_image_1 = require("../../../../utils/base_64_image/base-64-image");
17
17
  const agent_errors_1 = require("./agent-errors");
18
18
  class OsAgentHandler {
19
- constructor(AgentOsClient, screenDimensions) {
19
+ constructor(AgentOsClient, screenDimensions, runtime) {
20
20
  this.AgentOsClient = AgentOsClient;
21
+ this.screenDimensions = screenDimensions;
22
+ this.runtime = runtime;
21
23
  this.targetResolution = { width: 1280, height: 800 };
22
24
  this.paddingInfo = null;
23
25
  this.screenDimensions = screenDimensions;
@@ -48,7 +50,7 @@ class OsAgentHandler {
48
50
  scaledWidth,
49
51
  scaledHeight,
50
52
  padLeft,
51
- padTop
53
+ padTop,
52
54
  };
53
55
  }
54
56
  // Add image support to act, an check for function overload in typescript.
@@ -56,10 +58,12 @@ class OsAgentHandler {
56
58
  return __awaiter(this, void 0, void 0, function* () {
57
59
  const base64ImageString = yield AgentOsClient.getScreenshot();
58
60
  const image_info = yield (yield base_64_image_1.Base64Image.fromString(base64ImageString)).getInfo();
61
+ const startingArguments = yield AgentOsClient.getStartingArguments();
62
+ const runtime = startingArguments['runtime'] === 'android' ? 'android' : 'desktop';
59
63
  return new OsAgentHandler(AgentOsClient, {
60
64
  width: image_info.width,
61
65
  height: image_info.height,
62
- });
66
+ }, runtime);
63
67
  });
64
68
  }
65
69
  getTargetResolution() {
@@ -90,7 +94,7 @@ class OsAgentHandler {
90
94
  if (!this.paddingInfo) {
91
95
  throw new base_1.ToolError('Padding information not initialized');
92
96
  }
93
- const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop } = this.paddingInfo;
97
+ const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop, } = this.paddingInfo;
94
98
  if (source === 'api') {
95
99
  if (x > this.targetResolution.width || y > this.targetResolution.height || x < 0 || y < 0) {
96
100
  throw new base_1.ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
@@ -131,23 +135,21 @@ class OsAgentHandler {
131
135
  return __awaiter(this, void 0, void 0, function* () {
132
136
  let action = ui_control_commands_1.InputEvent.MOUSE_CLICK_LEFT;
133
137
  if (doubleClick) {
134
- if (button === "left") {
138
+ if (button === 'left') {
135
139
  action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_LEFT;
136
140
  }
137
- else if (button === "right") {
141
+ else if (button === 'right') {
138
142
  action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_RIGHT;
139
143
  }
140
- else if (button === "middle") {
144
+ else if (button === 'middle') {
141
145
  action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE;
142
146
  }
143
147
  }
144
- else {
145
- if (button === "right") {
146
- action = ui_control_commands_1.InputEvent.MOUSE_CLICK_RIGHT;
147
- }
148
- else if (button === "middle") {
149
- action = ui_control_commands_1.InputEvent.MOUSE_CLICK_MIDDLE;
150
- }
148
+ else if (button === 'right') {
149
+ action = ui_control_commands_1.InputEvent.MOUSE_CLICK_RIGHT;
150
+ }
151
+ else if (button === 'middle') {
152
+ action = ui_control_commands_1.InputEvent.MOUSE_CLICK_MIDDLE;
151
153
  }
152
154
  const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(action, { x: 0, y: 0 }, '', {})]);
153
155
  yield this.requestControl(controlCommand);
@@ -184,8 +186,8 @@ class OsAgentHandler {
184
186
  desktopKeyHoldDown(key_1) {
185
187
  return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
186
188
  const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.KEY_PRESS, { x: 0, y: 0 }, '', {
187
- key: key,
188
- modifiers: modifiers,
189
+ key,
190
+ modifiers,
189
191
  })]);
190
192
  yield this.requestControl(controlCommand);
191
193
  });
@@ -193,8 +195,8 @@ class OsAgentHandler {
193
195
  desktopKeyRelease(key_1) {
194
196
  return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
195
197
  const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.KEY_RELEASE, { x: 0, y: 0 }, '', {
196
- key: key,
197
- modifiers: modifiers,
198
+ key,
199
+ modifiers,
198
200
  })]);
199
201
  yield this.requestControl(controlCommand);
200
202
  });
@@ -223,6 +225,47 @@ class OsAgentHandler {
223
225
  yield this.requestControl(controlCommand);
224
226
  });
225
227
  }
228
+ AndroidSwipeTool(startX, startY, endX, endY) {
229
+ return __awaiter(this, void 0, void 0, function* () {
230
+ if (this.runtime !== 'android') {
231
+ throw new base_1.ToolError('This tool is only available on Android devices');
232
+ }
233
+ [startX, startY] = this.scaleCoordinates('api', startX, startY);
234
+ [endX, endY] = this.scaleCoordinates('api', endX, endY);
235
+ const adbCommand = `input swipe ${startX} ${startY} ${endX} ${endY}`;
236
+ yield this.executeShellCommand(adbCommand);
237
+ });
238
+ }
239
+ AndroidDragAndDropTool(startX, startY, endX, endY) {
240
+ return __awaiter(this, void 0, void 0, function* () {
241
+ if (this.runtime !== 'android') {
242
+ throw new base_1.ToolError('This tool is only available on Android devices');
243
+ }
244
+ [startX, startY] = this.scaleCoordinates('api', startX, startY);
245
+ [endX, endY] = this.scaleCoordinates('api', endX, endY);
246
+ const adbCommand = `input draganddrop ${startX} ${startY} ${endX} ${endY}`;
247
+ yield this.executeShellCommand(adbCommand);
248
+ });
249
+ }
250
+ AndroidTapTool(x, y) {
251
+ return __awaiter(this, void 0, void 0, function* () {
252
+ if (this.runtime !== 'android') {
253
+ throw new base_1.ToolError('This tool is only available on Android devices');
254
+ }
255
+ [x, y] = this.scaleCoordinates('api', x, y);
256
+ const adbCommand = `input tap ${x} ${y}`;
257
+ yield this.executeShellCommand(adbCommand);
258
+ });
259
+ }
260
+ executeAndroidShellCommand(command) {
261
+ return __awaiter(this, void 0, void 0, function* () {
262
+ if (this.runtime !== 'android') {
263
+ throw new base_1.ToolError('This tool is only available on Android devices');
264
+ }
265
+ command = command.replace(/^adb shell /, '');
266
+ yield this.executeShellCommand(command);
267
+ });
268
+ }
226
269
  }
227
270
  exports.OsAgentHandler = OsAgentHandler;
228
271
  class ScreenShotTool extends base_1.BaseAgentTool {
@@ -605,7 +648,8 @@ class AndroidSingleKeyPressTool extends base_1.BaseAgentTool {
605
648
  }
606
649
  execute(command) {
607
650
  return __awaiter(this, void 0, void 0, function* () {
608
- yield this.osAgentHandler.androidKeyPress(command.key);
651
+ const adbCommand = `input keyevent ${command.key.toUpperCase()}`;
652
+ yield this.osAgentHandler.executeShellCommand(adbCommand);
609
653
  return {
610
654
  output: `Pressed Android key ${command.key}`,
611
655
  };
@@ -637,7 +681,8 @@ class AndroidSequenceKeyPressTool extends base_1.BaseAgentTool {
637
681
  }
638
682
  execute(command) {
639
683
  return __awaiter(this, void 0, void 0, function* () {
640
- yield this.osAgentHandler.androidKeySequencePress(command.keys);
684
+ const adbCommand = `input keyevent ${command.keys.map((key) => key.toUpperCase()).join(' ')}`;
685
+ yield this.osAgentHandler.executeShellCommand(adbCommand);
641
686
  return {
642
687
  output: `Pressed Android keys: ${command.keys.join(', ')}`,
643
688
  };
@@ -708,7 +753,7 @@ class ExecuteShellCommandTool extends base_1.BaseAgentTool {
708
753
  toParams() {
709
754
  return {
710
755
  name: 'execute_shell_command_tool',
711
- description: 'Executes a shell command',
756
+ description: 'Executes a shell command. It does not return the output of the command.',
712
757
  input_schema: {
713
758
  type: 'object',
714
759
  properties: {
@@ -729,7 +774,7 @@ class WaitTool extends base_1.BaseAgentTool {
729
774
  }
730
775
  execute(command) {
731
776
  return __awaiter(this, void 0, void 0, function* () {
732
- yield new Promise(resolve => setTimeout(resolve, command.milliseconds));
777
+ yield new Promise((resolve) => setTimeout(resolve, command.milliseconds));
733
778
  return {
734
779
  output: `Waited for ${command.milliseconds} milliseconds`,
735
780
  };
@@ -783,3 +828,155 @@ class PrintTool extends base_1.BaseAgentTool {
783
828
  }
784
829
  }
785
830
  exports.PrintTool = PrintTool;
831
+ class AndroidSwipeTool extends base_1.BaseAgentTool {
832
+ constructor(osAgentHandler) {
833
+ super();
834
+ this.osAgentHandler = osAgentHandler;
835
+ }
836
+ execute(command) {
837
+ return __awaiter(this, void 0, void 0, function* () {
838
+ yield this.osAgentHandler.AndroidSwipeTool(command.startX, command.startY, command.endX, command.endY);
839
+ return {
840
+ output: `Swiped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`,
841
+ };
842
+ });
843
+ }
844
+ toParams() {
845
+ return {
846
+ name: 'android_swipe_tool',
847
+ description: 'Swipes from a starting point to an ending point on the screen',
848
+ input_schema: {
849
+ type: 'object',
850
+ properties: {
851
+ startX: {
852
+ type: 'number',
853
+ description: 'The x (pixels from the left edge) coordinate of the start position',
854
+ },
855
+ startY: {
856
+ type: 'number',
857
+ description: 'The y (pixels from the top edge) coordinate of the start position',
858
+ },
859
+ endX: {
860
+ type: 'number',
861
+ description: 'The x (pixels from the left edge) coordinate of the end position',
862
+ },
863
+ endY: {
864
+ type: 'number',
865
+ description: 'The y (pixels from the top edge) coordinate of the end position',
866
+ },
867
+ },
868
+ required: ['startX', 'startY', 'endX', 'endY'],
869
+ },
870
+ };
871
+ }
872
+ }
873
+ exports.AndroidSwipeTool = AndroidSwipeTool;
874
+ class AndroidDragAndDropTool extends base_1.BaseAgentTool {
875
+ constructor(osAgentHandler) {
876
+ super();
877
+ this.osAgentHandler = osAgentHandler;
878
+ }
879
+ execute(command) {
880
+ return __awaiter(this, void 0, void 0, function* () {
881
+ yield this.osAgentHandler.AndroidDragAndDropTool(command.startX, command.startY, command.endX, command.endY);
882
+ return {
883
+ output: `Dragged and dropped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`,
884
+ };
885
+ });
886
+ }
887
+ toParams() {
888
+ return {
889
+ name: 'android_drag_and_drop_tool',
890
+ description: 'Drags and drops from a starting point to an ending point on the screen',
891
+ input_schema: {
892
+ type: 'object',
893
+ properties: {
894
+ startX: {
895
+ type: 'number',
896
+ description: 'The x (pixels from the left edge) coordinate of the start position',
897
+ },
898
+ startY: {
899
+ type: 'number',
900
+ description: 'The y (pixels from the top edge) coordinate of the start position',
901
+ },
902
+ endX: {
903
+ type: 'number',
904
+ description: 'The x (pixels from the left edge) coordinate of the end position',
905
+ },
906
+ endY: {
907
+ type: 'number',
908
+ description: 'The y (pixels from the top edge) coordinate of the end position',
909
+ },
910
+ },
911
+ required: ['startX', 'startY', 'endX', 'endY'],
912
+ },
913
+ };
914
+ }
915
+ }
916
+ exports.AndroidDragAndDropTool = AndroidDragAndDropTool;
917
+ class AndroidTapTool extends base_1.BaseAgentTool {
918
+ constructor(osAgentHandler) {
919
+ super();
920
+ this.osAgentHandler = osAgentHandler;
921
+ }
922
+ execute(command) {
923
+ return __awaiter(this, void 0, void 0, function* () {
924
+ yield this.osAgentHandler.AndroidTapTool(command.x, command.y);
925
+ return {
926
+ output: `Tapped the screen at ${command.x}, ${command.y}`,
927
+ };
928
+ });
929
+ }
930
+ toParams() {
931
+ return {
932
+ name: 'android_tap_tool',
933
+ description: 'Taps the screen at the specified coordinates',
934
+ input_schema: {
935
+ type: 'object',
936
+ properties: {
937
+ x: {
938
+ type: 'number',
939
+ description: 'The x (pixels from the left edge) coordinate of the tap position',
940
+ },
941
+ y: {
942
+ type: 'number',
943
+ description: 'The y (pixels from the top edge) coordinate of the tap position',
944
+ },
945
+ },
946
+ required: ['x', 'y'],
947
+ },
948
+ };
949
+ }
950
+ }
951
+ exports.AndroidTapTool = AndroidTapTool;
952
+ class AndroidShellCommandTool extends base_1.BaseAgentTool {
953
+ constructor(osAgentHandler) {
954
+ super();
955
+ this.osAgentHandler = osAgentHandler;
956
+ }
957
+ execute(command) {
958
+ return __awaiter(this, void 0, void 0, function* () {
959
+ yield this.osAgentHandler.executeAndroidShellCommand(command.command);
960
+ return {
961
+ output: `Executed shell command: ${command.command}`,
962
+ };
963
+ });
964
+ }
965
+ toParams() {
966
+ return {
967
+ name: 'android_shell_command_tool',
968
+ description: 'Executes a shell command on the Android device. It does not return the output of the command.',
969
+ input_schema: {
970
+ type: 'object',
971
+ properties: {
972
+ command: {
973
+ type: 'string',
974
+ description: 'The shell command to execute without the "adb shell" prefix',
975
+ },
976
+ },
977
+ required: ['command'],
978
+ },
979
+ };
980
+ }
981
+ }
982
+ exports.AndroidShellCommandTool = AndroidShellCommandTool;
@@ -15,4 +15,5 @@ export declare class Action {
15
15
  y: number;
16
16
  }, text: string, parameters?: ActionParameters);
17
17
  static fromJson(action: Action, resizeRatio?: number): Action;
18
+ toJson(): object;
18
19
  }
@@ -11,9 +11,17 @@ class Action {
11
11
  }
12
12
  static fromJson(action, resizeRatio = 1) {
13
13
  return new Action(input_event_1.InputEvent[action.inputEvent], {
14
- x: action.position.x * resizeRatio,
15
- y: action.position.y * resizeRatio,
14
+ x: Math.round(action.position.x * resizeRatio),
15
+ y: Math.round(action.position.y * resizeRatio),
16
16
  }, action.text, action.parameters ? action.parameters : {});
17
17
  }
18
+ toJson() {
19
+ return {
20
+ inputEvent: this.inputEvent,
21
+ parameters: this.parameters,
22
+ position: this.position,
23
+ text: this.text,
24
+ };
25
+ }
18
26
  }
19
27
  exports.Action = Action;
@@ -7,4 +7,5 @@ export declare class ControlCommand {
7
7
  constructor(code: ControlCommandCode, actions: Action[], tryToRepeat?: boolean);
8
8
  static fromJson(json: unknown, resizeRatio?: number): ControlCommand;
9
9
  setTextToBeTyped(text: string): void;
10
+ toJson(): object;
10
11
  }
@@ -18,5 +18,12 @@ class ControlCommand {
18
18
  this.actions = this.actions.map((action) => ([input_event_1.InputEvent.TYPE, input_event_1.InputEvent.TYPE_TEXT].includes(action.inputEvent)
19
19
  ? new action_1.Action(action.inputEvent, action.position, text, action.parameters) : action));
20
20
  }
21
+ toJson() {
22
+ return {
23
+ actions: this.actions.map((action) => action.toJson()),
24
+ code: this.code,
25
+ tryToRepeat: this.tryToRepeat,
26
+ };
27
+ }
21
28
  }
22
29
  exports.ControlCommand = ControlCommand;
@@ -1,6 +1,7 @@
1
1
  import { CustomElementJson } from '../core/model/custom-element-json';
2
2
  import { DetectedElement } from '../core/model/annotation-result/detected-element';
3
3
  import { ModelCompositionBranch } from './model-composition-branch';
4
+ import { RetryStrategy } from './retry-strategies/retry-strategy';
4
5
  export declare enum Separators {
5
6
  STRING = "<|string|>"
6
7
  }
@@ -17,22 +18,27 @@ export interface CommandExecutorContext {
17
18
  customElementsJson: CustomElementJson[];
18
19
  aiElementNames: string[];
19
20
  }
21
+ export interface ExecOptions {
22
+ modelComposition?: ModelCompositionBranch[];
23
+ skipCache?: boolean;
24
+ retryStrategy?: RetryStrategy;
25
+ }
20
26
  declare abstract class FluentBase {
21
27
  protected prev?: FluentBase | undefined;
22
28
  constructor(prev?: FluentBase | undefined);
23
29
  protected _textStr: string;
24
30
  protected _params: Map<string, unknown>;
25
31
  protected static addParams(paramsList: Map<string, unknown[]>, params: Map<string, unknown>): Map<string, unknown[]>;
26
- protected fluentCommandStringBuilder(modelComposition: ModelCompositionBranch[], currentInstruction?: string, paramsList?: Map<string, unknown[]>): Promise<void>;
32
+ protected fluentCommandStringBuilder(modelComposition?: ModelCompositionBranch[], skipCache?: boolean, retryStrategy?: RetryStrategy, currentInstruction?: string, paramsList?: Map<string, unknown[]>): Promise<void>;
27
33
  protected getterStringBuilder(currentInstruction?: string, paramsList?: Map<string, unknown[]>): Promise<DetectedElement[]>;
28
34
  protected get textStr(): string;
29
35
  protected get params(): Map<string, unknown>;
30
36
  }
31
37
  export interface Executable {
32
- exec(): Promise<void>;
38
+ exec(execOptions?: ExecOptions): Promise<void>;
33
39
  }
34
40
  export declare class Exec extends FluentBase implements Executable {
35
- exec(modelComposition?: ModelCompositionBranch[]): Promise<void>;
41
+ exec(execOptions?: ExecOptions): Promise<void>;
36
42
  }
37
43
  export declare class FluentFilters extends FluentBase {
38
44
  /**
@@ -808,7 +814,7 @@ export declare class FluentFiltersOrRelations extends FluentFilters {
808
814
  * @return {FluentFilters}
809
815
  */
810
816
  contains(): FluentFilters;
811
- exec(modelComposition?: ModelCompositionBranch[]): Promise<void>;
817
+ exec(execOptions?: ExecOptions): Promise<void>;
812
818
  }
813
819
  export declare class FluentFiltersCondition extends FluentBase {
814
820
  /**
@@ -1634,6 +1640,7 @@ export declare class FluentFiltersOrRelationsCondition extends FluentFiltersCond
1634
1640
  notExists(): ExecCondition;
1635
1641
  }
1636
1642
  declare class ExecCondition extends Exec {
1643
+ exec(execOptions?: ExecOptions): Promise<void>;
1637
1644
  }
1638
1645
  export declare abstract class FluentCommand extends FluentBase {
1639
1646
  constructor();
@@ -2082,7 +2089,7 @@ export declare abstract class FluentCommand extends FluentBase {
2082
2089
  * @return {Exec}
2083
2090
  */
2084
2091
  pressAndroidKey(key: ANDROID_KEY): Exec;
2085
- abstract fluentCommandExecutor(instruction: string, modelComposition: ModelCompositionBranch[], context: CommandExecutorContext): Promise<void>;
2092
+ abstract fluentCommandExecutor(instruction: string, modelComposition: ModelCompositionBranch[], context: CommandExecutorContext, skipCache: boolean, retryStrategy?: RetryStrategy): Promise<void>;
2086
2093
  }
2087
2094
  export interface ExecutableGetter {
2088
2095
  exec(): Promise<DetectedElement[]>;