askui 0.29.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,12 +4,12 @@ import { ExecutionRuntime } from '../../../execution/execution-runtime';
4
4
  export declare class AskUIAgent extends ClaudeAgent {
5
5
  private osAgentHandler;
6
6
  private executionRuntime;
7
+ private runtime;
7
8
  constructor(executionRuntime: ExecutionRuntime);
8
9
  isConnected(): boolean;
9
10
  initializeOsAgentHandler(): Promise<void>;
10
11
  getOsAgentHandler(): OsAgentHandler;
11
- configureAsDesktopAgent(): Promise<void>;
12
- configureAsAndroidAgent(): Promise<void>;
12
+ configureAgent(): Promise<void>;
13
13
  private static DesktopSystemPrompt;
14
14
  private static AndroidSystemPrompt;
15
15
  }
@@ -16,6 +16,7 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
16
16
  constructor(executionRuntime) {
17
17
  super((params) => executionRuntime.predictActResponse(params));
18
18
  this.osAgentHandler = undefined;
19
+ this.runtime = 'desktop';
19
20
  this.executionRuntime = executionRuntime;
20
21
  }
21
22
  isConnected() {
@@ -24,6 +25,7 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
24
25
  initializeOsAgentHandler() {
25
26
  return __awaiter(this, void 0, void 0, function* () {
26
27
  this.osAgentHandler = yield os_agent_tools_1.OsAgentHandler.createInstance(this.executionRuntime);
28
+ this.runtime = this.osAgentHandler.runtime;
27
29
  });
28
30
  }
29
31
  getOsAgentHandler() {
@@ -32,50 +34,48 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
32
34
  }
33
35
  return this.osAgentHandler;
34
36
  }
35
- configureAsDesktopAgent() {
37
+ configureAgent() {
36
38
  return __awaiter(this, void 0, void 0, function* () {
37
39
  if (!this.osAgentHandler) {
38
40
  throw new Error('Agent OS client is not connected');
39
41
  }
40
- const tools = [
42
+ let systemPrompt = AskUIAgent.DesktopSystemPrompt;
43
+ let tools = [
41
44
  new os_agent_tools_1.AgentErrorTool(),
42
45
  new os_agent_tools_1.PrintTool(),
43
- new os_agent_tools_1.ScreenShotTool(this.osAgentHandler),
44
- new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
45
- new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
46
- new os_agent_tools_1.MouseScrollTool(this.osAgentHandler),
47
- new os_agent_tools_1.TypeTool(this.osAgentHandler),
48
- new os_agent_tools_1.DesktopPressAndReleaseKeysTool(this.osAgentHandler),
49
- new os_agent_tools_1.DesktopKeyHoldDownTool(this.osAgentHandler),
50
- new os_agent_tools_1.DesktopKeyReleaseTool(this.osAgentHandler),
51
- new os_agent_tools_1.MouseHoldLeftButtonDownTool(this.osAgentHandler),
52
- new os_agent_tools_1.MouseReleaseLeftButtonTool(this.osAgentHandler),
53
- new os_agent_tools_1.MouseDragAndDropTool(this.osAgentHandler),
54
46
  new os_agent_tools_1.WaitTool(),
55
- ];
56
- this.setTools(tools);
57
- this.setSystemPrompt(AskUIAgent.DesktopSystemPrompt);
58
- });
59
- }
60
- configureAsAndroidAgent() {
61
- return __awaiter(this, void 0, void 0, function* () {
62
- if (!this.osAgentHandler) {
63
- throw new Error('Agent OS client is not connected');
64
- }
65
- const tools = [
66
- new os_agent_tools_1.AgentErrorTool(),
67
47
  new os_agent_tools_1.ScreenShotTool(this.osAgentHandler),
68
- new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
69
- new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
70
- new os_agent_tools_1.MouseScrollTool(this.osAgentHandler),
71
- new os_agent_tools_1.AndroidSingleKeyPressTool(this.osAgentHandler),
72
- new os_agent_tools_1.AndroidSequenceKeyPressTool(this.osAgentHandler),
73
48
  new os_agent_tools_1.TypeTool(this.osAgentHandler),
74
- new os_agent_tools_1.ExecuteShellCommandTool(this.osAgentHandler),
75
- new os_agent_tools_1.WaitTool(),
76
49
  ];
50
+ if (this.runtime === 'desktop') {
51
+ tools = [
52
+ ...tools,
53
+ new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
54
+ new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
55
+ new os_agent_tools_1.MouseScrollTool(this.osAgentHandler),
56
+ new os_agent_tools_1.DesktopPressAndReleaseKeysTool(this.osAgentHandler),
57
+ new os_agent_tools_1.DesktopKeyHoldDownTool(this.osAgentHandler),
58
+ new os_agent_tools_1.DesktopKeyReleaseTool(this.osAgentHandler),
59
+ new os_agent_tools_1.MouseHoldLeftButtonDownTool(this.osAgentHandler),
60
+ new os_agent_tools_1.MouseReleaseLeftButtonTool(this.osAgentHandler),
61
+ new os_agent_tools_1.MouseDragAndDropTool(this.osAgentHandler),
62
+ new os_agent_tools_1.ExecuteShellCommandTool(this.osAgentHandler),
63
+ ];
64
+ }
65
+ if (this.runtime === 'android') {
66
+ tools = [
67
+ ...tools,
68
+ new os_agent_tools_1.AndroidSingleKeyPressTool(this.osAgentHandler),
69
+ new os_agent_tools_1.AndroidSequenceKeyPressTool(this.osAgentHandler),
70
+ new os_agent_tools_1.AndroidSwipeTool(this.osAgentHandler),
71
+ new os_agent_tools_1.AndroidDragAndDropTool(this.osAgentHandler),
72
+ new os_agent_tools_1.AndroidTapTool(this.osAgentHandler),
73
+ new os_agent_tools_1.AndroidShellCommandTool(this.osAgentHandler),
74
+ ];
75
+ systemPrompt = AskUIAgent.AndroidSystemPrompt;
76
+ }
77
77
  this.setTools(tools);
78
- this.setSystemPrompt(AskUIAgent.AndroidSystemPrompt);
78
+ this.setSystemPrompt(systemPrompt);
79
79
  });
80
80
  }
81
81
  }
@@ -5,13 +5,14 @@ import { ExecutionRuntime } from '../../../../execution/execution-runtime';
5
5
  import { ControlCommand } from '../../../ui-control-commands';
6
6
  export declare class OsAgentHandler {
7
7
  private AgentOsClient;
8
- private targetResolution;
9
8
  private screenDimensions;
9
+ runtime: 'android' | 'desktop';
10
+ private targetResolution;
10
11
  private paddingInfo;
11
12
  constructor(AgentOsClient: ExecutionRuntime, screenDimensions: {
12
13
  width: number;
13
14
  height: number;
14
- });
15
+ }, runtime: 'android' | 'desktop');
15
16
  private updatePaddingInfo;
16
17
  static createInstance(AgentOsClient: ExecutionRuntime): Promise<OsAgentHandler>;
17
18
  getTargetResolution(): {
@@ -27,7 +28,7 @@ export declare class OsAgentHandler {
27
28
  scaleCoordinates(source: 'api' | 'computer', x: number, y: number): [number, number];
28
29
  requestControl(controlCommand: ControlCommand): Promise<void>;
29
30
  mouseMove(x: number, y: number): Promise<void>;
30
- mouseClick(button: "left" | "right" | "middle", doubleClick: boolean): Promise<void>;
31
+ mouseClick(button: 'left' | 'right' | 'middle', doubleClick: boolean): Promise<void>;
31
32
  mouseScroll(dx: number, dy: number): Promise<void>;
32
33
  mouseHoldLeftButtonDown(): Promise<void>;
33
34
  mouseReleaseLeftButton(): Promise<void>;
@@ -38,6 +39,10 @@ export declare class OsAgentHandler {
38
39
  androidKeyPress(key: ANDROID_KEY): Promise<void>;
39
40
  androidKeySequencePress(keys: ANDROID_KEY[]): Promise<void>;
40
41
  executeShellCommand(command: string): Promise<void>;
42
+ AndroidSwipeTool(startX: number, startY: number, endX: number, endY: number): Promise<void>;
43
+ AndroidDragAndDropTool(startX: number, startY: number, endX: number, endY: number): Promise<void>;
44
+ AndroidTapTool(x: number, y: number): Promise<void>;
45
+ executeAndroidShellCommand(command: string): Promise<void>;
41
46
  }
42
47
  export declare class ScreenShotTool extends BaseAgentTool {
43
48
  private osAgentHandler;
@@ -175,3 +180,42 @@ export declare class PrintTool extends BaseAgentTool {
175
180
  }): Promise<ToolResult>;
176
181
  toParams(): BetaTool;
177
182
  }
183
+ export declare class AndroidSwipeTool extends BaseAgentTool {
184
+ private osAgentHandler;
185
+ constructor(osAgentHandler: OsAgentHandler);
186
+ execute(command: {
187
+ startX: number;
188
+ startY: number;
189
+ endX: number;
190
+ endY: number;
191
+ }): Promise<ToolResult>;
192
+ toParams(): BetaTool;
193
+ }
194
+ export declare class AndroidDragAndDropTool extends BaseAgentTool {
195
+ private osAgentHandler;
196
+ constructor(osAgentHandler: OsAgentHandler);
197
+ execute(command: {
198
+ startX: number;
199
+ startY: number;
200
+ endX: number;
201
+ endY: number;
202
+ }): Promise<ToolResult>;
203
+ toParams(): BetaTool;
204
+ }
205
+ export declare class AndroidTapTool extends BaseAgentTool {
206
+ private osAgentHandler;
207
+ constructor(osAgentHandler: OsAgentHandler);
208
+ execute(command: {
209
+ x: number;
210
+ y: number;
211
+ }): Promise<ToolResult>;
212
+ toParams(): BetaTool;
213
+ }
214
+ export declare class AndroidShellCommandTool extends BaseAgentTool {
215
+ private osAgentHandler;
216
+ constructor(osAgentHandler: OsAgentHandler);
217
+ execute(command: {
218
+ command: string;
219
+ }): Promise<ToolResult>;
220
+ toParams(): BetaTool;
221
+ }
@@ -9,15 +9,17 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
9
9
  });
10
10
  };
11
11
  Object.defineProperty(exports, "__esModule", { value: true });
12
- exports.PrintTool = exports.WaitTool = exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopKeyReleaseTool = exports.DesktopKeyHoldDownTool = exports.DesktopPressAndReleaseKeysTool = exports.MouseReleaseLeftButtonTool = exports.MouseHoldLeftButtonDownTool = exports.MouseDragAndDropTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
12
+ exports.AndroidShellCommandTool = exports.AndroidTapTool = exports.AndroidDragAndDropTool = exports.AndroidSwipeTool = exports.PrintTool = exports.WaitTool = exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopKeyReleaseTool = exports.DesktopKeyHoldDownTool = exports.DesktopPressAndReleaseKeysTool = exports.MouseReleaseLeftButtonTool = exports.MouseHoldLeftButtonDownTool = exports.MouseDragAndDropTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
13
13
  const dsl_1 = require("../../../../execution/dsl");
14
14
  const base_1 = require("./base");
15
15
  const ui_control_commands_1 = require("../../../ui-control-commands");
16
16
  const base_64_image_1 = require("../../../../utils/base_64_image/base-64-image");
17
17
  const agent_errors_1 = require("./agent-errors");
18
18
  class OsAgentHandler {
19
- constructor(AgentOsClient, screenDimensions) {
19
+ constructor(AgentOsClient, screenDimensions, runtime) {
20
20
  this.AgentOsClient = AgentOsClient;
21
+ this.screenDimensions = screenDimensions;
22
+ this.runtime = runtime;
21
23
  this.targetResolution = { width: 1280, height: 800 };
22
24
  this.paddingInfo = null;
23
25
  this.screenDimensions = screenDimensions;
@@ -48,7 +50,7 @@ class OsAgentHandler {
48
50
  scaledWidth,
49
51
  scaledHeight,
50
52
  padLeft,
51
- padTop
53
+ padTop,
52
54
  };
53
55
  }
54
56
  // Add image support to act, and check for function overload in TypeScript.
@@ -56,10 +58,12 @@ class OsAgentHandler {
56
58
  return __awaiter(this, void 0, void 0, function* () {
57
59
  const base64ImageString = yield AgentOsClient.getScreenshot();
58
60
  const image_info = yield (yield base_64_image_1.Base64Image.fromString(base64ImageString)).getInfo();
61
+ const startingArguments = yield AgentOsClient.getStartingArguments();
62
+ const runtime = startingArguments['runtime'] === 'android' ? 'android' : 'desktop';
59
63
  return new OsAgentHandler(AgentOsClient, {
60
64
  width: image_info.width,
61
65
  height: image_info.height,
62
- });
66
+ }, runtime);
63
67
  });
64
68
  }
65
69
  getTargetResolution() {
@@ -90,7 +94,7 @@ class OsAgentHandler {
90
94
  if (!this.paddingInfo) {
91
95
  throw new base_1.ToolError('Padding information not initialized');
92
96
  }
93
- const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop } = this.paddingInfo;
97
+ const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop, } = this.paddingInfo;
94
98
  if (source === 'api') {
95
99
  if (x > this.targetResolution.width || y > this.targetResolution.height || x < 0 || y < 0) {
96
100
  throw new base_1.ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
@@ -131,23 +135,21 @@ class OsAgentHandler {
131
135
  return __awaiter(this, void 0, void 0, function* () {
132
136
  let action = ui_control_commands_1.InputEvent.MOUSE_CLICK_LEFT;
133
137
  if (doubleClick) {
134
- if (button === "left") {
138
+ if (button === 'left') {
135
139
  action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_LEFT;
136
140
  }
137
- else if (button === "right") {
141
+ else if (button === 'right') {
138
142
  action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_RIGHT;
139
143
  }
140
- else if (button === "middle") {
144
+ else if (button === 'middle') {
141
145
  action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE;
142
146
  }
143
147
  }
144
- else {
145
- if (button === "right") {
146
- action = ui_control_commands_1.InputEvent.MOUSE_CLICK_RIGHT;
147
- }
148
- else if (button === "middle") {
149
- action = ui_control_commands_1.InputEvent.MOUSE_CLICK_MIDDLE;
150
- }
148
+ else if (button === 'right') {
149
+ action = ui_control_commands_1.InputEvent.MOUSE_CLICK_RIGHT;
150
+ }
151
+ else if (button === 'middle') {
152
+ action = ui_control_commands_1.InputEvent.MOUSE_CLICK_MIDDLE;
151
153
  }
152
154
  const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(action, { x: 0, y: 0 }, '', {})]);
153
155
  yield this.requestControl(controlCommand);
@@ -184,8 +186,8 @@ class OsAgentHandler {
184
186
  desktopKeyHoldDown(key_1) {
185
187
  return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
186
188
  const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.KEY_PRESS, { x: 0, y: 0 }, '', {
187
- key: key,
188
- modifiers: modifiers,
189
+ key,
190
+ modifiers,
189
191
  })]);
190
192
  yield this.requestControl(controlCommand);
191
193
  });
@@ -193,8 +195,8 @@ class OsAgentHandler {
193
195
  desktopKeyRelease(key_1) {
194
196
  return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
195
197
  const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.KEY_RELEASE, { x: 0, y: 0 }, '', {
196
- key: key,
197
- modifiers: modifiers,
198
+ key,
199
+ modifiers,
198
200
  })]);
199
201
  yield this.requestControl(controlCommand);
200
202
  });
@@ -223,6 +225,47 @@ class OsAgentHandler {
223
225
  yield this.requestControl(controlCommand);
224
226
  });
225
227
  }
228
+ AndroidSwipeTool(startX, startY, endX, endY) {
229
+ return __awaiter(this, void 0, void 0, function* () {
230
+ if (this.runtime !== 'android') {
231
+ throw new base_1.ToolError('This tool is only available on Android devices');
232
+ }
233
+ [startX, startY] = this.scaleCoordinates('api', startX, startY);
234
+ [endX, endY] = this.scaleCoordinates('api', endX, endY);
235
+ const adbCommand = `input swipe ${startX} ${startY} ${endX} ${endY}`;
236
+ yield this.executeShellCommand(adbCommand);
237
+ });
238
+ }
239
+ AndroidDragAndDropTool(startX, startY, endX, endY) {
240
+ return __awaiter(this, void 0, void 0, function* () {
241
+ if (this.runtime !== 'android') {
242
+ throw new base_1.ToolError('This tool is only available on Android devices');
243
+ }
244
+ [startX, startY] = this.scaleCoordinates('api', startX, startY);
245
+ [endX, endY] = this.scaleCoordinates('api', endX, endY);
246
+ const adbCommand = `input draganddrop ${startX} ${startY} ${endX} ${endY}`;
247
+ yield this.executeShellCommand(adbCommand);
248
+ });
249
+ }
250
+ AndroidTapTool(x, y) {
251
+ return __awaiter(this, void 0, void 0, function* () {
252
+ if (this.runtime !== 'android') {
253
+ throw new base_1.ToolError('This tool is only available on Android devices');
254
+ }
255
+ [x, y] = this.scaleCoordinates('api', x, y);
256
+ const adbCommand = `input tap ${x} ${y}`;
257
+ yield this.executeShellCommand(adbCommand);
258
+ });
259
+ }
260
+ executeAndroidShellCommand(command) {
261
+ return __awaiter(this, void 0, void 0, function* () {
262
+ if (this.runtime !== 'android') {
263
+ throw new base_1.ToolError('This tool is only available on Android devices');
264
+ }
265
+ command = command.replace(/^adb shell /, '');
266
+ yield this.executeShellCommand(command);
267
+ });
268
+ }
226
269
  }
227
270
  exports.OsAgentHandler = OsAgentHandler;
228
271
  class ScreenShotTool extends base_1.BaseAgentTool {
@@ -605,7 +648,8 @@ class AndroidSingleKeyPressTool extends base_1.BaseAgentTool {
605
648
  }
606
649
  execute(command) {
607
650
  return __awaiter(this, void 0, void 0, function* () {
608
- yield this.osAgentHandler.androidKeyPress(command.key);
651
+ const adbCommand = `input keyevent ${command.key.toUpperCase()}`;
652
+ yield this.osAgentHandler.executeShellCommand(adbCommand);
609
653
  return {
610
654
  output: `Pressed Android key ${command.key}`,
611
655
  };
@@ -637,7 +681,8 @@ class AndroidSequenceKeyPressTool extends base_1.BaseAgentTool {
637
681
  }
638
682
  execute(command) {
639
683
  return __awaiter(this, void 0, void 0, function* () {
640
- yield this.osAgentHandler.androidKeySequencePress(command.keys);
684
+ const adbCommand = `input keyevent ${command.keys.map((key) => key.toUpperCase()).join(' ')}`;
685
+ yield this.osAgentHandler.executeShellCommand(adbCommand);
641
686
  return {
642
687
  output: `Pressed Android keys: ${command.keys.join(', ')}`,
643
688
  };
@@ -708,7 +753,7 @@ class ExecuteShellCommandTool extends base_1.BaseAgentTool {
708
753
  toParams() {
709
754
  return {
710
755
  name: 'execute_shell_command_tool',
711
- description: 'Executes a shell command',
756
+ description: 'Executes a shell command. It does not return the output of the command.',
712
757
  input_schema: {
713
758
  type: 'object',
714
759
  properties: {
@@ -729,7 +774,7 @@ class WaitTool extends base_1.BaseAgentTool {
729
774
  }
730
775
  execute(command) {
731
776
  return __awaiter(this, void 0, void 0, function* () {
732
- yield new Promise(resolve => setTimeout(resolve, command.milliseconds));
777
+ yield new Promise((resolve) => setTimeout(resolve, command.milliseconds));
733
778
  return {
734
779
  output: `Waited for ${command.milliseconds} milliseconds`,
735
780
  };
@@ -783,3 +828,155 @@ class PrintTool extends base_1.BaseAgentTool {
783
828
  }
784
829
  }
785
830
  exports.PrintTool = PrintTool;
831
+ class AndroidSwipeTool extends base_1.BaseAgentTool {
832
+ constructor(osAgentHandler) {
833
+ super();
834
+ this.osAgentHandler = osAgentHandler;
835
+ }
836
+ execute(command) {
837
+ return __awaiter(this, void 0, void 0, function* () {
838
+ yield this.osAgentHandler.AndroidSwipeTool(command.startX, command.startY, command.endX, command.endY);
839
+ return {
840
+ output: `Swiped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`,
841
+ };
842
+ });
843
+ }
844
+ toParams() {
845
+ return {
846
+ name: 'android_swipe_tool',
847
+ description: 'Swipes from a starting point to an ending point on the screen',
848
+ input_schema: {
849
+ type: 'object',
850
+ properties: {
851
+ startX: {
852
+ type: 'number',
853
+ description: 'The x (pixels from the left edge) coordinate of the start position',
854
+ },
855
+ startY: {
856
+ type: 'number',
857
+ description: 'The y (pixels from the top edge) coordinate of the start position',
858
+ },
859
+ endX: {
860
+ type: 'number',
861
+ description: 'The x (pixels from the left edge) coordinate of the end position',
862
+ },
863
+ endY: {
864
+ type: 'number',
865
+ description: 'The y (pixels from the top edge) coordinate of the end position',
866
+ },
867
+ },
868
+ required: ['startX', 'startY', 'endX', 'endY'],
869
+ },
870
+ };
871
+ }
872
+ }
873
+ exports.AndroidSwipeTool = AndroidSwipeTool;
874
+ class AndroidDragAndDropTool extends base_1.BaseAgentTool {
875
+ constructor(osAgentHandler) {
876
+ super();
877
+ this.osAgentHandler = osAgentHandler;
878
+ }
879
+ execute(command) {
880
+ return __awaiter(this, void 0, void 0, function* () {
881
+ yield this.osAgentHandler.AndroidDragAndDropTool(command.startX, command.startY, command.endX, command.endY);
882
+ return {
883
+ output: `Dragged and dropped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`,
884
+ };
885
+ });
886
+ }
887
+ toParams() {
888
+ return {
889
+ name: 'android_drag_and_drop_tool',
890
+ description: 'Drags and drops from a starting point to an ending point on the screen',
891
+ input_schema: {
892
+ type: 'object',
893
+ properties: {
894
+ startX: {
895
+ type: 'number',
896
+ description: 'The x (pixels from the left edge) coordinate of the start position',
897
+ },
898
+ startY: {
899
+ type: 'number',
900
+ description: 'The y (pixels from the top edge) coordinate of the start position',
901
+ },
902
+ endX: {
903
+ type: 'number',
904
+ description: 'The x (pixels from the left edge) coordinate of the end position',
905
+ },
906
+ endY: {
907
+ type: 'number',
908
+ description: 'The y (pixels from the top edge) coordinate of the end position',
909
+ },
910
+ },
911
+ required: ['startX', 'startY', 'endX', 'endY'],
912
+ },
913
+ };
914
+ }
915
+ }
916
+ exports.AndroidDragAndDropTool = AndroidDragAndDropTool;
917
+ class AndroidTapTool extends base_1.BaseAgentTool {
918
+ constructor(osAgentHandler) {
919
+ super();
920
+ this.osAgentHandler = osAgentHandler;
921
+ }
922
+ execute(command) {
923
+ return __awaiter(this, void 0, void 0, function* () {
924
+ yield this.osAgentHandler.AndroidTapTool(command.x, command.y);
925
+ return {
926
+ output: `Tapped the screen at ${command.x}, ${command.y}`,
927
+ };
928
+ });
929
+ }
930
+ toParams() {
931
+ return {
932
+ name: 'android_tap_tool',
933
+ description: 'Taps the screen at the specified coordinates',
934
+ input_schema: {
935
+ type: 'object',
936
+ properties: {
937
+ x: {
938
+ type: 'number',
939
+ description: 'The x (pixels from the left edge) coordinate of the tap position',
940
+ },
941
+ y: {
942
+ type: 'number',
943
+ description: 'The y (pixels from the top edge) coordinate of the tap position',
944
+ },
945
+ },
946
+ required: ['x', 'y'],
947
+ },
948
+ };
949
+ }
950
+ }
951
+ exports.AndroidTapTool = AndroidTapTool;
952
+ class AndroidShellCommandTool extends base_1.BaseAgentTool {
953
+ constructor(osAgentHandler) {
954
+ super();
955
+ this.osAgentHandler = osAgentHandler;
956
+ }
957
+ execute(command) {
958
+ return __awaiter(this, void 0, void 0, function* () {
959
+ yield this.osAgentHandler.executeAndroidShellCommand(command.command);
960
+ return {
961
+ output: `Executed shell command: ${command.command}`,
962
+ };
963
+ });
964
+ }
965
+ toParams() {
966
+ return {
967
+ name: 'android_shell_command_tool',
968
+ description: 'Executes a shell command on the Android device. It does not return the output of the command.',
969
+ input_schema: {
970
+ type: 'object',
971
+ properties: {
972
+ command: {
973
+ type: 'string',
974
+ description: 'The shell command to execute without the "adb shell" prefix',
975
+ },
976
+ },
977
+ required: ['command'],
978
+ },
979
+ };
980
+ }
981
+ }
982
+ exports.AndroidShellCommandTool = AndroidShellCommandTool;
@@ -563,7 +563,6 @@ export declare class UiControlClient extends ApiCommands {
563
563
  * #### Cross-Platform Coordination
564
564
  * ```typescript
565
565
  * // Share context between desktop and mobile agents
566
- * await auiAndroid.agent.configureAsAndroidAgent();
567
566
  *
568
567
  * const history = await auiDesktop.act("Copy username from desktop app");
569
568
  * await auiAndroid.act("Paste username into mobile login", {
@@ -46,7 +46,7 @@ class UiControlClient extends dsl_1.ApiCommands {
46
46
  return __awaiter(this, void 0, void 0, function* () {
47
47
  const connectionState = yield this.executionRuntime.connect();
48
48
  yield this.agent.initializeOsAgentHandler();
49
- yield this.agent.configureAsDesktopAgent();
49
+ yield this.agent.configureAgent();
50
50
  return connectionState;
51
51
  });
52
52
  }
@@ -4,12 +4,12 @@ import { ExecutionRuntime } from '../../../execution/execution-runtime';
4
4
  export declare class AskUIAgent extends ClaudeAgent {
5
5
  private osAgentHandler;
6
6
  private executionRuntime;
7
+ private runtime;
7
8
  constructor(executionRuntime: ExecutionRuntime);
8
9
  isConnected(): boolean;
9
10
  initializeOsAgentHandler(): Promise<void>;
10
11
  getOsAgentHandler(): OsAgentHandler;
11
- configureAsDesktopAgent(): Promise<void>;
12
- configureAsAndroidAgent(): Promise<void>;
12
+ configureAgent(): Promise<void>;
13
13
  private static DesktopSystemPrompt;
14
14
  private static AndroidSystemPrompt;
15
15
  }
@@ -7,12 +7,13 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
7
7
  step((generator = generator.apply(thisArg, _arguments || [])).next());
8
8
  });
9
9
  };
10
- import { DesktopPressAndReleaseKeysTool, MouseClickTool, MouseMoveTool, MouseScrollTool, OsAgentHandler, ScreenShotTool, TypeTool, AgentErrorTool, AndroidSequenceKeyPressTool, AndroidSingleKeyPressTool, ExecuteShellCommandTool, DesktopKeyHoldDownTool, DesktopKeyReleaseTool, MouseReleaseLeftButtonTool, MouseHoldLeftButtonDownTool, MouseDragAndDropTool, WaitTool, PrintTool, } from './tools/os-agent-tools';
10
+ import { DesktopPressAndReleaseKeysTool, MouseClickTool, MouseMoveTool, MouseScrollTool, OsAgentHandler, ScreenShotTool, TypeTool, AgentErrorTool, AndroidSequenceKeyPressTool, AndroidSingleKeyPressTool, ExecuteShellCommandTool, DesktopKeyHoldDownTool, DesktopKeyReleaseTool, MouseReleaseLeftButtonTool, MouseHoldLeftButtonDownTool, MouseDragAndDropTool, WaitTool, PrintTool, AndroidSwipeTool, AndroidDragAndDropTool, AndroidTapTool, AndroidShellCommandTool, } from './tools/os-agent-tools';
11
11
  import { ClaudeAgent } from './claude-agent';
12
12
  export class AskUIAgent extends ClaudeAgent {
13
13
  constructor(executionRuntime) {
14
14
  super((params) => executionRuntime.predictActResponse(params));
15
15
  this.osAgentHandler = undefined;
16
+ this.runtime = 'desktop';
16
17
  this.executionRuntime = executionRuntime;
17
18
  }
18
19
  isConnected() {
@@ -21,6 +22,7 @@ export class AskUIAgent extends ClaudeAgent {
21
22
  initializeOsAgentHandler() {
22
23
  return __awaiter(this, void 0, void 0, function* () {
23
24
  this.osAgentHandler = yield OsAgentHandler.createInstance(this.executionRuntime);
25
+ this.runtime = this.osAgentHandler.runtime;
24
26
  });
25
27
  }
26
28
  getOsAgentHandler() {
@@ -29,50 +31,48 @@ export class AskUIAgent extends ClaudeAgent {
29
31
  }
30
32
  return this.osAgentHandler;
31
33
  }
32
- configureAsDesktopAgent() {
34
+ configureAgent() {
33
35
  return __awaiter(this, void 0, void 0, function* () {
34
36
  if (!this.osAgentHandler) {
35
37
  throw new Error('Agent OS client is not connected');
36
38
  }
37
- const tools = [
39
+ let systemPrompt = AskUIAgent.DesktopSystemPrompt;
40
+ let tools = [
38
41
  new AgentErrorTool(),
39
42
  new PrintTool(),
40
- new ScreenShotTool(this.osAgentHandler),
41
- new MouseMoveTool(this.osAgentHandler),
42
- new MouseClickTool(this.osAgentHandler),
43
- new MouseScrollTool(this.osAgentHandler),
44
- new TypeTool(this.osAgentHandler),
45
- new DesktopPressAndReleaseKeysTool(this.osAgentHandler),
46
- new DesktopKeyHoldDownTool(this.osAgentHandler),
47
- new DesktopKeyReleaseTool(this.osAgentHandler),
48
- new MouseHoldLeftButtonDownTool(this.osAgentHandler),
49
- new MouseReleaseLeftButtonTool(this.osAgentHandler),
50
- new MouseDragAndDropTool(this.osAgentHandler),
51
43
  new WaitTool(),
52
- ];
53
- this.setTools(tools);
54
- this.setSystemPrompt(AskUIAgent.DesktopSystemPrompt);
55
- });
56
- }
57
- configureAsAndroidAgent() {
58
- return __awaiter(this, void 0, void 0, function* () {
59
- if (!this.osAgentHandler) {
60
- throw new Error('Agent OS client is not connected');
61
- }
62
- const tools = [
63
- new AgentErrorTool(),
64
44
  new ScreenShotTool(this.osAgentHandler),
65
- new MouseMoveTool(this.osAgentHandler),
66
- new MouseClickTool(this.osAgentHandler),
67
- new MouseScrollTool(this.osAgentHandler),
68
- new AndroidSingleKeyPressTool(this.osAgentHandler),
69
- new AndroidSequenceKeyPressTool(this.osAgentHandler),
70
45
  new TypeTool(this.osAgentHandler),
71
- new ExecuteShellCommandTool(this.osAgentHandler),
72
- new WaitTool(),
73
46
  ];
47
+ if (this.runtime === 'desktop') {
48
+ tools = [
49
+ ...tools,
50
+ new MouseMoveTool(this.osAgentHandler),
51
+ new MouseClickTool(this.osAgentHandler),
52
+ new MouseScrollTool(this.osAgentHandler),
53
+ new DesktopPressAndReleaseKeysTool(this.osAgentHandler),
54
+ new DesktopKeyHoldDownTool(this.osAgentHandler),
55
+ new DesktopKeyReleaseTool(this.osAgentHandler),
56
+ new MouseHoldLeftButtonDownTool(this.osAgentHandler),
57
+ new MouseReleaseLeftButtonTool(this.osAgentHandler),
58
+ new MouseDragAndDropTool(this.osAgentHandler),
59
+ new ExecuteShellCommandTool(this.osAgentHandler),
60
+ ];
61
+ }
62
+ if (this.runtime === 'android') {
63
+ tools = [
64
+ ...tools,
65
+ new AndroidSingleKeyPressTool(this.osAgentHandler),
66
+ new AndroidSequenceKeyPressTool(this.osAgentHandler),
67
+ new AndroidSwipeTool(this.osAgentHandler),
68
+ new AndroidDragAndDropTool(this.osAgentHandler),
69
+ new AndroidTapTool(this.osAgentHandler),
70
+ new AndroidShellCommandTool(this.osAgentHandler),
71
+ ];
72
+ systemPrompt = AskUIAgent.AndroidSystemPrompt;
73
+ }
74
74
  this.setTools(tools);
75
- this.setSystemPrompt(AskUIAgent.AndroidSystemPrompt);
75
+ this.setSystemPrompt(systemPrompt);
76
76
  });
77
77
  }
78
78
  }
@@ -5,13 +5,14 @@ import { ExecutionRuntime } from '../../../../execution/execution-runtime';
5
5
  import { ControlCommand } from '../../../ui-control-commands';
6
6
  export declare class OsAgentHandler {
7
7
  private AgentOsClient;
8
- private targetResolution;
9
8
  private screenDimensions;
9
+ runtime: 'android' | 'desktop';
10
+ private targetResolution;
10
11
  private paddingInfo;
11
12
  constructor(AgentOsClient: ExecutionRuntime, screenDimensions: {
12
13
  width: number;
13
14
  height: number;
14
- });
15
+ }, runtime: 'android' | 'desktop');
15
16
  private updatePaddingInfo;
16
17
  static createInstance(AgentOsClient: ExecutionRuntime): Promise<OsAgentHandler>;
17
18
  getTargetResolution(): {
@@ -27,7 +28,7 @@ export declare class OsAgentHandler {
27
28
  scaleCoordinates(source: 'api' | 'computer', x: number, y: number): [number, number];
28
29
  requestControl(controlCommand: ControlCommand): Promise<void>;
29
30
  mouseMove(x: number, y: number): Promise<void>;
30
- mouseClick(button: "left" | "right" | "middle", doubleClick: boolean): Promise<void>;
31
+ mouseClick(button: 'left' | 'right' | 'middle', doubleClick: boolean): Promise<void>;
31
32
  mouseScroll(dx: number, dy: number): Promise<void>;
32
33
  mouseHoldLeftButtonDown(): Promise<void>;
33
34
  mouseReleaseLeftButton(): Promise<void>;
@@ -38,6 +39,10 @@ export declare class OsAgentHandler {
38
39
  androidKeyPress(key: ANDROID_KEY): Promise<void>;
39
40
  androidKeySequencePress(keys: ANDROID_KEY[]): Promise<void>;
40
41
  executeShellCommand(command: string): Promise<void>;
42
+ AndroidSwipeTool(startX: number, startY: number, endX: number, endY: number): Promise<void>;
43
+ AndroidDragAndDropTool(startX: number, startY: number, endX: number, endY: number): Promise<void>;
44
+ AndroidTapTool(x: number, y: number): Promise<void>;
45
+ executeAndroidShellCommand(command: string): Promise<void>;
41
46
  }
42
47
  export declare class ScreenShotTool extends BaseAgentTool {
43
48
  private osAgentHandler;
@@ -175,3 +180,42 @@ export declare class PrintTool extends BaseAgentTool {
175
180
  }): Promise<ToolResult>;
176
181
  toParams(): BetaTool;
177
182
  }
183
+ export declare class AndroidSwipeTool extends BaseAgentTool {
184
+ private osAgentHandler;
185
+ constructor(osAgentHandler: OsAgentHandler);
186
+ execute(command: {
187
+ startX: number;
188
+ startY: number;
189
+ endX: number;
190
+ endY: number;
191
+ }): Promise<ToolResult>;
192
+ toParams(): BetaTool;
193
+ }
194
+ export declare class AndroidDragAndDropTool extends BaseAgentTool {
195
+ private osAgentHandler;
196
+ constructor(osAgentHandler: OsAgentHandler);
197
+ execute(command: {
198
+ startX: number;
199
+ startY: number;
200
+ endX: number;
201
+ endY: number;
202
+ }): Promise<ToolResult>;
203
+ toParams(): BetaTool;
204
+ }
205
+ export declare class AndroidTapTool extends BaseAgentTool {
206
+ private osAgentHandler;
207
+ constructor(osAgentHandler: OsAgentHandler);
208
+ execute(command: {
209
+ x: number;
210
+ y: number;
211
+ }): Promise<ToolResult>;
212
+ toParams(): BetaTool;
213
+ }
214
+ export declare class AndroidShellCommandTool extends BaseAgentTool {
215
+ private osAgentHandler;
216
+ constructor(osAgentHandler: OsAgentHandler);
217
+ execute(command: {
218
+ command: string;
219
+ }): Promise<ToolResult>;
220
+ toParams(): BetaTool;
221
+ }
@@ -13,8 +13,10 @@ import { ControlCommand, ControlCommandCode, InputEvent, Action, } from '../../.
13
13
  import { Base64Image } from '../../../../utils/base_64_image/base-64-image';
14
14
  import { AgentError } from './agent-errors';
15
15
  export class OsAgentHandler {
16
- constructor(AgentOsClient, screenDimensions) {
16
+ constructor(AgentOsClient, screenDimensions, runtime) {
17
17
  this.AgentOsClient = AgentOsClient;
18
+ this.screenDimensions = screenDimensions;
19
+ this.runtime = runtime;
18
20
  this.targetResolution = { width: 1280, height: 800 };
19
21
  this.paddingInfo = null;
20
22
  this.screenDimensions = screenDimensions;
@@ -45,7 +47,7 @@ export class OsAgentHandler {
45
47
  scaledWidth,
46
48
  scaledHeight,
47
49
  padLeft,
48
- padTop
50
+ padTop,
49
51
  };
50
52
  }
51
53
  // Add image support to act, an check for function overload in typescript.
@@ -53,10 +55,12 @@ export class OsAgentHandler {
53
55
  return __awaiter(this, void 0, void 0, function* () {
54
56
  const base64ImageString = yield AgentOsClient.getScreenshot();
55
57
  const image_info = yield (yield Base64Image.fromString(base64ImageString)).getInfo();
58
+ const startingArguments = yield AgentOsClient.getStartingArguments();
59
+ const runtime = startingArguments['runtime'] === 'android' ? 'android' : 'desktop';
56
60
  return new OsAgentHandler(AgentOsClient, {
57
61
  width: image_info.width,
58
62
  height: image_info.height,
59
- });
63
+ }, runtime);
60
64
  });
61
65
  }
62
66
  getTargetResolution() {
@@ -87,7 +91,7 @@ export class OsAgentHandler {
87
91
  if (!this.paddingInfo) {
88
92
  throw new ToolError('Padding information not initialized');
89
93
  }
90
- const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop } = this.paddingInfo;
94
+ const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop, } = this.paddingInfo;
91
95
  if (source === 'api') {
92
96
  if (x > this.targetResolution.width || y > this.targetResolution.height || x < 0 || y < 0) {
93
97
  throw new ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
@@ -128,23 +132,21 @@ export class OsAgentHandler {
128
132
  return __awaiter(this, void 0, void 0, function* () {
129
133
  let action = InputEvent.MOUSE_CLICK_LEFT;
130
134
  if (doubleClick) {
131
- if (button === "left") {
135
+ if (button === 'left') {
132
136
  action = InputEvent.MOUSE_CLICK_DOUBLE_LEFT;
133
137
  }
134
- else if (button === "right") {
138
+ else if (button === 'right') {
135
139
  action = InputEvent.MOUSE_CLICK_DOUBLE_RIGHT;
136
140
  }
137
- else if (button === "middle") {
141
+ else if (button === 'middle') {
138
142
  action = InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE;
139
143
  }
140
144
  }
141
- else {
142
- if (button === "right") {
143
- action = InputEvent.MOUSE_CLICK_RIGHT;
144
- }
145
- else if (button === "middle") {
146
- action = InputEvent.MOUSE_CLICK_MIDDLE;
147
- }
145
+ else if (button === 'right') {
146
+ action = InputEvent.MOUSE_CLICK_RIGHT;
147
+ }
148
+ else if (button === 'middle') {
149
+ action = InputEvent.MOUSE_CLICK_MIDDLE;
148
150
  }
149
151
  const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(action, { x: 0, y: 0 }, '', {})]);
150
152
  yield this.requestControl(controlCommand);
@@ -181,8 +183,8 @@ export class OsAgentHandler {
181
183
  desktopKeyHoldDown(key_1) {
182
184
  return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
183
185
  const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.KEY_PRESS, { x: 0, y: 0 }, '', {
184
- key: key,
185
- modifiers: modifiers,
186
+ key,
187
+ modifiers,
186
188
  })]);
187
189
  yield this.requestControl(controlCommand);
188
190
  });
@@ -190,8 +192,8 @@ export class OsAgentHandler {
190
192
  desktopKeyRelease(key_1) {
191
193
  return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
192
194
  const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.KEY_RELEASE, { x: 0, y: 0 }, '', {
193
- key: key,
194
- modifiers: modifiers,
195
+ key,
196
+ modifiers,
195
197
  })]);
196
198
  yield this.requestControl(controlCommand);
197
199
  });
@@ -220,6 +222,47 @@ export class OsAgentHandler {
220
222
  yield this.requestControl(controlCommand);
221
223
  });
222
224
  }
225
+ AndroidSwipeTool(startX, startY, endX, endY) {
226
+ return __awaiter(this, void 0, void 0, function* () {
227
+ if (this.runtime !== 'android') {
228
+ throw new ToolError('This tool is only available on Android devices');
229
+ }
230
+ [startX, startY] = this.scaleCoordinates('api', startX, startY);
231
+ [endX, endY] = this.scaleCoordinates('api', endX, endY);
232
+ const adbCommand = `input swipe ${startX} ${startY} ${endX} ${endY}`;
233
+ yield this.executeShellCommand(adbCommand);
234
+ });
235
+ }
236
+ AndroidDragAndDropTool(startX, startY, endX, endY) {
237
+ return __awaiter(this, void 0, void 0, function* () {
238
+ if (this.runtime !== 'android') {
239
+ throw new ToolError('This tool is only available on Android devices');
240
+ }
241
+ [startX, startY] = this.scaleCoordinates('api', startX, startY);
242
+ [endX, endY] = this.scaleCoordinates('api', endX, endY);
243
+ const adbCommand = `input draganddrop ${startX} ${startY} ${endX} ${endY}`;
244
+ yield this.executeShellCommand(adbCommand);
245
+ });
246
+ }
247
+ AndroidTapTool(x, y) {
248
+ return __awaiter(this, void 0, void 0, function* () {
249
+ if (this.runtime !== 'android') {
250
+ throw new ToolError('This tool is only available on Android devices');
251
+ }
252
+ [x, y] = this.scaleCoordinates('api', x, y);
253
+ const adbCommand = `input tap ${x} ${y}`;
254
+ yield this.executeShellCommand(adbCommand);
255
+ });
256
+ }
257
+ executeAndroidShellCommand(command) {
258
+ return __awaiter(this, void 0, void 0, function* () {
259
+ if (this.runtime !== 'android') {
260
+ throw new ToolError('This tool is only available on Android devices');
261
+ }
262
+ command = command.replace(/^adb shell /, '');
263
+ yield this.executeShellCommand(command);
264
+ });
265
+ }
223
266
  }
224
267
  export class ScreenShotTool extends BaseAgentTool {
225
268
  constructor(osAgentHandler) {
@@ -590,7 +633,8 @@ export class AndroidSingleKeyPressTool extends BaseAgentTool {
590
633
  }
591
634
  execute(command) {
592
635
  return __awaiter(this, void 0, void 0, function* () {
593
- yield this.osAgentHandler.androidKeyPress(command.key);
636
+ const adbCommand = `input keyevent ${command.key.toUpperCase()}`;
637
+ yield this.osAgentHandler.executeShellCommand(adbCommand);
594
638
  return {
595
639
  output: `Pressed Android key ${command.key}`,
596
640
  };
@@ -621,7 +665,8 @@ export class AndroidSequenceKeyPressTool extends BaseAgentTool {
621
665
  }
622
666
  execute(command) {
623
667
  return __awaiter(this, void 0, void 0, function* () {
624
- yield this.osAgentHandler.androidKeySequencePress(command.keys);
668
+ const adbCommand = `input keyevent ${command.keys.map((key) => key.toUpperCase()).join(' ')}`;
669
+ yield this.osAgentHandler.executeShellCommand(adbCommand);
625
670
  return {
626
671
  output: `Pressed Android keys: ${command.keys.join(', ')}`,
627
672
  };
@@ -690,7 +735,7 @@ export class ExecuteShellCommandTool extends BaseAgentTool {
690
735
  toParams() {
691
736
  return {
692
737
  name: 'execute_shell_command_tool',
693
- description: 'Executes a shell command',
738
+ description: 'Executes a shell command. It does not return the output of the command.',
694
739
  input_schema: {
695
740
  type: 'object',
696
741
  properties: {
@@ -710,7 +755,7 @@ export class WaitTool extends BaseAgentTool {
710
755
  }
711
756
  execute(command) {
712
757
  return __awaiter(this, void 0, void 0, function* () {
713
- yield new Promise(resolve => setTimeout(resolve, command.milliseconds));
758
+ yield new Promise((resolve) => setTimeout(resolve, command.milliseconds));
714
759
  return {
715
760
  output: `Waited for ${command.milliseconds} milliseconds`,
716
761
  };
@@ -762,3 +807,151 @@ export class PrintTool extends BaseAgentTool {
762
807
  };
763
808
  }
764
809
  }
810
+ export class AndroidSwipeTool extends BaseAgentTool {
811
+ constructor(osAgentHandler) {
812
+ super();
813
+ this.osAgentHandler = osAgentHandler;
814
+ }
815
+ execute(command) {
816
+ return __awaiter(this, void 0, void 0, function* () {
817
+ yield this.osAgentHandler.AndroidSwipeTool(command.startX, command.startY, command.endX, command.endY);
818
+ return {
819
+ output: `Swiped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`,
820
+ };
821
+ });
822
+ }
823
+ toParams() {
824
+ return {
825
+ name: 'android_swipe_tool',
826
+ description: 'Swipes from a starting point to an ending point on the screen',
827
+ input_schema: {
828
+ type: 'object',
829
+ properties: {
830
+ startX: {
831
+ type: 'number',
832
+ description: 'The x (pixels from the left edge) coordinate of the start position',
833
+ },
834
+ startY: {
835
+ type: 'number',
836
+ description: 'The y (pixels from the top edge) coordinate of the start position',
837
+ },
838
+ endX: {
839
+ type: 'number',
840
+ description: 'The x (pixels from the left edge) coordinate of the end position',
841
+ },
842
+ endY: {
843
+ type: 'number',
844
+ description: 'The y (pixels from the top edge) coordinate of the end position',
845
+ },
846
+ },
847
+ required: ['startX', 'startY', 'endX', 'endY'],
848
+ },
849
+ };
850
+ }
851
+ }
852
+ export class AndroidDragAndDropTool extends BaseAgentTool {
853
+ constructor(osAgentHandler) {
854
+ super();
855
+ this.osAgentHandler = osAgentHandler;
856
+ }
857
+ execute(command) {
858
+ return __awaiter(this, void 0, void 0, function* () {
859
+ yield this.osAgentHandler.AndroidDragAndDropTool(command.startX, command.startY, command.endX, command.endY);
860
+ return {
861
+ output: `Dragged and dropped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`,
862
+ };
863
+ });
864
+ }
865
+ toParams() {
866
+ return {
867
+ name: 'android_drag_and_drop_tool',
868
+ description: 'Drags and drops from a starting point to an ending point on the screen',
869
+ input_schema: {
870
+ type: 'object',
871
+ properties: {
872
+ startX: {
873
+ type: 'number',
874
+ description: 'The x (pixels from the left edge) coordinate of the start position',
875
+ },
876
+ startY: {
877
+ type: 'number',
878
+ description: 'The y (pixels from the top edge) coordinate of the start position',
879
+ },
880
+ endX: {
881
+ type: 'number',
882
+ description: 'The x (pixels from the left edge) coordinate of the end position',
883
+ },
884
+ endY: {
885
+ type: 'number',
886
+ description: 'The y (pixels from the top edge) coordinate of the end position',
887
+ },
888
+ },
889
+ required: ['startX', 'startY', 'endX', 'endY'],
890
+ },
891
+ };
892
+ }
893
+ }
894
+ export class AndroidTapTool extends BaseAgentTool {
895
+ constructor(osAgentHandler) {
896
+ super();
897
+ this.osAgentHandler = osAgentHandler;
898
+ }
899
+ execute(command) {
900
+ return __awaiter(this, void 0, void 0, function* () {
901
+ yield this.osAgentHandler.AndroidTapTool(command.x, command.y);
902
+ return {
903
+ output: `Tapped the screen at ${command.x}, ${command.y}`,
904
+ };
905
+ });
906
+ }
907
+ toParams() {
908
+ return {
909
+ name: 'android_tap_tool',
910
+ description: 'Taps the screen at the specified coordinates',
911
+ input_schema: {
912
+ type: 'object',
913
+ properties: {
914
+ x: {
915
+ type: 'number',
916
+ description: 'The x (pixels from the left edge) coordinate of the tap position',
917
+ },
918
+ y: {
919
+ type: 'number',
920
+ description: 'The y (pixels from the top edge) coordinate of the tap position',
921
+ },
922
+ },
923
+ required: ['x', 'y'],
924
+ },
925
+ };
926
+ }
927
+ }
928
+ export class AndroidShellCommandTool extends BaseAgentTool {
929
+ constructor(osAgentHandler) {
930
+ super();
931
+ this.osAgentHandler = osAgentHandler;
932
+ }
933
+ execute(command) {
934
+ return __awaiter(this, void 0, void 0, function* () {
935
+ yield this.osAgentHandler.executeAndroidShellCommand(command.command);
936
+ return {
937
+ output: `Executed shell command: ${command.command}`,
938
+ };
939
+ });
940
+ }
941
+ toParams() {
942
+ return {
943
+ name: 'android_shell_command_tool',
944
+ description: 'Executes a shell command on the Android device. It does not return the output of the command.',
945
+ input_schema: {
946
+ type: 'object',
947
+ properties: {
948
+ command: {
949
+ type: 'string',
950
+ description: 'The shell command to execute without the "adb shell" prefix',
951
+ },
952
+ },
953
+ required: ['command'],
954
+ },
955
+ };
956
+ }
957
+ }
@@ -563,7 +563,6 @@ export declare class UiControlClient extends ApiCommands {
563
563
  * #### Cross-Platform Coordination
564
564
  * ```typescript
565
565
  * // Share context between desktop and mobile agents
566
- * await auiAndroid.agent.configureAsAndroidAgent();
567
566
  *
568
567
  * const history = await auiDesktop.act("Copy username from desktop app");
569
568
  * await auiAndroid.act("Paste username into mobile login", {
@@ -43,7 +43,7 @@ export class UiControlClient extends ApiCommands {
43
43
  return __awaiter(this, void 0, void 0, function* () {
44
44
  const connectionState = yield this.executionRuntime.connect();
45
45
  yield this.agent.initializeOsAgentHandler();
46
- yield this.agent.configureAsDesktopAgent();
46
+ yield this.agent.configureAgent();
47
47
  return connectionState;
48
48
  });
49
49
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "askui",
3
- "version": "0.29.0",
3
+ "version": "0.30.0",
4
4
  "license": "MIT",
5
5
  "author": "askui GmbH <info@askui.com> (http://www.askui.com/)",
6
6
  "description": "Reliable, automated end-to-end-testing that depends on what is shown on your screen instead of the technology you are running on",
@@ -1,32 +0,0 @@
1
- import { UiControlClient } from 'askui';
2
- {{ allure_stepreporter_import }}
3
-
4
- // Client is necessary to use the askui API
5
- // eslint-disable-next-line import/no-mutable-exports
6
- let aui: UiControlClient;
7
-
8
- {{ timeout_placeholder }}
9
-
10
- beforeAll(async () => {
11
- aui = await UiControlClient.build({
12
- {{ credentials }}
13
- {{ reporter_placeholder }}
14
- });
15
-
16
- await aui.connect();
17
- });
18
-
19
- beforeEach(async () => {
20
- await aui.startVideoRecording();
21
- });
22
-
23
- afterEach(async () => {
24
- await aui.stopVideoRecording();
25
- {{ allure_stepreporter_attach_video }}
26
- });
27
-
28
- afterAll(async () => {
29
- aui.disconnect();
30
- });
31
-
32
- export { aui };