askui 0.29.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/core/models/anthropic/askui-agent.d.ts +2 -2
- package/dist/cjs/core/models/anthropic/askui-agent.js +33 -33
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +47 -3
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +220 -23
- package/dist/cjs/execution/ui-control-client.d.ts +0 -1
- package/dist/cjs/execution/ui-control-client.js +1 -1
- package/dist/esm/core/models/anthropic/askui-agent.d.ts +2 -2
- package/dist/esm/core/models/anthropic/askui-agent.js +34 -34
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +47 -3
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +215 -22
- package/dist/esm/execution/ui-control-client.d.ts +0 -1
- package/dist/esm/execution/ui-control-client.js +1 -1
- package/package.json +1 -1
- package/dist/example_projects_templates/templates/askui-helper-windows.nj +0 -32
|
@@ -4,12 +4,12 @@ import { ExecutionRuntime } from '../../../execution/execution-runtime';
|
|
|
4
4
|
export declare class AskUIAgent extends ClaudeAgent {
|
|
5
5
|
private osAgentHandler;
|
|
6
6
|
private executionRuntime;
|
|
7
|
+
private runtime;
|
|
7
8
|
constructor(executionRuntime: ExecutionRuntime);
|
|
8
9
|
isConnected(): boolean;
|
|
9
10
|
initializeOsAgentHandler(): Promise<void>;
|
|
10
11
|
getOsAgentHandler(): OsAgentHandler;
|
|
11
|
-
|
|
12
|
-
configureAsAndroidAgent(): Promise<void>;
|
|
12
|
+
configureAgent(): Promise<void>;
|
|
13
13
|
private static DesktopSystemPrompt;
|
|
14
14
|
private static AndroidSystemPrompt;
|
|
15
15
|
}
|
|
@@ -16,6 +16,7 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
|
|
|
16
16
|
constructor(executionRuntime) {
|
|
17
17
|
super((params) => executionRuntime.predictActResponse(params));
|
|
18
18
|
this.osAgentHandler = undefined;
|
|
19
|
+
this.runtime = 'desktop';
|
|
19
20
|
this.executionRuntime = executionRuntime;
|
|
20
21
|
}
|
|
21
22
|
isConnected() {
|
|
@@ -24,6 +25,7 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
|
|
|
24
25
|
initializeOsAgentHandler() {
|
|
25
26
|
return __awaiter(this, void 0, void 0, function* () {
|
|
26
27
|
this.osAgentHandler = yield os_agent_tools_1.OsAgentHandler.createInstance(this.executionRuntime);
|
|
28
|
+
this.runtime = this.osAgentHandler.runtime;
|
|
27
29
|
});
|
|
28
30
|
}
|
|
29
31
|
getOsAgentHandler() {
|
|
@@ -32,50 +34,48 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
|
|
|
32
34
|
}
|
|
33
35
|
return this.osAgentHandler;
|
|
34
36
|
}
|
|
35
|
-
|
|
37
|
+
configureAgent() {
|
|
36
38
|
return __awaiter(this, void 0, void 0, function* () {
|
|
37
39
|
if (!this.osAgentHandler) {
|
|
38
40
|
throw new Error('Agent OS client is not connected');
|
|
39
41
|
}
|
|
40
|
-
|
|
42
|
+
let systemPrompt = AskUIAgent.DesktopSystemPrompt;
|
|
43
|
+
let tools = [
|
|
41
44
|
new os_agent_tools_1.AgentErrorTool(),
|
|
42
45
|
new os_agent_tools_1.PrintTool(),
|
|
43
|
-
new os_agent_tools_1.ScreenShotTool(this.osAgentHandler),
|
|
44
|
-
new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
|
|
45
|
-
new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
|
|
46
|
-
new os_agent_tools_1.MouseScrollTool(this.osAgentHandler),
|
|
47
|
-
new os_agent_tools_1.TypeTool(this.osAgentHandler),
|
|
48
|
-
new os_agent_tools_1.DesktopPressAndReleaseKeysTool(this.osAgentHandler),
|
|
49
|
-
new os_agent_tools_1.DesktopKeyHoldDownTool(this.osAgentHandler),
|
|
50
|
-
new os_agent_tools_1.DesktopKeyReleaseTool(this.osAgentHandler),
|
|
51
|
-
new os_agent_tools_1.MouseHoldLeftButtonDownTool(this.osAgentHandler),
|
|
52
|
-
new os_agent_tools_1.MouseReleaseLeftButtonTool(this.osAgentHandler),
|
|
53
|
-
new os_agent_tools_1.MouseDragAndDropTool(this.osAgentHandler),
|
|
54
46
|
new os_agent_tools_1.WaitTool(),
|
|
55
|
-
];
|
|
56
|
-
this.setTools(tools);
|
|
57
|
-
this.setSystemPrompt(AskUIAgent.DesktopSystemPrompt);
|
|
58
|
-
});
|
|
59
|
-
}
|
|
60
|
-
configureAsAndroidAgent() {
|
|
61
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
62
|
-
if (!this.osAgentHandler) {
|
|
63
|
-
throw new Error('Agent OS client is not connected');
|
|
64
|
-
}
|
|
65
|
-
const tools = [
|
|
66
|
-
new os_agent_tools_1.AgentErrorTool(),
|
|
67
47
|
new os_agent_tools_1.ScreenShotTool(this.osAgentHandler),
|
|
68
|
-
new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
|
|
69
|
-
new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
|
|
70
|
-
new os_agent_tools_1.MouseScrollTool(this.osAgentHandler),
|
|
71
|
-
new os_agent_tools_1.AndroidSingleKeyPressTool(this.osAgentHandler),
|
|
72
|
-
new os_agent_tools_1.AndroidSequenceKeyPressTool(this.osAgentHandler),
|
|
73
48
|
new os_agent_tools_1.TypeTool(this.osAgentHandler),
|
|
74
|
-
new os_agent_tools_1.ExecuteShellCommandTool(this.osAgentHandler),
|
|
75
|
-
new os_agent_tools_1.WaitTool(),
|
|
76
49
|
];
|
|
50
|
+
if (this.runtime === 'desktop') {
|
|
51
|
+
tools = [
|
|
52
|
+
...tools,
|
|
53
|
+
new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
|
|
54
|
+
new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
|
|
55
|
+
new os_agent_tools_1.MouseScrollTool(this.osAgentHandler),
|
|
56
|
+
new os_agent_tools_1.DesktopPressAndReleaseKeysTool(this.osAgentHandler),
|
|
57
|
+
new os_agent_tools_1.DesktopKeyHoldDownTool(this.osAgentHandler),
|
|
58
|
+
new os_agent_tools_1.DesktopKeyReleaseTool(this.osAgentHandler),
|
|
59
|
+
new os_agent_tools_1.MouseHoldLeftButtonDownTool(this.osAgentHandler),
|
|
60
|
+
new os_agent_tools_1.MouseReleaseLeftButtonTool(this.osAgentHandler),
|
|
61
|
+
new os_agent_tools_1.MouseDragAndDropTool(this.osAgentHandler),
|
|
62
|
+
new os_agent_tools_1.ExecuteShellCommandTool(this.osAgentHandler),
|
|
63
|
+
];
|
|
64
|
+
}
|
|
65
|
+
if (this.runtime === 'android') {
|
|
66
|
+
tools = [
|
|
67
|
+
...tools,
|
|
68
|
+
new os_agent_tools_1.AndroidSingleKeyPressTool(this.osAgentHandler),
|
|
69
|
+
new os_agent_tools_1.AndroidSequenceKeyPressTool(this.osAgentHandler),
|
|
70
|
+
new os_agent_tools_1.AndroidSwipeTool(this.osAgentHandler),
|
|
71
|
+
new os_agent_tools_1.AndroidDragAndDropTool(this.osAgentHandler),
|
|
72
|
+
new os_agent_tools_1.AndroidTapTool(this.osAgentHandler),
|
|
73
|
+
new os_agent_tools_1.AndroidShellCommandTool(this.osAgentHandler),
|
|
74
|
+
];
|
|
75
|
+
systemPrompt = AskUIAgent.AndroidSystemPrompt;
|
|
76
|
+
}
|
|
77
77
|
this.setTools(tools);
|
|
78
|
-
this.setSystemPrompt(
|
|
78
|
+
this.setSystemPrompt(systemPrompt);
|
|
79
79
|
});
|
|
80
80
|
}
|
|
81
81
|
}
|
|
@@ -5,13 +5,14 @@ import { ExecutionRuntime } from '../../../../execution/execution-runtime';
|
|
|
5
5
|
import { ControlCommand } from '../../../ui-control-commands';
|
|
6
6
|
export declare class OsAgentHandler {
|
|
7
7
|
private AgentOsClient;
|
|
8
|
-
private targetResolution;
|
|
9
8
|
private screenDimensions;
|
|
9
|
+
runtime: 'android' | 'desktop';
|
|
10
|
+
private targetResolution;
|
|
10
11
|
private paddingInfo;
|
|
11
12
|
constructor(AgentOsClient: ExecutionRuntime, screenDimensions: {
|
|
12
13
|
width: number;
|
|
13
14
|
height: number;
|
|
14
|
-
});
|
|
15
|
+
}, runtime: 'android' | 'desktop');
|
|
15
16
|
private updatePaddingInfo;
|
|
16
17
|
static createInstance(AgentOsClient: ExecutionRuntime): Promise<OsAgentHandler>;
|
|
17
18
|
getTargetResolution(): {
|
|
@@ -27,7 +28,7 @@ export declare class OsAgentHandler {
|
|
|
27
28
|
scaleCoordinates(source: 'api' | 'computer', x: number, y: number): [number, number];
|
|
28
29
|
requestControl(controlCommand: ControlCommand): Promise<void>;
|
|
29
30
|
mouseMove(x: number, y: number): Promise<void>;
|
|
30
|
-
mouseClick(button:
|
|
31
|
+
mouseClick(button: 'left' | 'right' | 'middle', doubleClick: boolean): Promise<void>;
|
|
31
32
|
mouseScroll(dx: number, dy: number): Promise<void>;
|
|
32
33
|
mouseHoldLeftButtonDown(): Promise<void>;
|
|
33
34
|
mouseReleaseLeftButton(): Promise<void>;
|
|
@@ -38,6 +39,10 @@ export declare class OsAgentHandler {
|
|
|
38
39
|
androidKeyPress(key: ANDROID_KEY): Promise<void>;
|
|
39
40
|
androidKeySequencePress(keys: ANDROID_KEY[]): Promise<void>;
|
|
40
41
|
executeShellCommand(command: string): Promise<void>;
|
|
42
|
+
AndroidSwipeTool(startX: number, startY: number, endX: number, endY: number): Promise<void>;
|
|
43
|
+
AndroidDragAndDropTool(startX: number, startY: number, endX: number, endY: number): Promise<void>;
|
|
44
|
+
AndroidTapTool(x: number, y: number): Promise<void>;
|
|
45
|
+
executeAndroidShellCommand(command: string): Promise<void>;
|
|
41
46
|
}
|
|
42
47
|
export declare class ScreenShotTool extends BaseAgentTool {
|
|
43
48
|
private osAgentHandler;
|
|
@@ -175,3 +180,42 @@ export declare class PrintTool extends BaseAgentTool {
|
|
|
175
180
|
}): Promise<ToolResult>;
|
|
176
181
|
toParams(): BetaTool;
|
|
177
182
|
}
|
|
183
|
+
export declare class AndroidSwipeTool extends BaseAgentTool {
|
|
184
|
+
private osAgentHandler;
|
|
185
|
+
constructor(osAgentHandler: OsAgentHandler);
|
|
186
|
+
execute(command: {
|
|
187
|
+
startX: number;
|
|
188
|
+
startY: number;
|
|
189
|
+
endX: number;
|
|
190
|
+
endY: number;
|
|
191
|
+
}): Promise<ToolResult>;
|
|
192
|
+
toParams(): BetaTool;
|
|
193
|
+
}
|
|
194
|
+
export declare class AndroidDragAndDropTool extends BaseAgentTool {
|
|
195
|
+
private osAgentHandler;
|
|
196
|
+
constructor(osAgentHandler: OsAgentHandler);
|
|
197
|
+
execute(command: {
|
|
198
|
+
startX: number;
|
|
199
|
+
startY: number;
|
|
200
|
+
endX: number;
|
|
201
|
+
endY: number;
|
|
202
|
+
}): Promise<ToolResult>;
|
|
203
|
+
toParams(): BetaTool;
|
|
204
|
+
}
|
|
205
|
+
export declare class AndroidTapTool extends BaseAgentTool {
|
|
206
|
+
private osAgentHandler;
|
|
207
|
+
constructor(osAgentHandler: OsAgentHandler);
|
|
208
|
+
execute(command: {
|
|
209
|
+
x: number;
|
|
210
|
+
y: number;
|
|
211
|
+
}): Promise<ToolResult>;
|
|
212
|
+
toParams(): BetaTool;
|
|
213
|
+
}
|
|
214
|
+
export declare class AndroidShellCommandTool extends BaseAgentTool {
|
|
215
|
+
private osAgentHandler;
|
|
216
|
+
constructor(osAgentHandler: OsAgentHandler);
|
|
217
|
+
execute(command: {
|
|
218
|
+
command: string;
|
|
219
|
+
}): Promise<ToolResult>;
|
|
220
|
+
toParams(): BetaTool;
|
|
221
|
+
}
|
|
@@ -9,15 +9,17 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
9
9
|
});
|
|
10
10
|
};
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
-
exports.PrintTool = exports.WaitTool = exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopKeyReleaseTool = exports.DesktopKeyHoldDownTool = exports.DesktopPressAndReleaseKeysTool = exports.MouseReleaseLeftButtonTool = exports.MouseHoldLeftButtonDownTool = exports.MouseDragAndDropTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
|
|
12
|
+
exports.AndroidShellCommandTool = exports.AndroidTapTool = exports.AndroidDragAndDropTool = exports.AndroidSwipeTool = exports.PrintTool = exports.WaitTool = exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopKeyReleaseTool = exports.DesktopKeyHoldDownTool = exports.DesktopPressAndReleaseKeysTool = exports.MouseReleaseLeftButtonTool = exports.MouseHoldLeftButtonDownTool = exports.MouseDragAndDropTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
|
|
13
13
|
const dsl_1 = require("../../../../execution/dsl");
|
|
14
14
|
const base_1 = require("./base");
|
|
15
15
|
const ui_control_commands_1 = require("../../../ui-control-commands");
|
|
16
16
|
const base_64_image_1 = require("../../../../utils/base_64_image/base-64-image");
|
|
17
17
|
const agent_errors_1 = require("./agent-errors");
|
|
18
18
|
class OsAgentHandler {
|
|
19
|
-
constructor(AgentOsClient, screenDimensions) {
|
|
19
|
+
constructor(AgentOsClient, screenDimensions, runtime) {
|
|
20
20
|
this.AgentOsClient = AgentOsClient;
|
|
21
|
+
this.screenDimensions = screenDimensions;
|
|
22
|
+
this.runtime = runtime;
|
|
21
23
|
this.targetResolution = { width: 1280, height: 800 };
|
|
22
24
|
this.paddingInfo = null;
|
|
23
25
|
this.screenDimensions = screenDimensions;
|
|
@@ -48,7 +50,7 @@ class OsAgentHandler {
|
|
|
48
50
|
scaledWidth,
|
|
49
51
|
scaledHeight,
|
|
50
52
|
padLeft,
|
|
51
|
-
padTop
|
|
53
|
+
padTop,
|
|
52
54
|
};
|
|
53
55
|
}
|
|
54
56
|
// Add image support to act, an check for function overload in typescript.
|
|
@@ -56,10 +58,12 @@ class OsAgentHandler {
|
|
|
56
58
|
return __awaiter(this, void 0, void 0, function* () {
|
|
57
59
|
const base64ImageString = yield AgentOsClient.getScreenshot();
|
|
58
60
|
const image_info = yield (yield base_64_image_1.Base64Image.fromString(base64ImageString)).getInfo();
|
|
61
|
+
const startingArguments = yield AgentOsClient.getStartingArguments();
|
|
62
|
+
const runtime = startingArguments['runtime'] === 'android' ? 'android' : 'desktop';
|
|
59
63
|
return new OsAgentHandler(AgentOsClient, {
|
|
60
64
|
width: image_info.width,
|
|
61
65
|
height: image_info.height,
|
|
62
|
-
});
|
|
66
|
+
}, runtime);
|
|
63
67
|
});
|
|
64
68
|
}
|
|
65
69
|
getTargetResolution() {
|
|
@@ -90,7 +94,7 @@ class OsAgentHandler {
|
|
|
90
94
|
if (!this.paddingInfo) {
|
|
91
95
|
throw new base_1.ToolError('Padding information not initialized');
|
|
92
96
|
}
|
|
93
|
-
const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop } = this.paddingInfo;
|
|
97
|
+
const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop, } = this.paddingInfo;
|
|
94
98
|
if (source === 'api') {
|
|
95
99
|
if (x > this.targetResolution.width || y > this.targetResolution.height || x < 0 || y < 0) {
|
|
96
100
|
throw new base_1.ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
|
|
@@ -131,23 +135,21 @@ class OsAgentHandler {
|
|
|
131
135
|
return __awaiter(this, void 0, void 0, function* () {
|
|
132
136
|
let action = ui_control_commands_1.InputEvent.MOUSE_CLICK_LEFT;
|
|
133
137
|
if (doubleClick) {
|
|
134
|
-
if (button ===
|
|
138
|
+
if (button === 'left') {
|
|
135
139
|
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_LEFT;
|
|
136
140
|
}
|
|
137
|
-
else if (button ===
|
|
141
|
+
else if (button === 'right') {
|
|
138
142
|
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_RIGHT;
|
|
139
143
|
}
|
|
140
|
-
else if (button ===
|
|
144
|
+
else if (button === 'middle') {
|
|
141
145
|
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE;
|
|
142
146
|
}
|
|
143
147
|
}
|
|
144
|
-
else {
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_MIDDLE;
|
|
150
|
-
}
|
|
148
|
+
else if (button === 'right') {
|
|
149
|
+
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_RIGHT;
|
|
150
|
+
}
|
|
151
|
+
else if (button === 'middle') {
|
|
152
|
+
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_MIDDLE;
|
|
151
153
|
}
|
|
152
154
|
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(action, { x: 0, y: 0 }, '', {})]);
|
|
153
155
|
yield this.requestControl(controlCommand);
|
|
@@ -184,8 +186,8 @@ class OsAgentHandler {
|
|
|
184
186
|
desktopKeyHoldDown(key_1) {
|
|
185
187
|
return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
|
|
186
188
|
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.KEY_PRESS, { x: 0, y: 0 }, '', {
|
|
187
|
-
key
|
|
188
|
-
modifiers
|
|
189
|
+
key,
|
|
190
|
+
modifiers,
|
|
189
191
|
})]);
|
|
190
192
|
yield this.requestControl(controlCommand);
|
|
191
193
|
});
|
|
@@ -193,8 +195,8 @@ class OsAgentHandler {
|
|
|
193
195
|
desktopKeyRelease(key_1) {
|
|
194
196
|
return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
|
|
195
197
|
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.KEY_RELEASE, { x: 0, y: 0 }, '', {
|
|
196
|
-
key
|
|
197
|
-
modifiers
|
|
198
|
+
key,
|
|
199
|
+
modifiers,
|
|
198
200
|
})]);
|
|
199
201
|
yield this.requestControl(controlCommand);
|
|
200
202
|
});
|
|
@@ -223,6 +225,47 @@ class OsAgentHandler {
|
|
|
223
225
|
yield this.requestControl(controlCommand);
|
|
224
226
|
});
|
|
225
227
|
}
|
|
228
|
+
AndroidSwipeTool(startX, startY, endX, endY) {
|
|
229
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
230
|
+
if (this.runtime !== 'android') {
|
|
231
|
+
throw new base_1.ToolError('This tool is only available on Android devices');
|
|
232
|
+
}
|
|
233
|
+
[startX, startY] = this.scaleCoordinates('api', startX, startY);
|
|
234
|
+
[endX, endY] = this.scaleCoordinates('api', endX, endY);
|
|
235
|
+
const adbCommand = `input swipe ${startX} ${startY} ${endX} ${endY}`;
|
|
236
|
+
yield this.executeShellCommand(adbCommand);
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
AndroidDragAndDropTool(startX, startY, endX, endY) {
|
|
240
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
241
|
+
if (this.runtime !== 'android') {
|
|
242
|
+
throw new base_1.ToolError('This tool is only available on Android devices');
|
|
243
|
+
}
|
|
244
|
+
[startX, startY] = this.scaleCoordinates('api', startX, startY);
|
|
245
|
+
[endX, endY] = this.scaleCoordinates('api', endX, endY);
|
|
246
|
+
const adbCommand = `input draganddrop ${startX} ${startY} ${endX} ${endY}`;
|
|
247
|
+
yield this.executeShellCommand(adbCommand);
|
|
248
|
+
});
|
|
249
|
+
}
|
|
250
|
+
AndroidTapTool(x, y) {
|
|
251
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
252
|
+
if (this.runtime !== 'android') {
|
|
253
|
+
throw new base_1.ToolError('This tool is only available on Android devices');
|
|
254
|
+
}
|
|
255
|
+
[x, y] = this.scaleCoordinates('api', x, y);
|
|
256
|
+
const adbCommand = `input tap ${x} ${y}`;
|
|
257
|
+
yield this.executeShellCommand(adbCommand);
|
|
258
|
+
});
|
|
259
|
+
}
|
|
260
|
+
executeAndroidShellCommand(command) {
|
|
261
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
262
|
+
if (this.runtime !== 'android') {
|
|
263
|
+
throw new base_1.ToolError('This tool is only available on Android devices');
|
|
264
|
+
}
|
|
265
|
+
command = command.replace(/^adb shell /, '');
|
|
266
|
+
yield this.executeShellCommand(command);
|
|
267
|
+
});
|
|
268
|
+
}
|
|
226
269
|
}
|
|
227
270
|
exports.OsAgentHandler = OsAgentHandler;
|
|
228
271
|
class ScreenShotTool extends base_1.BaseAgentTool {
|
|
@@ -605,7 +648,8 @@ class AndroidSingleKeyPressTool extends base_1.BaseAgentTool {
|
|
|
605
648
|
}
|
|
606
649
|
execute(command) {
|
|
607
650
|
return __awaiter(this, void 0, void 0, function* () {
|
|
608
|
-
|
|
651
|
+
const adbCommand = `input keyevent ${command.key.toUpperCase()}`;
|
|
652
|
+
yield this.osAgentHandler.executeShellCommand(adbCommand);
|
|
609
653
|
return {
|
|
610
654
|
output: `Pressed Android key ${command.key}`,
|
|
611
655
|
};
|
|
@@ -637,7 +681,8 @@ class AndroidSequenceKeyPressTool extends base_1.BaseAgentTool {
|
|
|
637
681
|
}
|
|
638
682
|
execute(command) {
|
|
639
683
|
return __awaiter(this, void 0, void 0, function* () {
|
|
640
|
-
|
|
684
|
+
const adbCommand = `input keyevent ${command.keys.map((key) => key.toUpperCase()).join(' ')}`;
|
|
685
|
+
yield this.osAgentHandler.executeShellCommand(adbCommand);
|
|
641
686
|
return {
|
|
642
687
|
output: `Pressed Android keys: ${command.keys.join(', ')}`,
|
|
643
688
|
};
|
|
@@ -708,7 +753,7 @@ class ExecuteShellCommandTool extends base_1.BaseAgentTool {
|
|
|
708
753
|
toParams() {
|
|
709
754
|
return {
|
|
710
755
|
name: 'execute_shell_command_tool',
|
|
711
|
-
description: 'Executes a shell command',
|
|
756
|
+
description: 'Executes a shell command. It does not return the output of the command.',
|
|
712
757
|
input_schema: {
|
|
713
758
|
type: 'object',
|
|
714
759
|
properties: {
|
|
@@ -729,7 +774,7 @@ class WaitTool extends base_1.BaseAgentTool {
|
|
|
729
774
|
}
|
|
730
775
|
execute(command) {
|
|
731
776
|
return __awaiter(this, void 0, void 0, function* () {
|
|
732
|
-
yield new Promise(resolve => setTimeout(resolve, command.milliseconds));
|
|
777
|
+
yield new Promise((resolve) => setTimeout(resolve, command.milliseconds));
|
|
733
778
|
return {
|
|
734
779
|
output: `Waited for ${command.milliseconds} milliseconds`,
|
|
735
780
|
};
|
|
@@ -783,3 +828,155 @@ class PrintTool extends base_1.BaseAgentTool {
|
|
|
783
828
|
}
|
|
784
829
|
}
|
|
785
830
|
exports.PrintTool = PrintTool;
|
|
831
|
+
class AndroidSwipeTool extends base_1.BaseAgentTool {
|
|
832
|
+
constructor(osAgentHandler) {
|
|
833
|
+
super();
|
|
834
|
+
this.osAgentHandler = osAgentHandler;
|
|
835
|
+
}
|
|
836
|
+
execute(command) {
|
|
837
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
838
|
+
yield this.osAgentHandler.AndroidSwipeTool(command.startX, command.startY, command.endX, command.endY);
|
|
839
|
+
return {
|
|
840
|
+
output: `Swiped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`,
|
|
841
|
+
};
|
|
842
|
+
});
|
|
843
|
+
}
|
|
844
|
+
toParams() {
|
|
845
|
+
return {
|
|
846
|
+
name: 'android_swipe_tool',
|
|
847
|
+
description: 'Swipes from a starting point to an ending point on the screen',
|
|
848
|
+
input_schema: {
|
|
849
|
+
type: 'object',
|
|
850
|
+
properties: {
|
|
851
|
+
startX: {
|
|
852
|
+
type: 'number',
|
|
853
|
+
description: 'The x (pixels from the left edge) coordinate of the start position',
|
|
854
|
+
},
|
|
855
|
+
startY: {
|
|
856
|
+
type: 'number',
|
|
857
|
+
description: 'The y (pixels from the top edge) coordinate of the start position',
|
|
858
|
+
},
|
|
859
|
+
endX: {
|
|
860
|
+
type: 'number',
|
|
861
|
+
description: 'The x (pixels from the left edge) coordinate of the end position',
|
|
862
|
+
},
|
|
863
|
+
endY: {
|
|
864
|
+
type: 'number',
|
|
865
|
+
description: 'The y (pixels from the top edge) coordinate of the end position',
|
|
866
|
+
},
|
|
867
|
+
},
|
|
868
|
+
required: ['startX', 'startY', 'endX', 'endY'],
|
|
869
|
+
},
|
|
870
|
+
};
|
|
871
|
+
}
|
|
872
|
+
}
|
|
873
|
+
exports.AndroidSwipeTool = AndroidSwipeTool;
|
|
874
|
+
class AndroidDragAndDropTool extends base_1.BaseAgentTool {
|
|
875
|
+
constructor(osAgentHandler) {
|
|
876
|
+
super();
|
|
877
|
+
this.osAgentHandler = osAgentHandler;
|
|
878
|
+
}
|
|
879
|
+
execute(command) {
|
|
880
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
881
|
+
yield this.osAgentHandler.AndroidDragAndDropTool(command.startX, command.startY, command.endX, command.endY);
|
|
882
|
+
return {
|
|
883
|
+
output: `Dragged and dropped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`,
|
|
884
|
+
};
|
|
885
|
+
});
|
|
886
|
+
}
|
|
887
|
+
toParams() {
|
|
888
|
+
return {
|
|
889
|
+
name: 'android_drag_and_drop_tool',
|
|
890
|
+
description: 'Drags and drops from a starting point to an ending point on the screen',
|
|
891
|
+
input_schema: {
|
|
892
|
+
type: 'object',
|
|
893
|
+
properties: {
|
|
894
|
+
startX: {
|
|
895
|
+
type: 'number',
|
|
896
|
+
description: 'The x (pixels from the left edge) coordinate of the start position',
|
|
897
|
+
},
|
|
898
|
+
startY: {
|
|
899
|
+
type: 'number',
|
|
900
|
+
description: 'The y (pixels from the top edge) coordinate of the start position',
|
|
901
|
+
},
|
|
902
|
+
endX: {
|
|
903
|
+
type: 'number',
|
|
904
|
+
description: 'The x (pixels from the left edge) coordinate of the end position',
|
|
905
|
+
},
|
|
906
|
+
endY: {
|
|
907
|
+
type: 'number',
|
|
908
|
+
description: 'The y (pixels from the top edge) coordinate of the end position',
|
|
909
|
+
},
|
|
910
|
+
},
|
|
911
|
+
required: ['startX', 'startY', 'endX', 'endY'],
|
|
912
|
+
},
|
|
913
|
+
};
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
exports.AndroidDragAndDropTool = AndroidDragAndDropTool;
|
|
917
|
+
class AndroidTapTool extends base_1.BaseAgentTool {
|
|
918
|
+
constructor(osAgentHandler) {
|
|
919
|
+
super();
|
|
920
|
+
this.osAgentHandler = osAgentHandler;
|
|
921
|
+
}
|
|
922
|
+
execute(command) {
|
|
923
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
924
|
+
yield this.osAgentHandler.AndroidTapTool(command.x, command.y);
|
|
925
|
+
return {
|
|
926
|
+
output: `Tapped the screen at ${command.x}, ${command.y}`,
|
|
927
|
+
};
|
|
928
|
+
});
|
|
929
|
+
}
|
|
930
|
+
toParams() {
|
|
931
|
+
return {
|
|
932
|
+
name: 'android_tap_tool',
|
|
933
|
+
description: 'Taps the screen at the specified coordinates',
|
|
934
|
+
input_schema: {
|
|
935
|
+
type: 'object',
|
|
936
|
+
properties: {
|
|
937
|
+
x: {
|
|
938
|
+
type: 'number',
|
|
939
|
+
description: 'The x (pixels from the left edge) coordinate of the tap position',
|
|
940
|
+
},
|
|
941
|
+
y: {
|
|
942
|
+
type: 'number',
|
|
943
|
+
description: 'The y (pixels from the top edge) coordinate of the tap position',
|
|
944
|
+
},
|
|
945
|
+
},
|
|
946
|
+
required: ['x', 'y'],
|
|
947
|
+
},
|
|
948
|
+
};
|
|
949
|
+
}
|
|
950
|
+
}
|
|
951
|
+
exports.AndroidTapTool = AndroidTapTool;
|
|
952
|
+
class AndroidShellCommandTool extends base_1.BaseAgentTool {
|
|
953
|
+
constructor(osAgentHandler) {
|
|
954
|
+
super();
|
|
955
|
+
this.osAgentHandler = osAgentHandler;
|
|
956
|
+
}
|
|
957
|
+
execute(command) {
|
|
958
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
959
|
+
yield this.osAgentHandler.executeAndroidShellCommand(command.command);
|
|
960
|
+
return {
|
|
961
|
+
output: `Executed shell command: ${command.command}`,
|
|
962
|
+
};
|
|
963
|
+
});
|
|
964
|
+
}
|
|
965
|
+
toParams() {
|
|
966
|
+
return {
|
|
967
|
+
name: 'android_shell_command_tool',
|
|
968
|
+
description: 'Executes a shell command on the Android device. It does not return the output of the command.',
|
|
969
|
+
input_schema: {
|
|
970
|
+
type: 'object',
|
|
971
|
+
properties: {
|
|
972
|
+
command: {
|
|
973
|
+
type: 'string',
|
|
974
|
+
description: 'The shell command to execute without the "adb shell" prefix',
|
|
975
|
+
},
|
|
976
|
+
},
|
|
977
|
+
required: ['command'],
|
|
978
|
+
},
|
|
979
|
+
};
|
|
980
|
+
}
|
|
981
|
+
}
|
|
982
|
+
exports.AndroidShellCommandTool = AndroidShellCommandTool;
|
|
@@ -563,7 +563,6 @@ export declare class UiControlClient extends ApiCommands {
|
|
|
563
563
|
* #### Cross-Platform Coordination
|
|
564
564
|
* ```typescript
|
|
565
565
|
* // Share context between desktop and mobile agents
|
|
566
|
-
* await auiAndroid.agent.configureAsAndroidAgent();
|
|
567
566
|
*
|
|
568
567
|
* const history = await auiDesktop.act("Copy username from desktop app");
|
|
569
568
|
* await auiAndroid.act("Paste username into mobile login", {
|
|
@@ -46,7 +46,7 @@ class UiControlClient extends dsl_1.ApiCommands {
|
|
|
46
46
|
return __awaiter(this, void 0, void 0, function* () {
|
|
47
47
|
const connectionState = yield this.executionRuntime.connect();
|
|
48
48
|
yield this.agent.initializeOsAgentHandler();
|
|
49
|
-
yield this.agent.
|
|
49
|
+
yield this.agent.configureAgent();
|
|
50
50
|
return connectionState;
|
|
51
51
|
});
|
|
52
52
|
}
|
|
@@ -4,12 +4,12 @@ import { ExecutionRuntime } from '../../../execution/execution-runtime';
|
|
|
4
4
|
export declare class AskUIAgent extends ClaudeAgent {
|
|
5
5
|
private osAgentHandler;
|
|
6
6
|
private executionRuntime;
|
|
7
|
+
private runtime;
|
|
7
8
|
constructor(executionRuntime: ExecutionRuntime);
|
|
8
9
|
isConnected(): boolean;
|
|
9
10
|
initializeOsAgentHandler(): Promise<void>;
|
|
10
11
|
getOsAgentHandler(): OsAgentHandler;
|
|
11
|
-
|
|
12
|
-
configureAsAndroidAgent(): Promise<void>;
|
|
12
|
+
configureAgent(): Promise<void>;
|
|
13
13
|
private static DesktopSystemPrompt;
|
|
14
14
|
private static AndroidSystemPrompt;
|
|
15
15
|
}
|
|
@@ -7,12 +7,13 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
7
7
|
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
8
|
});
|
|
9
9
|
};
|
|
10
|
-
import { DesktopPressAndReleaseKeysTool, MouseClickTool, MouseMoveTool, MouseScrollTool, OsAgentHandler, ScreenShotTool, TypeTool, AgentErrorTool, AndroidSequenceKeyPressTool, AndroidSingleKeyPressTool, ExecuteShellCommandTool, DesktopKeyHoldDownTool, DesktopKeyReleaseTool, MouseReleaseLeftButtonTool, MouseHoldLeftButtonDownTool, MouseDragAndDropTool, WaitTool, PrintTool, } from './tools/os-agent-tools';
|
|
10
|
+
import { DesktopPressAndReleaseKeysTool, MouseClickTool, MouseMoveTool, MouseScrollTool, OsAgentHandler, ScreenShotTool, TypeTool, AgentErrorTool, AndroidSequenceKeyPressTool, AndroidSingleKeyPressTool, ExecuteShellCommandTool, DesktopKeyHoldDownTool, DesktopKeyReleaseTool, MouseReleaseLeftButtonTool, MouseHoldLeftButtonDownTool, MouseDragAndDropTool, WaitTool, PrintTool, AndroidSwipeTool, AndroidDragAndDropTool, AndroidTapTool, AndroidShellCommandTool, } from './tools/os-agent-tools';
|
|
11
11
|
import { ClaudeAgent } from './claude-agent';
|
|
12
12
|
export class AskUIAgent extends ClaudeAgent {
|
|
13
13
|
constructor(executionRuntime) {
|
|
14
14
|
super((params) => executionRuntime.predictActResponse(params));
|
|
15
15
|
this.osAgentHandler = undefined;
|
|
16
|
+
this.runtime = 'desktop';
|
|
16
17
|
this.executionRuntime = executionRuntime;
|
|
17
18
|
}
|
|
18
19
|
isConnected() {
|
|
@@ -21,6 +22,7 @@ export class AskUIAgent extends ClaudeAgent {
|
|
|
21
22
|
initializeOsAgentHandler() {
|
|
22
23
|
return __awaiter(this, void 0, void 0, function* () {
|
|
23
24
|
this.osAgentHandler = yield OsAgentHandler.createInstance(this.executionRuntime);
|
|
25
|
+
this.runtime = this.osAgentHandler.runtime;
|
|
24
26
|
});
|
|
25
27
|
}
|
|
26
28
|
getOsAgentHandler() {
|
|
@@ -29,50 +31,48 @@ export class AskUIAgent extends ClaudeAgent {
|
|
|
29
31
|
}
|
|
30
32
|
return this.osAgentHandler;
|
|
31
33
|
}
|
|
32
|
-
|
|
34
|
+
configureAgent() {
|
|
33
35
|
return __awaiter(this, void 0, void 0, function* () {
|
|
34
36
|
if (!this.osAgentHandler) {
|
|
35
37
|
throw new Error('Agent OS client is not connected');
|
|
36
38
|
}
|
|
37
|
-
|
|
39
|
+
let systemPrompt = AskUIAgent.DesktopSystemPrompt;
|
|
40
|
+
let tools = [
|
|
38
41
|
new AgentErrorTool(),
|
|
39
42
|
new PrintTool(),
|
|
40
|
-
new ScreenShotTool(this.osAgentHandler),
|
|
41
|
-
new MouseMoveTool(this.osAgentHandler),
|
|
42
|
-
new MouseClickTool(this.osAgentHandler),
|
|
43
|
-
new MouseScrollTool(this.osAgentHandler),
|
|
44
|
-
new TypeTool(this.osAgentHandler),
|
|
45
|
-
new DesktopPressAndReleaseKeysTool(this.osAgentHandler),
|
|
46
|
-
new DesktopKeyHoldDownTool(this.osAgentHandler),
|
|
47
|
-
new DesktopKeyReleaseTool(this.osAgentHandler),
|
|
48
|
-
new MouseHoldLeftButtonDownTool(this.osAgentHandler),
|
|
49
|
-
new MouseReleaseLeftButtonTool(this.osAgentHandler),
|
|
50
|
-
new MouseDragAndDropTool(this.osAgentHandler),
|
|
51
43
|
new WaitTool(),
|
|
52
|
-
];
|
|
53
|
-
this.setTools(tools);
|
|
54
|
-
this.setSystemPrompt(AskUIAgent.DesktopSystemPrompt);
|
|
55
|
-
});
|
|
56
|
-
}
|
|
57
|
-
configureAsAndroidAgent() {
|
|
58
|
-
return __awaiter(this, void 0, void 0, function* () {
|
|
59
|
-
if (!this.osAgentHandler) {
|
|
60
|
-
throw new Error('Agent OS client is not connected');
|
|
61
|
-
}
|
|
62
|
-
const tools = [
|
|
63
|
-
new AgentErrorTool(),
|
|
64
44
|
new ScreenShotTool(this.osAgentHandler),
|
|
65
|
-
new MouseMoveTool(this.osAgentHandler),
|
|
66
|
-
new MouseClickTool(this.osAgentHandler),
|
|
67
|
-
new MouseScrollTool(this.osAgentHandler),
|
|
68
|
-
new AndroidSingleKeyPressTool(this.osAgentHandler),
|
|
69
|
-
new AndroidSequenceKeyPressTool(this.osAgentHandler),
|
|
70
45
|
new TypeTool(this.osAgentHandler),
|
|
71
|
-
new ExecuteShellCommandTool(this.osAgentHandler),
|
|
72
|
-
new WaitTool(),
|
|
73
46
|
];
|
|
47
|
+
if (this.runtime === 'desktop') {
|
|
48
|
+
tools = [
|
|
49
|
+
...tools,
|
|
50
|
+
new MouseMoveTool(this.osAgentHandler),
|
|
51
|
+
new MouseClickTool(this.osAgentHandler),
|
|
52
|
+
new MouseScrollTool(this.osAgentHandler),
|
|
53
|
+
new DesktopPressAndReleaseKeysTool(this.osAgentHandler),
|
|
54
|
+
new DesktopKeyHoldDownTool(this.osAgentHandler),
|
|
55
|
+
new DesktopKeyReleaseTool(this.osAgentHandler),
|
|
56
|
+
new MouseHoldLeftButtonDownTool(this.osAgentHandler),
|
|
57
|
+
new MouseReleaseLeftButtonTool(this.osAgentHandler),
|
|
58
|
+
new MouseDragAndDropTool(this.osAgentHandler),
|
|
59
|
+
new ExecuteShellCommandTool(this.osAgentHandler),
|
|
60
|
+
];
|
|
61
|
+
}
|
|
62
|
+
if (this.runtime === 'android') {
|
|
63
|
+
tools = [
|
|
64
|
+
...tools,
|
|
65
|
+
new AndroidSingleKeyPressTool(this.osAgentHandler),
|
|
66
|
+
new AndroidSequenceKeyPressTool(this.osAgentHandler),
|
|
67
|
+
new AndroidSwipeTool(this.osAgentHandler),
|
|
68
|
+
new AndroidDragAndDropTool(this.osAgentHandler),
|
|
69
|
+
new AndroidTapTool(this.osAgentHandler),
|
|
70
|
+
new AndroidShellCommandTool(this.osAgentHandler),
|
|
71
|
+
];
|
|
72
|
+
systemPrompt = AskUIAgent.AndroidSystemPrompt;
|
|
73
|
+
}
|
|
74
74
|
this.setTools(tools);
|
|
75
|
-
this.setSystemPrompt(
|
|
75
|
+
this.setSystemPrompt(systemPrompt);
|
|
76
76
|
});
|
|
77
77
|
}
|
|
78
78
|
}
|
|
@@ -5,13 +5,14 @@ import { ExecutionRuntime } from '../../../../execution/execution-runtime';
|
|
|
5
5
|
import { ControlCommand } from '../../../ui-control-commands';
|
|
6
6
|
export declare class OsAgentHandler {
|
|
7
7
|
private AgentOsClient;
|
|
8
|
-
private targetResolution;
|
|
9
8
|
private screenDimensions;
|
|
9
|
+
runtime: 'android' | 'desktop';
|
|
10
|
+
private targetResolution;
|
|
10
11
|
private paddingInfo;
|
|
11
12
|
constructor(AgentOsClient: ExecutionRuntime, screenDimensions: {
|
|
12
13
|
width: number;
|
|
13
14
|
height: number;
|
|
14
|
-
});
|
|
15
|
+
}, runtime: 'android' | 'desktop');
|
|
15
16
|
private updatePaddingInfo;
|
|
16
17
|
static createInstance(AgentOsClient: ExecutionRuntime): Promise<OsAgentHandler>;
|
|
17
18
|
getTargetResolution(): {
|
|
@@ -27,7 +28,7 @@ export declare class OsAgentHandler {
|
|
|
27
28
|
scaleCoordinates(source: 'api' | 'computer', x: number, y: number): [number, number];
|
|
28
29
|
requestControl(controlCommand: ControlCommand): Promise<void>;
|
|
29
30
|
mouseMove(x: number, y: number): Promise<void>;
|
|
30
|
-
mouseClick(button:
|
|
31
|
+
mouseClick(button: 'left' | 'right' | 'middle', doubleClick: boolean): Promise<void>;
|
|
31
32
|
mouseScroll(dx: number, dy: number): Promise<void>;
|
|
32
33
|
mouseHoldLeftButtonDown(): Promise<void>;
|
|
33
34
|
mouseReleaseLeftButton(): Promise<void>;
|
|
@@ -38,6 +39,10 @@ export declare class OsAgentHandler {
|
|
|
38
39
|
androidKeyPress(key: ANDROID_KEY): Promise<void>;
|
|
39
40
|
androidKeySequencePress(keys: ANDROID_KEY[]): Promise<void>;
|
|
40
41
|
executeShellCommand(command: string): Promise<void>;
|
|
42
|
+
AndroidSwipeTool(startX: number, startY: number, endX: number, endY: number): Promise<void>;
|
|
43
|
+
AndroidDragAndDropTool(startX: number, startY: number, endX: number, endY: number): Promise<void>;
|
|
44
|
+
AndroidTapTool(x: number, y: number): Promise<void>;
|
|
45
|
+
executeAndroidShellCommand(command: string): Promise<void>;
|
|
41
46
|
}
|
|
42
47
|
export declare class ScreenShotTool extends BaseAgentTool {
|
|
43
48
|
private osAgentHandler;
|
|
@@ -175,3 +180,42 @@ export declare class PrintTool extends BaseAgentTool {
|
|
|
175
180
|
}): Promise<ToolResult>;
|
|
176
181
|
toParams(): BetaTool;
|
|
177
182
|
}
|
|
183
|
+
export declare class AndroidSwipeTool extends BaseAgentTool {
|
|
184
|
+
private osAgentHandler;
|
|
185
|
+
constructor(osAgentHandler: OsAgentHandler);
|
|
186
|
+
execute(command: {
|
|
187
|
+
startX: number;
|
|
188
|
+
startY: number;
|
|
189
|
+
endX: number;
|
|
190
|
+
endY: number;
|
|
191
|
+
}): Promise<ToolResult>;
|
|
192
|
+
toParams(): BetaTool;
|
|
193
|
+
}
|
|
194
|
+
export declare class AndroidDragAndDropTool extends BaseAgentTool {
|
|
195
|
+
private osAgentHandler;
|
|
196
|
+
constructor(osAgentHandler: OsAgentHandler);
|
|
197
|
+
execute(command: {
|
|
198
|
+
startX: number;
|
|
199
|
+
startY: number;
|
|
200
|
+
endX: number;
|
|
201
|
+
endY: number;
|
|
202
|
+
}): Promise<ToolResult>;
|
|
203
|
+
toParams(): BetaTool;
|
|
204
|
+
}
|
|
205
|
+
export declare class AndroidTapTool extends BaseAgentTool {
|
|
206
|
+
private osAgentHandler;
|
|
207
|
+
constructor(osAgentHandler: OsAgentHandler);
|
|
208
|
+
execute(command: {
|
|
209
|
+
x: number;
|
|
210
|
+
y: number;
|
|
211
|
+
}): Promise<ToolResult>;
|
|
212
|
+
toParams(): BetaTool;
|
|
213
|
+
}
|
|
214
|
+
export declare class AndroidShellCommandTool extends BaseAgentTool {
|
|
215
|
+
private osAgentHandler;
|
|
216
|
+
constructor(osAgentHandler: OsAgentHandler);
|
|
217
|
+
execute(command: {
|
|
218
|
+
command: string;
|
|
219
|
+
}): Promise<ToolResult>;
|
|
220
|
+
toParams(): BetaTool;
|
|
221
|
+
}
|
|
@@ -13,8 +13,10 @@ import { ControlCommand, ControlCommandCode, InputEvent, Action, } from '../../.
|
|
|
13
13
|
import { Base64Image } from '../../../../utils/base_64_image/base-64-image';
|
|
14
14
|
import { AgentError } from './agent-errors';
|
|
15
15
|
export class OsAgentHandler {
|
|
16
|
-
constructor(AgentOsClient, screenDimensions) {
|
|
16
|
+
constructor(AgentOsClient, screenDimensions, runtime) {
|
|
17
17
|
this.AgentOsClient = AgentOsClient;
|
|
18
|
+
this.screenDimensions = screenDimensions;
|
|
19
|
+
this.runtime = runtime;
|
|
18
20
|
this.targetResolution = { width: 1280, height: 800 };
|
|
19
21
|
this.paddingInfo = null;
|
|
20
22
|
this.screenDimensions = screenDimensions;
|
|
@@ -45,7 +47,7 @@ export class OsAgentHandler {
|
|
|
45
47
|
scaledWidth,
|
|
46
48
|
scaledHeight,
|
|
47
49
|
padLeft,
|
|
48
|
-
padTop
|
|
50
|
+
padTop,
|
|
49
51
|
};
|
|
50
52
|
}
|
|
51
53
|
// Add image support to act, an check for function overload in typescript.
|
|
@@ -53,10 +55,12 @@ export class OsAgentHandler {
|
|
|
53
55
|
return __awaiter(this, void 0, void 0, function* () {
|
|
54
56
|
const base64ImageString = yield AgentOsClient.getScreenshot();
|
|
55
57
|
const image_info = yield (yield Base64Image.fromString(base64ImageString)).getInfo();
|
|
58
|
+
const startingArguments = yield AgentOsClient.getStartingArguments();
|
|
59
|
+
const runtime = startingArguments['runtime'] === 'android' ? 'android' : 'desktop';
|
|
56
60
|
return new OsAgentHandler(AgentOsClient, {
|
|
57
61
|
width: image_info.width,
|
|
58
62
|
height: image_info.height,
|
|
59
|
-
});
|
|
63
|
+
}, runtime);
|
|
60
64
|
});
|
|
61
65
|
}
|
|
62
66
|
getTargetResolution() {
|
|
@@ -87,7 +91,7 @@ export class OsAgentHandler {
|
|
|
87
91
|
if (!this.paddingInfo) {
|
|
88
92
|
throw new ToolError('Padding information not initialized');
|
|
89
93
|
}
|
|
90
|
-
const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop } = this.paddingInfo;
|
|
94
|
+
const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop, } = this.paddingInfo;
|
|
91
95
|
if (source === 'api') {
|
|
92
96
|
if (x > this.targetResolution.width || y > this.targetResolution.height || x < 0 || y < 0) {
|
|
93
97
|
throw new ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
|
|
@@ -128,23 +132,21 @@ export class OsAgentHandler {
|
|
|
128
132
|
return __awaiter(this, void 0, void 0, function* () {
|
|
129
133
|
let action = InputEvent.MOUSE_CLICK_LEFT;
|
|
130
134
|
if (doubleClick) {
|
|
131
|
-
if (button ===
|
|
135
|
+
if (button === 'left') {
|
|
132
136
|
action = InputEvent.MOUSE_CLICK_DOUBLE_LEFT;
|
|
133
137
|
}
|
|
134
|
-
else if (button ===
|
|
138
|
+
else if (button === 'right') {
|
|
135
139
|
action = InputEvent.MOUSE_CLICK_DOUBLE_RIGHT;
|
|
136
140
|
}
|
|
137
|
-
else if (button ===
|
|
141
|
+
else if (button === 'middle') {
|
|
138
142
|
action = InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE;
|
|
139
143
|
}
|
|
140
144
|
}
|
|
141
|
-
else {
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
action = InputEvent.MOUSE_CLICK_MIDDLE;
|
|
147
|
-
}
|
|
145
|
+
else if (button === 'right') {
|
|
146
|
+
action = InputEvent.MOUSE_CLICK_RIGHT;
|
|
147
|
+
}
|
|
148
|
+
else if (button === 'middle') {
|
|
149
|
+
action = InputEvent.MOUSE_CLICK_MIDDLE;
|
|
148
150
|
}
|
|
149
151
|
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(action, { x: 0, y: 0 }, '', {})]);
|
|
150
152
|
yield this.requestControl(controlCommand);
|
|
@@ -181,8 +183,8 @@ export class OsAgentHandler {
|
|
|
181
183
|
desktopKeyHoldDown(key_1) {
|
|
182
184
|
return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
|
|
183
185
|
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.KEY_PRESS, { x: 0, y: 0 }, '', {
|
|
184
|
-
key
|
|
185
|
-
modifiers
|
|
186
|
+
key,
|
|
187
|
+
modifiers,
|
|
186
188
|
})]);
|
|
187
189
|
yield this.requestControl(controlCommand);
|
|
188
190
|
});
|
|
@@ -190,8 +192,8 @@ export class OsAgentHandler {
|
|
|
190
192
|
desktopKeyRelease(key_1) {
|
|
191
193
|
return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
|
|
192
194
|
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.KEY_RELEASE, { x: 0, y: 0 }, '', {
|
|
193
|
-
key
|
|
194
|
-
modifiers
|
|
195
|
+
key,
|
|
196
|
+
modifiers,
|
|
195
197
|
})]);
|
|
196
198
|
yield this.requestControl(controlCommand);
|
|
197
199
|
});
|
|
@@ -220,6 +222,47 @@ export class OsAgentHandler {
|
|
|
220
222
|
yield this.requestControl(controlCommand);
|
|
221
223
|
});
|
|
222
224
|
}
|
|
225
|
+
AndroidSwipeTool(startX, startY, endX, endY) {
|
|
226
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
227
|
+
if (this.runtime !== 'android') {
|
|
228
|
+
throw new ToolError('This tool is only available on Android devices');
|
|
229
|
+
}
|
|
230
|
+
[startX, startY] = this.scaleCoordinates('api', startX, startY);
|
|
231
|
+
[endX, endY] = this.scaleCoordinates('api', endX, endY);
|
|
232
|
+
const adbCommand = `input swipe ${startX} ${startY} ${endX} ${endY}`;
|
|
233
|
+
yield this.executeShellCommand(adbCommand);
|
|
234
|
+
});
|
|
235
|
+
}
|
|
236
|
+
AndroidDragAndDropTool(startX, startY, endX, endY) {
|
|
237
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
238
|
+
if (this.runtime !== 'android') {
|
|
239
|
+
throw new ToolError('This tool is only available on Android devices');
|
|
240
|
+
}
|
|
241
|
+
[startX, startY] = this.scaleCoordinates('api', startX, startY);
|
|
242
|
+
[endX, endY] = this.scaleCoordinates('api', endX, endY);
|
|
243
|
+
const adbCommand = `input draganddrop ${startX} ${startY} ${endX} ${endY}`;
|
|
244
|
+
yield this.executeShellCommand(adbCommand);
|
|
245
|
+
});
|
|
246
|
+
}
|
|
247
|
+
AndroidTapTool(x, y) {
|
|
248
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
249
|
+
if (this.runtime !== 'android') {
|
|
250
|
+
throw new ToolError('This tool is only available on Android devices');
|
|
251
|
+
}
|
|
252
|
+
[x, y] = this.scaleCoordinates('api', x, y);
|
|
253
|
+
const adbCommand = `input tap ${x} ${y}`;
|
|
254
|
+
yield this.executeShellCommand(adbCommand);
|
|
255
|
+
});
|
|
256
|
+
}
|
|
257
|
+
executeAndroidShellCommand(command) {
|
|
258
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
259
|
+
if (this.runtime !== 'android') {
|
|
260
|
+
throw new ToolError('This tool is only available on Android devices');
|
|
261
|
+
}
|
|
262
|
+
command = command.replace(/^adb shell /, '');
|
|
263
|
+
yield this.executeShellCommand(command);
|
|
264
|
+
});
|
|
265
|
+
}
|
|
223
266
|
}
|
|
224
267
|
export class ScreenShotTool extends BaseAgentTool {
|
|
225
268
|
constructor(osAgentHandler) {
|
|
@@ -590,7 +633,8 @@ export class AndroidSingleKeyPressTool extends BaseAgentTool {
|
|
|
590
633
|
}
|
|
591
634
|
execute(command) {
|
|
592
635
|
return __awaiter(this, void 0, void 0, function* () {
|
|
593
|
-
|
|
636
|
+
const adbCommand = `input keyevent ${command.key.toUpperCase()}`;
|
|
637
|
+
yield this.osAgentHandler.executeShellCommand(adbCommand);
|
|
594
638
|
return {
|
|
595
639
|
output: `Pressed Android key ${command.key}`,
|
|
596
640
|
};
|
|
@@ -621,7 +665,8 @@ export class AndroidSequenceKeyPressTool extends BaseAgentTool {
|
|
|
621
665
|
}
|
|
622
666
|
execute(command) {
|
|
623
667
|
return __awaiter(this, void 0, void 0, function* () {
|
|
624
|
-
|
|
668
|
+
const adbCommand = `input keyevent ${command.keys.map((key) => key.toUpperCase()).join(' ')}`;
|
|
669
|
+
yield this.osAgentHandler.executeShellCommand(adbCommand);
|
|
625
670
|
return {
|
|
626
671
|
output: `Pressed Android keys: ${command.keys.join(', ')}`,
|
|
627
672
|
};
|
|
@@ -690,7 +735,7 @@ export class ExecuteShellCommandTool extends BaseAgentTool {
|
|
|
690
735
|
toParams() {
|
|
691
736
|
return {
|
|
692
737
|
name: 'execute_shell_command_tool',
|
|
693
|
-
description: 'Executes a shell command',
|
|
738
|
+
description: 'Executes a shell command. It does not return the output of the command.',
|
|
694
739
|
input_schema: {
|
|
695
740
|
type: 'object',
|
|
696
741
|
properties: {
|
|
@@ -710,7 +755,7 @@ export class WaitTool extends BaseAgentTool {
|
|
|
710
755
|
}
|
|
711
756
|
execute(command) {
|
|
712
757
|
return __awaiter(this, void 0, void 0, function* () {
|
|
713
|
-
yield new Promise(resolve => setTimeout(resolve, command.milliseconds));
|
|
758
|
+
yield new Promise((resolve) => setTimeout(resolve, command.milliseconds));
|
|
714
759
|
return {
|
|
715
760
|
output: `Waited for ${command.milliseconds} milliseconds`,
|
|
716
761
|
};
|
|
@@ -762,3 +807,151 @@ export class PrintTool extends BaseAgentTool {
|
|
|
762
807
|
};
|
|
763
808
|
}
|
|
764
809
|
}
|
|
810
|
+
export class AndroidSwipeTool extends BaseAgentTool {
|
|
811
|
+
constructor(osAgentHandler) {
|
|
812
|
+
super();
|
|
813
|
+
this.osAgentHandler = osAgentHandler;
|
|
814
|
+
}
|
|
815
|
+
execute(command) {
|
|
816
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
817
|
+
yield this.osAgentHandler.AndroidSwipeTool(command.startX, command.startY, command.endX, command.endY);
|
|
818
|
+
return {
|
|
819
|
+
output: `Swiped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`,
|
|
820
|
+
};
|
|
821
|
+
});
|
|
822
|
+
}
|
|
823
|
+
toParams() {
|
|
824
|
+
return {
|
|
825
|
+
name: 'android_swipe_tool',
|
|
826
|
+
description: 'Swipes from a starting point to an ending point on the screen',
|
|
827
|
+
input_schema: {
|
|
828
|
+
type: 'object',
|
|
829
|
+
properties: {
|
|
830
|
+
startX: {
|
|
831
|
+
type: 'number',
|
|
832
|
+
description: 'The x (pixels from the left edge) coordinate of the start position',
|
|
833
|
+
},
|
|
834
|
+
startY: {
|
|
835
|
+
type: 'number',
|
|
836
|
+
description: 'The y (pixels from the top edge) coordinate of the start position',
|
|
837
|
+
},
|
|
838
|
+
endX: {
|
|
839
|
+
type: 'number',
|
|
840
|
+
description: 'The x (pixels from the left edge) coordinate of the end position',
|
|
841
|
+
},
|
|
842
|
+
endY: {
|
|
843
|
+
type: 'number',
|
|
844
|
+
description: 'The y (pixels from the top edge) coordinate of the end position',
|
|
845
|
+
},
|
|
846
|
+
},
|
|
847
|
+
required: ['startX', 'startY', 'endX', 'endY'],
|
|
848
|
+
},
|
|
849
|
+
};
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
export class AndroidDragAndDropTool extends BaseAgentTool {
|
|
853
|
+
constructor(osAgentHandler) {
|
|
854
|
+
super();
|
|
855
|
+
this.osAgentHandler = osAgentHandler;
|
|
856
|
+
}
|
|
857
|
+
execute(command) {
|
|
858
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
859
|
+
yield this.osAgentHandler.AndroidDragAndDropTool(command.startX, command.startY, command.endX, command.endY);
|
|
860
|
+
return {
|
|
861
|
+
output: `Dragged and dropped from ${command.startX}, ${command.startY} to ${command.endX}, ${command.endY} on the screen`,
|
|
862
|
+
};
|
|
863
|
+
});
|
|
864
|
+
}
|
|
865
|
+
toParams() {
|
|
866
|
+
return {
|
|
867
|
+
name: 'android_drag_and_drop_tool',
|
|
868
|
+
description: 'Drags and drops from a starting point to an ending point on the screen',
|
|
869
|
+
input_schema: {
|
|
870
|
+
type: 'object',
|
|
871
|
+
properties: {
|
|
872
|
+
startX: {
|
|
873
|
+
type: 'number',
|
|
874
|
+
description: 'The x (pixels from the left edge) coordinate of the start position',
|
|
875
|
+
},
|
|
876
|
+
startY: {
|
|
877
|
+
type: 'number',
|
|
878
|
+
description: 'The y (pixels from the top edge) coordinate of the start position',
|
|
879
|
+
},
|
|
880
|
+
endX: {
|
|
881
|
+
type: 'number',
|
|
882
|
+
description: 'The x (pixels from the left edge) coordinate of the end position',
|
|
883
|
+
},
|
|
884
|
+
endY: {
|
|
885
|
+
type: 'number',
|
|
886
|
+
description: 'The y (pixels from the top edge) coordinate of the end position',
|
|
887
|
+
},
|
|
888
|
+
},
|
|
889
|
+
required: ['startX', 'startY', 'endX', 'endY'],
|
|
890
|
+
},
|
|
891
|
+
};
|
|
892
|
+
}
|
|
893
|
+
}
|
|
894
|
+
export class AndroidTapTool extends BaseAgentTool {
|
|
895
|
+
constructor(osAgentHandler) {
|
|
896
|
+
super();
|
|
897
|
+
this.osAgentHandler = osAgentHandler;
|
|
898
|
+
}
|
|
899
|
+
execute(command) {
|
|
900
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
901
|
+
yield this.osAgentHandler.AndroidTapTool(command.x, command.y);
|
|
902
|
+
return {
|
|
903
|
+
output: `Tapped the screen at ${command.x}, ${command.y}`,
|
|
904
|
+
};
|
|
905
|
+
});
|
|
906
|
+
}
|
|
907
|
+
toParams() {
|
|
908
|
+
return {
|
|
909
|
+
name: 'android_tap_tool',
|
|
910
|
+
description: 'Taps the screen at the specified coordinates',
|
|
911
|
+
input_schema: {
|
|
912
|
+
type: 'object',
|
|
913
|
+
properties: {
|
|
914
|
+
x: {
|
|
915
|
+
type: 'number',
|
|
916
|
+
description: 'The x (pixels from the left edge) coordinate of the tap position',
|
|
917
|
+
},
|
|
918
|
+
y: {
|
|
919
|
+
type: 'number',
|
|
920
|
+
description: 'The y (pixels from the top edge) coordinate of the tap position',
|
|
921
|
+
},
|
|
922
|
+
},
|
|
923
|
+
required: ['x', 'y'],
|
|
924
|
+
},
|
|
925
|
+
};
|
|
926
|
+
}
|
|
927
|
+
}
|
|
928
|
+
export class AndroidShellCommandTool extends BaseAgentTool {
|
|
929
|
+
constructor(osAgentHandler) {
|
|
930
|
+
super();
|
|
931
|
+
this.osAgentHandler = osAgentHandler;
|
|
932
|
+
}
|
|
933
|
+
execute(command) {
|
|
934
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
935
|
+
yield this.osAgentHandler.executeAndroidShellCommand(command.command);
|
|
936
|
+
return {
|
|
937
|
+
output: `Executed shell command: ${command.command}`,
|
|
938
|
+
};
|
|
939
|
+
});
|
|
940
|
+
}
|
|
941
|
+
toParams() {
|
|
942
|
+
return {
|
|
943
|
+
name: 'android_shell_command_tool',
|
|
944
|
+
description: 'Executes a shell command on the Android device. It does not return the output of the command.',
|
|
945
|
+
input_schema: {
|
|
946
|
+
type: 'object',
|
|
947
|
+
properties: {
|
|
948
|
+
command: {
|
|
949
|
+
type: 'string',
|
|
950
|
+
description: 'The shell command to execute without the "adb shell" prefix',
|
|
951
|
+
},
|
|
952
|
+
},
|
|
953
|
+
required: ['command'],
|
|
954
|
+
},
|
|
955
|
+
};
|
|
956
|
+
}
|
|
957
|
+
}
|
|
@@ -563,7 +563,6 @@ export declare class UiControlClient extends ApiCommands {
|
|
|
563
563
|
* #### Cross-Platform Coordination
|
|
564
564
|
* ```typescript
|
|
565
565
|
* // Share context between desktop and mobile agents
|
|
566
|
-
* await auiAndroid.agent.configureAsAndroidAgent();
|
|
567
566
|
*
|
|
568
567
|
* const history = await auiDesktop.act("Copy username from desktop app");
|
|
569
568
|
* await auiAndroid.act("Paste username into mobile login", {
|
|
@@ -43,7 +43,7 @@ export class UiControlClient extends ApiCommands {
|
|
|
43
43
|
return __awaiter(this, void 0, void 0, function* () {
|
|
44
44
|
const connectionState = yield this.executionRuntime.connect();
|
|
45
45
|
yield this.agent.initializeOsAgentHandler();
|
|
46
|
-
yield this.agent.
|
|
46
|
+
yield this.agent.configureAgent();
|
|
47
47
|
return connectionState;
|
|
48
48
|
});
|
|
49
49
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "askui",
|
|
3
|
-
"version": "0.29.0",
|
|
3
|
+
"version": "0.30.0",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": "askui GmbH <info@askui.com> (http://www.askui.com/)",
|
|
6
6
|
"description": "Reliable, automated end-to-end-testing that depends on what is shown on your screen instead of the technology you are running on",
|
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
import { UiControlClient } from 'askui';
|
|
2
|
-
{{ allure_stepreporter_import }}
|
|
3
|
-
|
|
4
|
-
// Client is necessary to use the askui API
|
|
5
|
-
// eslint-disable-next-line import/no-mutable-exports
|
|
6
|
-
let aui: UiControlClient;
|
|
7
|
-
|
|
8
|
-
{{ timeout_placeholder }}
|
|
9
|
-
|
|
10
|
-
beforeAll(async () => {
|
|
11
|
-
aui = await UiControlClient.build({
|
|
12
|
-
{{ credentials }}
|
|
13
|
-
{{ reporter_placeholder }}
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
await aui.connect();
|
|
17
|
-
});
|
|
18
|
-
|
|
19
|
-
beforeEach(async () => {
|
|
20
|
-
await aui.startVideoRecording();
|
|
21
|
-
});
|
|
22
|
-
|
|
23
|
-
afterEach(async () => {
|
|
24
|
-
await aui.stopVideoRecording();
|
|
25
|
-
{{ allure_stepreporter_attach_video }}
|
|
26
|
-
});
|
|
27
|
-
|
|
28
|
-
afterAll(async () => {
|
|
29
|
-
aui.disconnect();
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
export { aui };
|