askui 0.25.0 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/core/models/anthropic/askui-agent.js +20 -32
- package/dist/cjs/core/models/anthropic/claude-agent.d.ts +16 -4
- package/dist/cjs/core/models/anthropic/claude-agent.js +43 -5
- package/dist/cjs/core/models/anthropic/index.d.ts +1 -2
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +59 -5
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +340 -75
- package/dist/cjs/core/ui-control-commands/input-event.d.ts +3 -1
- package/dist/cjs/core/ui-control-commands/input-event.js +2 -0
- package/dist/cjs/execution/execution-runtime.d.ts +4 -0
- package/dist/cjs/execution/inference-client.d.ts +4 -0
- package/dist/cjs/execution/ui-control-client.d.ts +84 -34
- package/dist/cjs/execution/ui-control-client.js +4 -48
- package/dist/cjs/lib/interactive_cli/create-example-project.js +1 -4
- package/dist/esm/core/models/anthropic/askui-agent.js +21 -33
- package/dist/esm/core/models/anthropic/claude-agent.d.ts +16 -4
- package/dist/esm/core/models/anthropic/claude-agent.js +43 -5
- package/dist/esm/core/models/anthropic/index.d.ts +1 -2
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +59 -5
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +332 -72
- package/dist/esm/core/ui-control-commands/input-event.d.ts +3 -1
- package/dist/esm/core/ui-control-commands/input-event.js +2 -0
- package/dist/esm/execution/execution-runtime.d.ts +4 -0
- package/dist/esm/execution/inference-client.d.ts +4 -0
- package/dist/esm/execution/ui-control-client.d.ts +84 -34
- package/dist/esm/execution/ui-control-client.js +4 -48
- package/dist/esm/lib/interactive_cli/create-example-project.js +1 -4
- package/dist/example_projects_templates/typescript/askui_example/my-first-askui-test-suite.test.ts +7 -17
- package/package.json +2 -2
|
@@ -9,7 +9,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
9
9
|
});
|
|
10
10
|
};
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
-
exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.
|
|
12
|
+
exports.WaitTool = exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopKeyReleaseTool = exports.DesktopKeyHoldDownTool = exports.DesktopPressAndReleaseKeysTool = exports.MouseReleaseLeftButtonTool = exports.MouseHoldLeftButtonDownTool = exports.MouseDragAndDropTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
|
|
13
13
|
const dsl_1 = require("../../../../execution/dsl");
|
|
14
14
|
const base_1 = require("./base");
|
|
15
15
|
const ui_control_commands_1 = require("../../../ui-control-commands");
|
|
@@ -18,9 +18,40 @@ const agent_errors_1 = require("./agent-errors");
|
|
|
18
18
|
class OsAgentHandler {
|
|
19
19
|
constructor(AgentOsClient, screenDimensions) {
|
|
20
20
|
this.AgentOsClient = AgentOsClient;
|
|
21
|
-
this.
|
|
21
|
+
this.targetResolution = { width: 1280, height: 800 };
|
|
22
|
+
this.paddingInfo = null;
|
|
22
23
|
this.screenDimensions = screenDimensions;
|
|
24
|
+
this.updatePaddingInfo();
|
|
25
|
+
}
|
|
26
|
+
updatePaddingInfo() {
|
|
27
|
+
const targetAspectRatio = this.targetResolution.width / this.targetResolution.height;
|
|
28
|
+
const screenAspectRatio = this.screenDimensions.width / this.screenDimensions.height;
|
|
29
|
+
let scaledWidth;
|
|
30
|
+
let scaledHeight;
|
|
31
|
+
let scaleFactor;
|
|
32
|
+
let padLeft = 0;
|
|
33
|
+
let padTop = 0;
|
|
34
|
+
if (targetAspectRatio > screenAspectRatio) {
|
|
35
|
+
scaleFactor = this.targetResolution.height / this.screenDimensions.height;
|
|
36
|
+
scaledWidth = Math.floor(this.screenDimensions.width * scaleFactor);
|
|
37
|
+
scaledHeight = this.targetResolution.height;
|
|
38
|
+
padLeft = Math.floor((this.targetResolution.width - scaledWidth) / 2);
|
|
39
|
+
}
|
|
40
|
+
else {
|
|
41
|
+
scaleFactor = this.targetResolution.width / this.screenDimensions.width;
|
|
42
|
+
scaledWidth = this.targetResolution.width;
|
|
43
|
+
scaledHeight = Math.floor(this.screenDimensions.height * scaleFactor);
|
|
44
|
+
padTop = Math.floor((this.targetResolution.height - scaledHeight) / 2);
|
|
45
|
+
}
|
|
46
|
+
this.paddingInfo = {
|
|
47
|
+
scaleFactor,
|
|
48
|
+
scaledWidth,
|
|
49
|
+
scaledHeight,
|
|
50
|
+
padLeft,
|
|
51
|
+
padTop
|
|
52
|
+
};
|
|
23
53
|
}
|
|
54
|
+
// Add image support to act, an check for function overload in typescript.
|
|
24
55
|
static createInstance(AgentOsClient) {
|
|
25
56
|
return __awaiter(this, void 0, void 0, function* () {
|
|
26
57
|
const base64ImageString = yield AgentOsClient.getScreenshot();
|
|
@@ -32,10 +63,14 @@ class OsAgentHandler {
|
|
|
32
63
|
});
|
|
33
64
|
}
|
|
34
65
|
getTargetResolution() {
|
|
35
|
-
return this.
|
|
66
|
+
return this.targetResolution;
|
|
67
|
+
}
|
|
68
|
+
getScreenDimensions() {
|
|
69
|
+
return this.screenDimensions;
|
|
36
70
|
}
|
|
37
71
|
setTargetResolution(width, height) {
|
|
38
|
-
this.
|
|
72
|
+
this.targetResolution = { width, height };
|
|
73
|
+
this.updatePaddingInfo();
|
|
39
74
|
}
|
|
40
75
|
takeScreenshot() {
|
|
41
76
|
return __awaiter(this, void 0, void 0, function* () {
|
|
@@ -46,36 +81,130 @@ class OsAgentHandler {
|
|
|
46
81
|
width: image_info.width,
|
|
47
82
|
height: image_info.height,
|
|
48
83
|
};
|
|
49
|
-
|
|
84
|
+
this.updatePaddingInfo();
|
|
85
|
+
const resized_image = yield base64Image.resizeWithSameAspectRatio(this.targetResolution.width, this.targetResolution.height);
|
|
50
86
|
return resized_image.toString(false);
|
|
51
87
|
});
|
|
52
88
|
}
|
|
53
89
|
scaleCoordinates(source, x, y) {
|
|
54
|
-
|
|
55
|
-
|
|
90
|
+
if (!this.paddingInfo) {
|
|
91
|
+
throw new base_1.ToolError('Padding information not initialized');
|
|
92
|
+
}
|
|
93
|
+
const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop } = this.paddingInfo;
|
|
56
94
|
if (source === 'api') {
|
|
57
|
-
if (x > this.
|
|
95
|
+
if (x > this.targetResolution.width || y > this.targetResolution.height || x < 0 || y < 0) {
|
|
58
96
|
throw new base_1.ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
|
|
59
|
-
+ `(${this.
|
|
97
|
+
+ `(${this.targetResolution.width}x${this.targetResolution.height})`);
|
|
98
|
+
}
|
|
99
|
+
const adjustedX = x - padLeft;
|
|
100
|
+
const adjustedY = y - padTop;
|
|
101
|
+
if (adjustedX < 0 || adjustedX > scaledWidth || adjustedY < 0 || adjustedY > scaledHeight) {
|
|
102
|
+
throw new base_1.ToolError(`Coordinates ${x}, ${y} are outside the scaled image area `
|
|
103
|
+
+ `(${scaledWidth}x${scaledHeight} with padding ${padLeft},${padTop})`);
|
|
60
104
|
}
|
|
61
105
|
return [
|
|
62
|
-
Math.round(
|
|
63
|
-
Math.round(
|
|
106
|
+
Math.round(adjustedX / scaleFactor),
|
|
107
|
+
Math.round(adjustedY / scaleFactor),
|
|
64
108
|
];
|
|
65
109
|
}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
];
|
|
110
|
+
const apiX = Math.round(x * scaleFactor) + padLeft;
|
|
111
|
+
const apiY = Math.round(y * scaleFactor) + padTop;
|
|
112
|
+
return [apiX, apiY];
|
|
70
113
|
}
|
|
71
114
|
requestControl(controlCommand) {
|
|
72
115
|
return __awaiter(this, void 0, void 0, function* () {
|
|
73
116
|
for (const action of controlCommand.actions) {
|
|
74
|
-
|
|
117
|
+
if (action.inputEvent === ui_control_commands_1.InputEvent.MOUSE_MOVE || action.inputEvent === ui_control_commands_1.InputEvent.MOUSE_SCROLL) {
|
|
118
|
+
[action.position.x, action.position.y] = this.scaleCoordinates('api', action.position.x, action.position.y);
|
|
119
|
+
}
|
|
75
120
|
}
|
|
76
121
|
yield this.AgentOsClient.requestControl(controlCommand);
|
|
77
122
|
});
|
|
78
123
|
}
|
|
124
|
+
mouseMove(x, y) {
|
|
125
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
126
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_MOVE, { x, y }, '', {})]);
|
|
127
|
+
yield this.requestControl(controlCommand);
|
|
128
|
+
});
|
|
129
|
+
}
|
|
130
|
+
mouseClick(button, doubleClick) {
|
|
131
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
132
|
+
let action = ui_control_commands_1.InputEvent.MOUSE_CLICK_LEFT;
|
|
133
|
+
if (doubleClick) {
|
|
134
|
+
if (button === "left") {
|
|
135
|
+
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_LEFT;
|
|
136
|
+
}
|
|
137
|
+
else if (button === "right") {
|
|
138
|
+
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_RIGHT;
|
|
139
|
+
}
|
|
140
|
+
else if (button === "middle") {
|
|
141
|
+
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
else {
|
|
145
|
+
if (button === "right") {
|
|
146
|
+
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_RIGHT;
|
|
147
|
+
}
|
|
148
|
+
else if (button === "middle") {
|
|
149
|
+
action = ui_control_commands_1.InputEvent.MOUSE_CLICK_MIDDLE;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(action, { x: 0, y: 0 }, '', {})]);
|
|
153
|
+
yield this.requestControl(controlCommand);
|
|
154
|
+
});
|
|
155
|
+
}
|
|
156
|
+
mouseScroll(dx, dy) {
|
|
157
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
158
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_SCROLL, { x: dx, y: dy }, '', {})]);
|
|
159
|
+
yield this.requestControl(controlCommand);
|
|
160
|
+
});
|
|
161
|
+
}
|
|
162
|
+
mouseHoldLeftButtonDown() {
|
|
163
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
164
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_DOWN, { x: 0, y: 0 }, '', {})]);
|
|
165
|
+
yield this.requestControl(controlCommand);
|
|
166
|
+
});
|
|
167
|
+
}
|
|
168
|
+
mouseReleaseLeftButton() {
|
|
169
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
170
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_UP, { x: 0, y: 0 }, '', {})]);
|
|
171
|
+
yield this.requestControl(controlCommand);
|
|
172
|
+
});
|
|
173
|
+
}
|
|
174
|
+
desktopKeyPressAndRelease(key_1) {
|
|
175
|
+
return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
|
|
176
|
+
let keyString = key;
|
|
177
|
+
if (modifiers.length > 0) {
|
|
178
|
+
keyString = `${modifiers.join('+')}+${key}`;
|
|
179
|
+
}
|
|
180
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, keyString, {})]);
|
|
181
|
+
yield this.requestControl(controlCommand);
|
|
182
|
+
});
|
|
183
|
+
}
|
|
184
|
+
desktopKeyHoldDown(key_1) {
|
|
185
|
+
return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
|
|
186
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.KEY_PRESS, { x: 0, y: 0 }, '', {
|
|
187
|
+
key: key,
|
|
188
|
+
modifiers: modifiers,
|
|
189
|
+
})]);
|
|
190
|
+
yield this.requestControl(controlCommand);
|
|
191
|
+
});
|
|
192
|
+
}
|
|
193
|
+
desktopKeyRelease(key_1) {
|
|
194
|
+
return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
|
|
195
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.KEY_RELEASE, { x: 0, y: 0 }, '', {
|
|
196
|
+
key: key,
|
|
197
|
+
modifiers: modifiers,
|
|
198
|
+
})]);
|
|
199
|
+
yield this.requestControl(controlCommand);
|
|
200
|
+
});
|
|
201
|
+
}
|
|
202
|
+
typeText(text) {
|
|
203
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
204
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.TYPE, { x: 0, y: 0 }, text, {})]);
|
|
205
|
+
yield this.requestControl(controlCommand);
|
|
206
|
+
});
|
|
207
|
+
}
|
|
79
208
|
}
|
|
80
209
|
exports.OsAgentHandler = OsAgentHandler;
|
|
81
210
|
class ScreenShotTool extends base_1.BaseAgentTool {
|
|
@@ -95,7 +224,7 @@ class ScreenShotTool extends base_1.BaseAgentTool {
|
|
|
95
224
|
toParams() {
|
|
96
225
|
return {
|
|
97
226
|
name: 'screenshot_tool',
|
|
98
|
-
description: 'Takes a screenshot of the current screen and returns it as a base64 image',
|
|
227
|
+
description: 'Takes a screenshot of the current screen and returns it as a base64 image.',
|
|
99
228
|
input_schema: { type: 'object', properties: {}, required: [] },
|
|
100
229
|
};
|
|
101
230
|
}
|
|
@@ -108,8 +237,7 @@ class MouseMoveTool extends base_1.BaseAgentTool {
|
|
|
108
237
|
}
|
|
109
238
|
execute(command) {
|
|
110
239
|
return __awaiter(this, void 0, void 0, function* () {
|
|
111
|
-
|
|
112
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
240
|
+
yield this.osAgentHandler.mouseMove(command.x, command.y);
|
|
113
241
|
return {
|
|
114
242
|
output: `Moved mouse to (${command.x}, ${command.y})`,
|
|
115
243
|
};
|
|
@@ -124,11 +252,11 @@ class MouseMoveTool extends base_1.BaseAgentTool {
|
|
|
124
252
|
properties: {
|
|
125
253
|
x: {
|
|
126
254
|
type: 'number',
|
|
127
|
-
description: 'The x
|
|
255
|
+
description: 'The x (pixels from the left edge) coordinate to move the mouse to',
|
|
128
256
|
},
|
|
129
257
|
y: {
|
|
130
258
|
type: 'number',
|
|
131
|
-
description: 'The y
|
|
259
|
+
description: 'The y (pixels from the top edge) coordinate to move the mouse to',
|
|
132
260
|
},
|
|
133
261
|
},
|
|
134
262
|
},
|
|
@@ -143,33 +271,7 @@ class MouseClickTool extends base_1.BaseAgentTool {
|
|
|
143
271
|
}
|
|
144
272
|
execute(command) {
|
|
145
273
|
return __awaiter(this, void 0, void 0, function* () {
|
|
146
|
-
|
|
147
|
-
if (command.doubleClick) {
|
|
148
|
-
if (command.button === 'left') {
|
|
149
|
-
controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_LEFT, { x: 0, y: 0 }, '', {})]);
|
|
150
|
-
}
|
|
151
|
-
if (command.button === 'right') {
|
|
152
|
-
controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_RIGHT, { x: 0, y: 0 }, '', {})]);
|
|
153
|
-
}
|
|
154
|
-
if (command.button === 'middle') {
|
|
155
|
-
controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE, { x: 0, y: 0 }, '', {})]);
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
else {
|
|
159
|
-
if (command.button === 'left') {
|
|
160
|
-
controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_LEFT, { x: 0, y: 0 }, '', {})]);
|
|
161
|
-
}
|
|
162
|
-
if (command.button === 'right') {
|
|
163
|
-
controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_RIGHT, { x: 0, y: 0 }, '', {})]);
|
|
164
|
-
}
|
|
165
|
-
if (command.button === 'middle') {
|
|
166
|
-
controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_CLICK_MIDDLE, { x: 0, y: 0 }, '', {})]);
|
|
167
|
-
}
|
|
168
|
-
}
|
|
169
|
-
if (!controlCommand) {
|
|
170
|
-
throw new base_1.ToolError('Invalid input parameter for mouse click tool');
|
|
171
|
-
}
|
|
172
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
274
|
+
yield this.osAgentHandler.mouseClick(command.button, command.doubleClick);
|
|
173
275
|
const returnedMessage = command.doubleClick ? `Double clicked ${command.button} button` : `Clicked ${command.button} button`;
|
|
174
276
|
return {
|
|
175
277
|
output: returnedMessage,
|
|
@@ -206,8 +308,7 @@ class MouseScrollTool extends base_1.BaseAgentTool {
|
|
|
206
308
|
}
|
|
207
309
|
execute(command) {
|
|
208
310
|
return __awaiter(this, void 0, void 0, function* () {
|
|
209
|
-
|
|
210
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
311
|
+
yield this.osAgentHandler.mouseScroll(command.dx, command.dy);
|
|
211
312
|
return {
|
|
212
313
|
output: `Scrolled by (${command.dx}, ${command.dy})`,
|
|
213
314
|
};
|
|
@@ -222,11 +323,11 @@ class MouseScrollTool extends base_1.BaseAgentTool {
|
|
|
222
323
|
properties: {
|
|
223
324
|
dx: {
|
|
224
325
|
type: 'number',
|
|
225
|
-
description: 'The amount to scroll horizontally',
|
|
326
|
+
description: 'The amount to scroll horizontally (positive is right, negative is left)',
|
|
226
327
|
},
|
|
227
328
|
dy: {
|
|
228
329
|
type: 'number',
|
|
229
|
-
description: 'The amount to scroll vertically',
|
|
330
|
+
description: 'The amount to scroll vertically (positive is down, negative is up)',
|
|
230
331
|
},
|
|
231
332
|
},
|
|
232
333
|
required: ['dx', 'dy'],
|
|
@@ -235,20 +336,107 @@ class MouseScrollTool extends base_1.BaseAgentTool {
|
|
|
235
336
|
}
|
|
236
337
|
}
|
|
237
338
|
exports.MouseScrollTool = MouseScrollTool;
|
|
238
|
-
class
|
|
339
|
+
class MouseDragAndDropTool extends base_1.BaseAgentTool {
|
|
239
340
|
constructor(osAgentHandler) {
|
|
240
341
|
super();
|
|
241
342
|
this.osAgentHandler = osAgentHandler;
|
|
242
343
|
}
|
|
243
344
|
execute(command) {
|
|
244
345
|
return __awaiter(this, void 0, void 0, function* () {
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
346
|
+
yield this.osAgentHandler.mouseMove(command.startX, command.startY);
|
|
347
|
+
yield this.osAgentHandler.mouseHoldLeftButtonDown();
|
|
348
|
+
yield this.osAgentHandler.mouseMove(command.endX, command.endY);
|
|
349
|
+
yield this.osAgentHandler.mouseReleaseLeftButton();
|
|
250
350
|
return {
|
|
251
|
-
output: `
|
|
351
|
+
output: `Dragged from (${command.startX}, ${command.startY}) to (${command.endX}, ${command.endY})`,
|
|
352
|
+
};
|
|
353
|
+
});
|
|
354
|
+
}
|
|
355
|
+
toParams() {
|
|
356
|
+
return {
|
|
357
|
+
name: 'mouse_drag_and_drop_tool',
|
|
358
|
+
description: 'Drags the mouse from the specified start coordinates to the specified end coordinates. The top left corner of the screen is (0,0)',
|
|
359
|
+
input_schema: {
|
|
360
|
+
type: 'object',
|
|
361
|
+
properties: {
|
|
362
|
+
startX: {
|
|
363
|
+
type: 'number',
|
|
364
|
+
description: 'The x (pixels from the left edge) coordinate of the start position',
|
|
365
|
+
},
|
|
366
|
+
startY: {
|
|
367
|
+
type: 'number',
|
|
368
|
+
description: 'The y (pixels from the top edge) coordinate of the start position',
|
|
369
|
+
},
|
|
370
|
+
endX: {
|
|
371
|
+
type: 'number',
|
|
372
|
+
description: 'The x (pixels from the left edge) coordinate of the end position',
|
|
373
|
+
},
|
|
374
|
+
endY: {
|
|
375
|
+
type: 'number',
|
|
376
|
+
description: 'The y (pixels from the top edge) coordinate of the end position',
|
|
377
|
+
},
|
|
378
|
+
},
|
|
379
|
+
required: ['startX', 'startY', 'endX', 'endY'],
|
|
380
|
+
},
|
|
381
|
+
};
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
exports.MouseDragAndDropTool = MouseDragAndDropTool;
|
|
385
|
+
class MouseHoldLeftButtonDownTool extends base_1.BaseAgentTool {
|
|
386
|
+
constructor(osAgentHandler) {
|
|
387
|
+
super();
|
|
388
|
+
this.osAgentHandler = osAgentHandler;
|
|
389
|
+
}
|
|
390
|
+
execute() {
|
|
391
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
392
|
+
yield this.osAgentHandler.mouseHoldLeftButtonDown();
|
|
393
|
+
return {
|
|
394
|
+
output: 'Holding down left mouse button',
|
|
395
|
+
};
|
|
396
|
+
});
|
|
397
|
+
}
|
|
398
|
+
toParams() {
|
|
399
|
+
return {
|
|
400
|
+
name: 'mouse_hold_left_button_down_tool',
|
|
401
|
+
description: 'Hold down the left mouse button at the current position.',
|
|
402
|
+
input_schema: { type: 'object', properties: {}, required: [] },
|
|
403
|
+
};
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
exports.MouseHoldLeftButtonDownTool = MouseHoldLeftButtonDownTool;
|
|
407
|
+
class MouseReleaseLeftButtonTool extends base_1.BaseAgentTool {
|
|
408
|
+
constructor(osAgentHandler) {
|
|
409
|
+
super();
|
|
410
|
+
this.osAgentHandler = osAgentHandler;
|
|
411
|
+
}
|
|
412
|
+
execute() {
|
|
413
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
414
|
+
yield this.osAgentHandler.mouseReleaseLeftButton();
|
|
415
|
+
return {
|
|
416
|
+
output: 'Released left mouse button',
|
|
417
|
+
};
|
|
418
|
+
});
|
|
419
|
+
}
|
|
420
|
+
toParams() {
|
|
421
|
+
return {
|
|
422
|
+
name: 'mouse_release_left_button_tool',
|
|
423
|
+
description: 'Release the left mouse button at the current position.',
|
|
424
|
+
input_schema: { type: 'object', properties: {}, required: [] },
|
|
425
|
+
};
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
exports.MouseReleaseLeftButtonTool = MouseReleaseLeftButtonTool;
|
|
429
|
+
class DesktopPressAndReleaseKeysTool extends base_1.BaseAgentTool {
|
|
430
|
+
constructor(osAgentHandler) {
|
|
431
|
+
super();
|
|
432
|
+
this.osAgentHandler = osAgentHandler;
|
|
433
|
+
}
|
|
434
|
+
execute(command) {
|
|
435
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
436
|
+
const modifiers = command.modifiers || [];
|
|
437
|
+
yield this.osAgentHandler.desktopKeyPressAndRelease(command.key, modifiers);
|
|
438
|
+
return {
|
|
439
|
+
output: `Pressed key ${command.key} with modifiers ${modifiers.join(' ')}`,
|
|
252
440
|
};
|
|
253
441
|
});
|
|
254
442
|
}
|
|
@@ -264,15 +452,54 @@ class DesktopKeyPressSequenceTool extends base_1.BaseAgentTool {
|
|
|
264
452
|
enum: dsl_1.PC_KEY_VALUES,
|
|
265
453
|
description: 'The key to press',
|
|
266
454
|
},
|
|
267
|
-
|
|
268
|
-
type: '
|
|
269
|
-
|
|
270
|
-
|
|
455
|
+
modifiers: {
|
|
456
|
+
type: 'array',
|
|
457
|
+
items: {
|
|
458
|
+
type: 'string',
|
|
459
|
+
enum: dsl_1.MODIFIER_KEY_VALUES,
|
|
460
|
+
},
|
|
461
|
+
description: 'The modifiers to press',
|
|
271
462
|
},
|
|
272
|
-
|
|
463
|
+
},
|
|
464
|
+
required: ['key'],
|
|
465
|
+
},
|
|
466
|
+
};
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
exports.DesktopPressAndReleaseKeysTool = DesktopPressAndReleaseKeysTool;
|
|
470
|
+
class DesktopKeyHoldDownTool extends base_1.BaseAgentTool {
|
|
471
|
+
constructor(osAgentHandler) {
|
|
472
|
+
super();
|
|
473
|
+
this.osAgentHandler = osAgentHandler;
|
|
474
|
+
}
|
|
475
|
+
execute(command) {
|
|
476
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
477
|
+
const modifiers = command.modifiers || [];
|
|
478
|
+
yield this.osAgentHandler.desktopKeyHoldDown(command.key, modifiers);
|
|
479
|
+
return {
|
|
480
|
+
output: `Holding down key ${command.key} with modifiers ${modifiers.join(' ')}`,
|
|
481
|
+
};
|
|
482
|
+
});
|
|
483
|
+
}
|
|
484
|
+
toParams() {
|
|
485
|
+
return {
|
|
486
|
+
name: 'desktop_key_hold_down_tool',
|
|
487
|
+
description: 'Hold down a key and optional modifiers. Keys will be still pressed after the tool is finished.',
|
|
488
|
+
input_schema: {
|
|
489
|
+
type: 'object',
|
|
490
|
+
properties: {
|
|
491
|
+
key: {
|
|
273
492
|
type: 'string',
|
|
274
|
-
enum: dsl_1.MODIFIER_KEY_VALUES,
|
|
275
|
-
description: 'The
|
|
493
|
+
enum: [...dsl_1.PC_KEY_VALUES, ...dsl_1.MODIFIER_KEY_VALUES],
|
|
494
|
+
description: 'The key to hold down',
|
|
495
|
+
},
|
|
496
|
+
modifiers: {
|
|
497
|
+
type: 'array',
|
|
498
|
+
items: {
|
|
499
|
+
type: 'string',
|
|
500
|
+
enum: dsl_1.MODIFIER_KEY_VALUES,
|
|
501
|
+
},
|
|
502
|
+
description: 'The modifiers to hold down',
|
|
276
503
|
},
|
|
277
504
|
},
|
|
278
505
|
required: ['key'],
|
|
@@ -280,32 +507,40 @@ class DesktopKeyPressSequenceTool extends base_1.BaseAgentTool {
|
|
|
280
507
|
};
|
|
281
508
|
}
|
|
282
509
|
}
|
|
283
|
-
exports.
|
|
284
|
-
class
|
|
510
|
+
exports.DesktopKeyHoldDownTool = DesktopKeyHoldDownTool;
|
|
511
|
+
class DesktopKeyReleaseTool extends base_1.BaseAgentTool {
|
|
285
512
|
constructor(osAgentHandler) {
|
|
286
513
|
super();
|
|
287
514
|
this.osAgentHandler = osAgentHandler;
|
|
288
515
|
}
|
|
289
516
|
execute(command) {
|
|
290
517
|
return __awaiter(this, void 0, void 0, function* () {
|
|
291
|
-
const
|
|
292
|
-
yield this.osAgentHandler.
|
|
518
|
+
const modifiers = command.modifiers || [];
|
|
519
|
+
yield this.osAgentHandler.desktopKeyRelease(command.key, modifiers);
|
|
293
520
|
return {
|
|
294
|
-
output: `
|
|
521
|
+
output: `Released key ${command.key} with modifiers ${modifiers.join(' ')}`,
|
|
295
522
|
};
|
|
296
523
|
});
|
|
297
524
|
}
|
|
298
525
|
toParams() {
|
|
299
526
|
return {
|
|
300
|
-
name: '
|
|
301
|
-
description: '
|
|
527
|
+
name: 'desktop_key_release_tool',
|
|
528
|
+
description: 'Releases a key and optional modifiers. This can be used after keys were held down with the desktop_key_hold_down_tool',
|
|
302
529
|
input_schema: {
|
|
303
530
|
type: 'object',
|
|
304
531
|
properties: {
|
|
305
532
|
key: {
|
|
306
533
|
type: 'string',
|
|
307
534
|
enum: [...dsl_1.PC_KEY_VALUES, ...dsl_1.MODIFIER_KEY_VALUES],
|
|
308
|
-
description: 'The key to
|
|
535
|
+
description: 'The key to release',
|
|
536
|
+
},
|
|
537
|
+
modifiers: {
|
|
538
|
+
type: 'array',
|
|
539
|
+
items: {
|
|
540
|
+
type: 'string',
|
|
541
|
+
enum: dsl_1.MODIFIER_KEY_VALUES,
|
|
542
|
+
},
|
|
543
|
+
description: 'The modifiers to release',
|
|
309
544
|
},
|
|
310
545
|
},
|
|
311
546
|
required: ['key'],
|
|
@@ -313,7 +548,7 @@ class DesktopSingleKeyPressTool extends base_1.BaseAgentTool {
|
|
|
313
548
|
};
|
|
314
549
|
}
|
|
315
550
|
}
|
|
316
|
-
exports.
|
|
551
|
+
exports.DesktopKeyReleaseTool = DesktopKeyReleaseTool;
|
|
317
552
|
class TypeTool extends base_1.BaseAgentTool {
|
|
318
553
|
constructor(osAgentHandler) {
|
|
319
554
|
super();
|
|
@@ -474,3 +709,33 @@ class ExecuteShellCommandTool extends base_1.BaseAgentTool {
|
|
|
474
709
|
}
|
|
475
710
|
}
|
|
476
711
|
exports.ExecuteShellCommandTool = ExecuteShellCommandTool;
|
|
712
|
+
class WaitTool extends base_1.BaseAgentTool {
|
|
713
|
+
constructor() {
|
|
714
|
+
super();
|
|
715
|
+
}
|
|
716
|
+
execute(command) {
|
|
717
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
718
|
+
yield new Promise(resolve => setTimeout(resolve, command.milliseconds));
|
|
719
|
+
return {
|
|
720
|
+
output: `Waited for ${command.milliseconds} milliseconds`,
|
|
721
|
+
};
|
|
722
|
+
});
|
|
723
|
+
}
|
|
724
|
+
toParams() {
|
|
725
|
+
return {
|
|
726
|
+
name: 'wait_tool',
|
|
727
|
+
description: 'Waits for a specified number of milliseconds',
|
|
728
|
+
input_schema: {
|
|
729
|
+
type: 'object',
|
|
730
|
+
properties: {
|
|
731
|
+
milliseconds: {
|
|
732
|
+
type: 'number',
|
|
733
|
+
description: 'The number of milliseconds to wait',
|
|
734
|
+
},
|
|
735
|
+
},
|
|
736
|
+
required: ['milliseconds'],
|
|
737
|
+
},
|
|
738
|
+
};
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
exports.WaitTool = WaitTool;
|
|
@@ -21,4 +21,6 @@ var InputEvent;
|
|
|
21
21
|
InputEvent["MOUSE_DOWN"] = "MOUSE_DOWN";
|
|
22
22
|
InputEvent["MOUSE_UP"] = "MOUSE_UP";
|
|
23
23
|
InputEvent["EXECUTE_COMMAND"] = "EXECUTE_COMMAND";
|
|
24
|
+
InputEvent["KEY_PRESS"] = "KEY_PRESS";
|
|
25
|
+
InputEvent["KEY_RELEASE"] = "KEY_RELEASE";
|
|
24
26
|
})(InputEvent || (exports.InputEvent = InputEvent = {}));
|
|
@@ -26,6 +26,10 @@ export declare class InferenceClient {
|
|
|
26
26
|
predictVQAAnswer(prompt: string, image: string, config?: object): Promise<any>;
|
|
27
27
|
predictActResponse(params: {
|
|
28
28
|
max_tokens: number;
|
|
29
|
+
tool_choice?: {
|
|
30
|
+
type: 'tool' | 'any' | 'auto';
|
|
31
|
+
name?: string;
|
|
32
|
+
};
|
|
29
33
|
messages: BetaMessageParam[];
|
|
30
34
|
model: string;
|
|
31
35
|
system?: string;
|