askui 0.25.1 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/core/models/anthropic/askui-agent.js +20 -32
- package/dist/cjs/core/models/anthropic/claude-agent.d.ts +16 -4
- package/dist/cjs/core/models/anthropic/claude-agent.js +43 -5
- package/dist/cjs/core/models/anthropic/index.d.ts +1 -2
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +59 -5
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +340 -75
- package/dist/cjs/core/ui-control-commands/input-event.d.ts +3 -1
- package/dist/cjs/core/ui-control-commands/input-event.js +2 -0
- package/dist/cjs/execution/execution-runtime.d.ts +4 -0
- package/dist/cjs/execution/inference-client.d.ts +4 -0
- package/dist/cjs/execution/ui-control-client.d.ts +84 -34
- package/dist/cjs/execution/ui-control-client.js +4 -48
- package/dist/esm/core/models/anthropic/askui-agent.js +21 -33
- package/dist/esm/core/models/anthropic/claude-agent.d.ts +16 -4
- package/dist/esm/core/models/anthropic/claude-agent.js +43 -5
- package/dist/esm/core/models/anthropic/index.d.ts +1 -2
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +59 -5
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +332 -72
- package/dist/esm/core/ui-control-commands/input-event.d.ts +3 -1
- package/dist/esm/core/ui-control-commands/input-event.js +2 -0
- package/dist/esm/execution/execution-runtime.d.ts +4 -0
- package/dist/esm/execution/inference-client.d.ts +4 -0
- package/dist/esm/execution/ui-control-client.d.ts +84 -34
- package/dist/esm/execution/ui-control-client.js +4 -48
- package/package.json +2 -2
|
@@ -15,9 +15,40 @@ import { AgentError } from './agent-errors';
|
|
|
15
15
|
export class OsAgentHandler {
|
|
16
16
|
constructor(AgentOsClient, screenDimensions) {
|
|
17
17
|
this.AgentOsClient = AgentOsClient;
|
|
18
|
-
this.
|
|
18
|
+
this.targetResolution = { width: 1280, height: 800 };
|
|
19
|
+
this.paddingInfo = null;
|
|
19
20
|
this.screenDimensions = screenDimensions;
|
|
21
|
+
this.updatePaddingInfo();
|
|
22
|
+
}
|
|
23
|
+
updatePaddingInfo() {
|
|
24
|
+
const targetAspectRatio = this.targetResolution.width / this.targetResolution.height;
|
|
25
|
+
const screenAspectRatio = this.screenDimensions.width / this.screenDimensions.height;
|
|
26
|
+
let scaledWidth;
|
|
27
|
+
let scaledHeight;
|
|
28
|
+
let scaleFactor;
|
|
29
|
+
let padLeft = 0;
|
|
30
|
+
let padTop = 0;
|
|
31
|
+
if (targetAspectRatio > screenAspectRatio) {
|
|
32
|
+
scaleFactor = this.targetResolution.height / this.screenDimensions.height;
|
|
33
|
+
scaledWidth = Math.floor(this.screenDimensions.width * scaleFactor);
|
|
34
|
+
scaledHeight = this.targetResolution.height;
|
|
35
|
+
padLeft = Math.floor((this.targetResolution.width - scaledWidth) / 2);
|
|
36
|
+
}
|
|
37
|
+
else {
|
|
38
|
+
scaleFactor = this.targetResolution.width / this.screenDimensions.width;
|
|
39
|
+
scaledWidth = this.targetResolution.width;
|
|
40
|
+
scaledHeight = Math.floor(this.screenDimensions.height * scaleFactor);
|
|
41
|
+
padTop = Math.floor((this.targetResolution.height - scaledHeight) / 2);
|
|
42
|
+
}
|
|
43
|
+
this.paddingInfo = {
|
|
44
|
+
scaleFactor,
|
|
45
|
+
scaledWidth,
|
|
46
|
+
scaledHeight,
|
|
47
|
+
padLeft,
|
|
48
|
+
padTop
|
|
49
|
+
};
|
|
20
50
|
}
|
|
51
|
+
// Add image support to act, an check for function overload in typescript.
|
|
21
52
|
static createInstance(AgentOsClient) {
|
|
22
53
|
return __awaiter(this, void 0, void 0, function* () {
|
|
23
54
|
const base64ImageString = yield AgentOsClient.getScreenshot();
|
|
@@ -29,10 +60,14 @@ export class OsAgentHandler {
|
|
|
29
60
|
});
|
|
30
61
|
}
|
|
31
62
|
getTargetResolution() {
|
|
32
|
-
return this.
|
|
63
|
+
return this.targetResolution;
|
|
64
|
+
}
|
|
65
|
+
getScreenDimensions() {
|
|
66
|
+
return this.screenDimensions;
|
|
33
67
|
}
|
|
34
68
|
setTargetResolution(width, height) {
|
|
35
|
-
this.
|
|
69
|
+
this.targetResolution = { width, height };
|
|
70
|
+
this.updatePaddingInfo();
|
|
36
71
|
}
|
|
37
72
|
takeScreenshot() {
|
|
38
73
|
return __awaiter(this, void 0, void 0, function* () {
|
|
@@ -43,36 +78,130 @@ export class OsAgentHandler {
|
|
|
43
78
|
width: image_info.width,
|
|
44
79
|
height: image_info.height,
|
|
45
80
|
};
|
|
46
|
-
|
|
81
|
+
this.updatePaddingInfo();
|
|
82
|
+
const resized_image = yield base64Image.resizeWithSameAspectRatio(this.targetResolution.width, this.targetResolution.height);
|
|
47
83
|
return resized_image.toString(false);
|
|
48
84
|
});
|
|
49
85
|
}
|
|
50
86
|
scaleCoordinates(source, x, y) {
|
|
51
|
-
|
|
52
|
-
|
|
87
|
+
if (!this.paddingInfo) {
|
|
88
|
+
throw new ToolError('Padding information not initialized');
|
|
89
|
+
}
|
|
90
|
+
const { scaleFactor, scaledWidth, scaledHeight, padLeft, padTop } = this.paddingInfo;
|
|
53
91
|
if (source === 'api') {
|
|
54
|
-
if (x > this.
|
|
92
|
+
if (x > this.targetResolution.width || y > this.targetResolution.height || x < 0 || y < 0) {
|
|
55
93
|
throw new ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
|
|
56
|
-
+ `(${this.
|
|
94
|
+
+ `(${this.targetResolution.width}x${this.targetResolution.height})`);
|
|
95
|
+
}
|
|
96
|
+
const adjustedX = x - padLeft;
|
|
97
|
+
const adjustedY = y - padTop;
|
|
98
|
+
if (adjustedX < 0 || adjustedX > scaledWidth || adjustedY < 0 || adjustedY > scaledHeight) {
|
|
99
|
+
throw new ToolError(`Coordinates ${x}, ${y} are outside the scaled image area `
|
|
100
|
+
+ `(${scaledWidth}x${scaledHeight} with padding ${padLeft},${padTop})`);
|
|
57
101
|
}
|
|
58
102
|
return [
|
|
59
|
-
Math.round(
|
|
60
|
-
Math.round(
|
|
103
|
+
Math.round(adjustedX / scaleFactor),
|
|
104
|
+
Math.round(adjustedY / scaleFactor),
|
|
61
105
|
];
|
|
62
106
|
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
];
|
|
107
|
+
const apiX = Math.round(x * scaleFactor) + padLeft;
|
|
108
|
+
const apiY = Math.round(y * scaleFactor) + padTop;
|
|
109
|
+
return [apiX, apiY];
|
|
67
110
|
}
|
|
68
111
|
requestControl(controlCommand) {
|
|
69
112
|
return __awaiter(this, void 0, void 0, function* () {
|
|
70
113
|
for (const action of controlCommand.actions) {
|
|
71
|
-
|
|
114
|
+
if (action.inputEvent === InputEvent.MOUSE_MOVE || action.inputEvent === InputEvent.MOUSE_SCROLL) {
|
|
115
|
+
[action.position.x, action.position.y] = this.scaleCoordinates('api', action.position.x, action.position.y);
|
|
116
|
+
}
|
|
72
117
|
}
|
|
73
118
|
yield this.AgentOsClient.requestControl(controlCommand);
|
|
74
119
|
});
|
|
75
120
|
}
|
|
121
|
+
mouseMove(x, y) {
|
|
122
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
123
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_MOVE, { x, y }, '', {})]);
|
|
124
|
+
yield this.requestControl(controlCommand);
|
|
125
|
+
});
|
|
126
|
+
}
|
|
127
|
+
mouseClick(button, doubleClick) {
|
|
128
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
129
|
+
let action = InputEvent.MOUSE_CLICK_LEFT;
|
|
130
|
+
if (doubleClick) {
|
|
131
|
+
if (button === "left") {
|
|
132
|
+
action = InputEvent.MOUSE_CLICK_DOUBLE_LEFT;
|
|
133
|
+
}
|
|
134
|
+
else if (button === "right") {
|
|
135
|
+
action = InputEvent.MOUSE_CLICK_DOUBLE_RIGHT;
|
|
136
|
+
}
|
|
137
|
+
else if (button === "middle") {
|
|
138
|
+
action = InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
if (button === "right") {
|
|
143
|
+
action = InputEvent.MOUSE_CLICK_RIGHT;
|
|
144
|
+
}
|
|
145
|
+
else if (button === "middle") {
|
|
146
|
+
action = InputEvent.MOUSE_CLICK_MIDDLE;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(action, { x: 0, y: 0 }, '', {})]);
|
|
150
|
+
yield this.requestControl(controlCommand);
|
|
151
|
+
});
|
|
152
|
+
}
|
|
153
|
+
mouseScroll(dx, dy) {
|
|
154
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
155
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_SCROLL, { x: dx, y: dy }, '', {})]);
|
|
156
|
+
yield this.requestControl(controlCommand);
|
|
157
|
+
});
|
|
158
|
+
}
|
|
159
|
+
mouseHoldLeftButtonDown() {
|
|
160
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
161
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_DOWN, { x: 0, y: 0 }, '', {})]);
|
|
162
|
+
yield this.requestControl(controlCommand);
|
|
163
|
+
});
|
|
164
|
+
}
|
|
165
|
+
mouseReleaseLeftButton() {
|
|
166
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
167
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_UP, { x: 0, y: 0 }, '', {})]);
|
|
168
|
+
yield this.requestControl(controlCommand);
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
desktopKeyPressAndRelease(key_1) {
|
|
172
|
+
return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
|
|
173
|
+
let keyString = key;
|
|
174
|
+
if (modifiers.length > 0) {
|
|
175
|
+
keyString = `${modifiers.join('+')}+${key}`;
|
|
176
|
+
}
|
|
177
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, keyString, {})]);
|
|
178
|
+
yield this.requestControl(controlCommand);
|
|
179
|
+
});
|
|
180
|
+
}
|
|
181
|
+
desktopKeyHoldDown(key_1) {
|
|
182
|
+
return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
|
|
183
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.KEY_PRESS, { x: 0, y: 0 }, '', {
|
|
184
|
+
key: key,
|
|
185
|
+
modifiers: modifiers,
|
|
186
|
+
})]);
|
|
187
|
+
yield this.requestControl(controlCommand);
|
|
188
|
+
});
|
|
189
|
+
}
|
|
190
|
+
desktopKeyRelease(key_1) {
|
|
191
|
+
return __awaiter(this, arguments, void 0, function* (key, modifiers = []) {
|
|
192
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.KEY_RELEASE, { x: 0, y: 0 }, '', {
|
|
193
|
+
key: key,
|
|
194
|
+
modifiers: modifiers,
|
|
195
|
+
})]);
|
|
196
|
+
yield this.requestControl(controlCommand);
|
|
197
|
+
});
|
|
198
|
+
}
|
|
199
|
+
typeText(text) {
|
|
200
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
201
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.TYPE, { x: 0, y: 0 }, text, {})]);
|
|
202
|
+
yield this.requestControl(controlCommand);
|
|
203
|
+
});
|
|
204
|
+
}
|
|
76
205
|
}
|
|
77
206
|
export class ScreenShotTool extends BaseAgentTool {
|
|
78
207
|
constructor(osAgentHandler) {
|
|
@@ -91,7 +220,7 @@ export class ScreenShotTool extends BaseAgentTool {
|
|
|
91
220
|
toParams() {
|
|
92
221
|
return {
|
|
93
222
|
name: 'screenshot_tool',
|
|
94
|
-
description: 'Takes a screenshot of the current screen and returns it as a base64 image',
|
|
223
|
+
description: 'Takes a screenshot of the current screen and returns it as a base64 image.',
|
|
95
224
|
input_schema: { type: 'object', properties: {}, required: [] },
|
|
96
225
|
};
|
|
97
226
|
}
|
|
@@ -103,8 +232,7 @@ export class MouseMoveTool extends BaseAgentTool {
|
|
|
103
232
|
}
|
|
104
233
|
execute(command) {
|
|
105
234
|
return __awaiter(this, void 0, void 0, function* () {
|
|
106
|
-
|
|
107
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
235
|
+
yield this.osAgentHandler.mouseMove(command.x, command.y);
|
|
108
236
|
return {
|
|
109
237
|
output: `Moved mouse to (${command.x}, ${command.y})`,
|
|
110
238
|
};
|
|
@@ -119,11 +247,11 @@ export class MouseMoveTool extends BaseAgentTool {
|
|
|
119
247
|
properties: {
|
|
120
248
|
x: {
|
|
121
249
|
type: 'number',
|
|
122
|
-
description: 'The x
|
|
250
|
+
description: 'The x (pixels from the left edge) coordinate to move the mouse to',
|
|
123
251
|
},
|
|
124
252
|
y: {
|
|
125
253
|
type: 'number',
|
|
126
|
-
description: 'The y
|
|
254
|
+
description: 'The y (pixels from the top edge) coordinate to move the mouse to',
|
|
127
255
|
},
|
|
128
256
|
},
|
|
129
257
|
},
|
|
@@ -137,33 +265,7 @@ export class MouseClickTool extends BaseAgentTool {
|
|
|
137
265
|
}
|
|
138
266
|
execute(command) {
|
|
139
267
|
return __awaiter(this, void 0, void 0, function* () {
|
|
140
|
-
|
|
141
|
-
if (command.doubleClick) {
|
|
142
|
-
if (command.button === 'left') {
|
|
143
|
-
controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_DOUBLE_LEFT, { x: 0, y: 0 }, '', {})]);
|
|
144
|
-
}
|
|
145
|
-
if (command.button === 'right') {
|
|
146
|
-
controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_DOUBLE_RIGHT, { x: 0, y: 0 }, '', {})]);
|
|
147
|
-
}
|
|
148
|
-
if (command.button === 'middle') {
|
|
149
|
-
controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE, { x: 0, y: 0 }, '', {})]);
|
|
150
|
-
}
|
|
151
|
-
}
|
|
152
|
-
else {
|
|
153
|
-
if (command.button === 'left') {
|
|
154
|
-
controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_LEFT, { x: 0, y: 0 }, '', {})]);
|
|
155
|
-
}
|
|
156
|
-
if (command.button === 'right') {
|
|
157
|
-
controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_RIGHT, { x: 0, y: 0 }, '', {})]);
|
|
158
|
-
}
|
|
159
|
-
if (command.button === 'middle') {
|
|
160
|
-
controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.MOUSE_CLICK_MIDDLE, { x: 0, y: 0 }, '', {})]);
|
|
161
|
-
}
|
|
162
|
-
}
|
|
163
|
-
if (!controlCommand) {
|
|
164
|
-
throw new ToolError('Invalid input parameter for mouse click tool');
|
|
165
|
-
}
|
|
166
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
268
|
+
yield this.osAgentHandler.mouseClick(command.button, command.doubleClick);
|
|
167
269
|
const returnedMessage = command.doubleClick ? `Double clicked ${command.button} button` : `Clicked ${command.button} button`;
|
|
168
270
|
return {
|
|
169
271
|
output: returnedMessage,
|
|
@@ -199,8 +301,7 @@ export class MouseScrollTool extends BaseAgentTool {
|
|
|
199
301
|
}
|
|
200
302
|
execute(command) {
|
|
201
303
|
return __awaiter(this, void 0, void 0, function* () {
|
|
202
|
-
|
|
203
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
304
|
+
yield this.osAgentHandler.mouseScroll(command.dx, command.dy);
|
|
204
305
|
return {
|
|
205
306
|
output: `Scrolled by (${command.dx}, ${command.dy})`,
|
|
206
307
|
};
|
|
@@ -215,11 +316,11 @@ export class MouseScrollTool extends BaseAgentTool {
|
|
|
215
316
|
properties: {
|
|
216
317
|
dx: {
|
|
217
318
|
type: 'number',
|
|
218
|
-
description: 'The amount to scroll horizontally',
|
|
319
|
+
description: 'The amount to scroll horizontally (positive is right, negative is left)',
|
|
219
320
|
},
|
|
220
321
|
dy: {
|
|
221
322
|
type: 'number',
|
|
222
|
-
description: 'The amount to scroll vertically',
|
|
323
|
+
description: 'The amount to scroll vertically (positive is down, negative is up)',
|
|
223
324
|
},
|
|
224
325
|
},
|
|
225
326
|
required: ['dx', 'dy'],
|
|
@@ -227,20 +328,104 @@ export class MouseScrollTool extends BaseAgentTool {
|
|
|
227
328
|
};
|
|
228
329
|
}
|
|
229
330
|
}
|
|
230
|
-
export class
|
|
331
|
+
export class MouseDragAndDropTool extends BaseAgentTool {
|
|
231
332
|
constructor(osAgentHandler) {
|
|
232
333
|
super();
|
|
233
334
|
this.osAgentHandler = osAgentHandler;
|
|
234
335
|
}
|
|
235
336
|
execute(command) {
|
|
236
337
|
return __awaiter(this, void 0, void 0, function* () {
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
338
|
+
yield this.osAgentHandler.mouseMove(command.startX, command.startY);
|
|
339
|
+
yield this.osAgentHandler.mouseHoldLeftButtonDown();
|
|
340
|
+
yield this.osAgentHandler.mouseMove(command.endX, command.endY);
|
|
341
|
+
yield this.osAgentHandler.mouseReleaseLeftButton();
|
|
242
342
|
return {
|
|
243
|
-
output: `
|
|
343
|
+
output: `Dragged from (${command.startX}, ${command.startY}) to (${command.endX}, ${command.endY})`,
|
|
344
|
+
};
|
|
345
|
+
});
|
|
346
|
+
}
|
|
347
|
+
toParams() {
|
|
348
|
+
return {
|
|
349
|
+
name: 'mouse_drag_and_drop_tool',
|
|
350
|
+
description: 'Drags the mouse from the specified start coordinates to the specified end coordinates. The top left corner of the screen is (0,0)',
|
|
351
|
+
input_schema: {
|
|
352
|
+
type: 'object',
|
|
353
|
+
properties: {
|
|
354
|
+
startX: {
|
|
355
|
+
type: 'number',
|
|
356
|
+
description: 'The x (pixels from the left edge) coordinate of the start position',
|
|
357
|
+
},
|
|
358
|
+
startY: {
|
|
359
|
+
type: 'number',
|
|
360
|
+
description: 'The y (pixels from the top edge) coordinate of the start position',
|
|
361
|
+
},
|
|
362
|
+
endX: {
|
|
363
|
+
type: 'number',
|
|
364
|
+
description: 'The x (pixels from the left edge) coordinate of the end position',
|
|
365
|
+
},
|
|
366
|
+
endY: {
|
|
367
|
+
type: 'number',
|
|
368
|
+
description: 'The y (pixels from the top edge) coordinate of the end position',
|
|
369
|
+
},
|
|
370
|
+
},
|
|
371
|
+
required: ['startX', 'startY', 'endX', 'endY'],
|
|
372
|
+
},
|
|
373
|
+
};
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
export class MouseHoldLeftButtonDownTool extends BaseAgentTool {
|
|
377
|
+
constructor(osAgentHandler) {
|
|
378
|
+
super();
|
|
379
|
+
this.osAgentHandler = osAgentHandler;
|
|
380
|
+
}
|
|
381
|
+
execute() {
|
|
382
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
383
|
+
yield this.osAgentHandler.mouseHoldLeftButtonDown();
|
|
384
|
+
return {
|
|
385
|
+
output: 'Holding down left mouse button',
|
|
386
|
+
};
|
|
387
|
+
});
|
|
388
|
+
}
|
|
389
|
+
toParams() {
|
|
390
|
+
return {
|
|
391
|
+
name: 'mouse_hold_left_button_down_tool',
|
|
392
|
+
description: 'Hold down the left mouse button at the current position.',
|
|
393
|
+
input_schema: { type: 'object', properties: {}, required: [] },
|
|
394
|
+
};
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
export class MouseReleaseLeftButtonTool extends BaseAgentTool {
|
|
398
|
+
constructor(osAgentHandler) {
|
|
399
|
+
super();
|
|
400
|
+
this.osAgentHandler = osAgentHandler;
|
|
401
|
+
}
|
|
402
|
+
execute() {
|
|
403
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
404
|
+
yield this.osAgentHandler.mouseReleaseLeftButton();
|
|
405
|
+
return {
|
|
406
|
+
output: 'Released left mouse button',
|
|
407
|
+
};
|
|
408
|
+
});
|
|
409
|
+
}
|
|
410
|
+
toParams() {
|
|
411
|
+
return {
|
|
412
|
+
name: 'mouse_release_left_button_tool',
|
|
413
|
+
description: 'Release the left mouse button at the current position.',
|
|
414
|
+
input_schema: { type: 'object', properties: {}, required: [] },
|
|
415
|
+
};
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
export class DesktopPressAndReleaseKeysTool extends BaseAgentTool {
|
|
419
|
+
constructor(osAgentHandler) {
|
|
420
|
+
super();
|
|
421
|
+
this.osAgentHandler = osAgentHandler;
|
|
422
|
+
}
|
|
423
|
+
execute(command) {
|
|
424
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
425
|
+
const modifiers = command.modifiers || [];
|
|
426
|
+
yield this.osAgentHandler.desktopKeyPressAndRelease(command.key, modifiers);
|
|
427
|
+
return {
|
|
428
|
+
output: `Pressed key ${command.key} with modifiers ${modifiers.join(' ')}`,
|
|
244
429
|
};
|
|
245
430
|
});
|
|
246
431
|
}
|
|
@@ -256,15 +441,53 @@ export class DesktopKeyPressSequenceTool extends BaseAgentTool {
|
|
|
256
441
|
enum: PC_KEY_VALUES,
|
|
257
442
|
description: 'The key to press',
|
|
258
443
|
},
|
|
259
|
-
|
|
260
|
-
type: '
|
|
261
|
-
|
|
262
|
-
|
|
444
|
+
modifiers: {
|
|
445
|
+
type: 'array',
|
|
446
|
+
items: {
|
|
447
|
+
type: 'string',
|
|
448
|
+
enum: MODIFIER_KEY_VALUES,
|
|
449
|
+
},
|
|
450
|
+
description: 'The modifiers to press',
|
|
263
451
|
},
|
|
264
|
-
|
|
452
|
+
},
|
|
453
|
+
required: ['key'],
|
|
454
|
+
},
|
|
455
|
+
};
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
export class DesktopKeyHoldDownTool extends BaseAgentTool {
|
|
459
|
+
constructor(osAgentHandler) {
|
|
460
|
+
super();
|
|
461
|
+
this.osAgentHandler = osAgentHandler;
|
|
462
|
+
}
|
|
463
|
+
execute(command) {
|
|
464
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
465
|
+
const modifiers = command.modifiers || [];
|
|
466
|
+
yield this.osAgentHandler.desktopKeyHoldDown(command.key, modifiers);
|
|
467
|
+
return {
|
|
468
|
+
output: `Holding down key ${command.key} with modifiers ${modifiers.join(' ')}`,
|
|
469
|
+
};
|
|
470
|
+
});
|
|
471
|
+
}
|
|
472
|
+
toParams() {
|
|
473
|
+
return {
|
|
474
|
+
name: 'desktop_key_hold_down_tool',
|
|
475
|
+
description: 'Hold down a key and optional modifiers. Keys will be still pressed after the tool is finished.',
|
|
476
|
+
input_schema: {
|
|
477
|
+
type: 'object',
|
|
478
|
+
properties: {
|
|
479
|
+
key: {
|
|
265
480
|
type: 'string',
|
|
266
|
-
enum: MODIFIER_KEY_VALUES,
|
|
267
|
-
description: 'The
|
|
481
|
+
enum: [...PC_KEY_VALUES, ...MODIFIER_KEY_VALUES],
|
|
482
|
+
description: 'The key to hold down',
|
|
483
|
+
},
|
|
484
|
+
modifiers: {
|
|
485
|
+
type: 'array',
|
|
486
|
+
items: {
|
|
487
|
+
type: 'string',
|
|
488
|
+
enum: MODIFIER_KEY_VALUES,
|
|
489
|
+
},
|
|
490
|
+
description: 'The modifiers to hold down',
|
|
268
491
|
},
|
|
269
492
|
},
|
|
270
493
|
required: ['key'],
|
|
@@ -272,31 +495,39 @@ export class DesktopKeyPressSequenceTool extends BaseAgentTool {
|
|
|
272
495
|
};
|
|
273
496
|
}
|
|
274
497
|
}
|
|
275
|
-
export class
|
|
498
|
+
export class DesktopKeyReleaseTool extends BaseAgentTool {
|
|
276
499
|
constructor(osAgentHandler) {
|
|
277
500
|
super();
|
|
278
501
|
this.osAgentHandler = osAgentHandler;
|
|
279
502
|
}
|
|
280
503
|
execute(command) {
|
|
281
504
|
return __awaiter(this, void 0, void 0, function* () {
|
|
282
|
-
const
|
|
283
|
-
yield this.osAgentHandler.
|
|
505
|
+
const modifiers = command.modifiers || [];
|
|
506
|
+
yield this.osAgentHandler.desktopKeyRelease(command.key, modifiers);
|
|
284
507
|
return {
|
|
285
|
-
output: `
|
|
508
|
+
output: `Released key ${command.key} with modifiers ${modifiers.join(' ')}`,
|
|
286
509
|
};
|
|
287
510
|
});
|
|
288
511
|
}
|
|
289
512
|
toParams() {
|
|
290
513
|
return {
|
|
291
|
-
name: '
|
|
292
|
-
description: '
|
|
514
|
+
name: 'desktop_key_release_tool',
|
|
515
|
+
description: 'Releases a key and optional modifiers. This can be used after keys were held down with the desktop_key_hold_down_tool',
|
|
293
516
|
input_schema: {
|
|
294
517
|
type: 'object',
|
|
295
518
|
properties: {
|
|
296
519
|
key: {
|
|
297
520
|
type: 'string',
|
|
298
521
|
enum: [...PC_KEY_VALUES, ...MODIFIER_KEY_VALUES],
|
|
299
|
-
description: 'The key to
|
|
522
|
+
description: 'The key to release',
|
|
523
|
+
},
|
|
524
|
+
modifiers: {
|
|
525
|
+
type: 'array',
|
|
526
|
+
items: {
|
|
527
|
+
type: 'string',
|
|
528
|
+
enum: MODIFIER_KEY_VALUES,
|
|
529
|
+
},
|
|
530
|
+
description: 'The modifiers to release',
|
|
300
531
|
},
|
|
301
532
|
},
|
|
302
533
|
required: ['key'],
|
|
@@ -459,3 +690,32 @@ export class ExecuteShellCommandTool extends BaseAgentTool {
|
|
|
459
690
|
};
|
|
460
691
|
}
|
|
461
692
|
}
|
|
693
|
+
export class WaitTool extends BaseAgentTool {
|
|
694
|
+
constructor() {
|
|
695
|
+
super();
|
|
696
|
+
}
|
|
697
|
+
execute(command) {
|
|
698
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
699
|
+
yield new Promise(resolve => setTimeout(resolve, command.milliseconds));
|
|
700
|
+
return {
|
|
701
|
+
output: `Waited for ${command.milliseconds} milliseconds`,
|
|
702
|
+
};
|
|
703
|
+
});
|
|
704
|
+
}
|
|
705
|
+
toParams() {
|
|
706
|
+
return {
|
|
707
|
+
name: 'wait_tool',
|
|
708
|
+
description: 'Waits for a specified number of milliseconds',
|
|
709
|
+
input_schema: {
|
|
710
|
+
type: 'object',
|
|
711
|
+
properties: {
|
|
712
|
+
milliseconds: {
|
|
713
|
+
type: 'number',
|
|
714
|
+
description: 'The number of milliseconds to wait',
|
|
715
|
+
},
|
|
716
|
+
},
|
|
717
|
+
required: ['milliseconds'],
|
|
718
|
+
},
|
|
719
|
+
};
|
|
720
|
+
}
|
|
721
|
+
}
|
|
@@ -18,4 +18,6 @@ export var InputEvent;
|
|
|
18
18
|
InputEvent["MOUSE_DOWN"] = "MOUSE_DOWN";
|
|
19
19
|
InputEvent["MOUSE_UP"] = "MOUSE_UP";
|
|
20
20
|
InputEvent["EXECUTE_COMMAND"] = "EXECUTE_COMMAND";
|
|
21
|
+
InputEvent["KEY_PRESS"] = "KEY_PRESS";
|
|
22
|
+
InputEvent["KEY_RELEASE"] = "KEY_RELEASE";
|
|
21
23
|
})(InputEvent || (InputEvent = {}));
|
|
@@ -26,6 +26,10 @@ export declare class InferenceClient {
|
|
|
26
26
|
predictVQAAnswer(prompt: string, image: string, config?: object): Promise<any>;
|
|
27
27
|
predictActResponse(params: {
|
|
28
28
|
max_tokens: number;
|
|
29
|
+
tool_choice?: {
|
|
30
|
+
type: 'tool' | 'any' | 'auto';
|
|
31
|
+
name?: string;
|
|
32
|
+
};
|
|
29
33
|
messages: BetaMessageParam[];
|
|
30
34
|
model: string;
|
|
31
35
|
system?: string;
|