askui 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/core/models/anthropic/askui-agent.d.ts +13 -0
- package/dist/cjs/core/models/anthropic/askui-agent.js +195 -0
- package/dist/cjs/core/models/anthropic/claude-agent.d.ts +40 -0
- package/dist/cjs/core/models/anthropic/claude-agent.js +200 -0
- package/dist/cjs/core/models/anthropic/index.d.ts +4 -0
- package/dist/cjs/core/models/anthropic/index.js +9 -0
- package/dist/cjs/core/models/anthropic/tools/agent-errors.d.ts +2 -0
- package/dist/cjs/core/models/anthropic/tools/agent-errors.js +6 -0
- package/dist/cjs/core/models/anthropic/tools/base.d.ts +24 -0
- package/dist/cjs/core/models/anthropic/tools/base.js +66 -0
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +113 -0
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +476 -0
- package/dist/cjs/core/ui-control-commands/index.d.ts +2 -0
- package/dist/cjs/core/ui-control-commands/index.js +5 -1
- package/dist/cjs/execution/dsl.d.ts +7 -4
- package/dist/cjs/execution/dsl.js +4 -1
- package/dist/cjs/execution/execution-runtime.d.ts +11 -1
- package/dist/cjs/execution/execution-runtime.js +5 -0
- package/dist/cjs/execution/inference-client.d.ts +9 -0
- package/dist/cjs/execution/inference-client.js +13 -5
- package/dist/cjs/execution/ui-control-client-dependency-builder.js +1 -1
- package/dist/cjs/execution/ui-control-client.d.ts +50 -0
- package/dist/cjs/execution/ui-control-client.js +61 -3
- package/dist/cjs/lib/interactive_cli/create-example-project.d.ts +1 -0
- package/dist/cjs/lib/interactive_cli/create-example-project.js +20 -3
- package/dist/cjs/main.d.ts +1 -0
- package/dist/cjs/main.js +5 -1
- package/dist/cjs/utils/base_64_image/base-64-image.d.ts +2 -1
- package/dist/cjs/utils/base_64_image/base-64-image.js +17 -2
- package/dist/esm/core/models/anthropic/askui-agent.d.ts +13 -0
- package/dist/esm/core/models/anthropic/askui-agent.js +191 -0
- package/dist/esm/core/models/anthropic/claude-agent.d.ts +40 -0
- package/dist/esm/core/models/anthropic/claude-agent.js +196 -0
- package/dist/esm/core/models/anthropic/index.d.ts +4 -0
- package/dist/esm/core/models/anthropic/index.js +2 -0
- package/dist/esm/core/models/anthropic/tools/agent-errors.d.ts +2 -0
- package/dist/esm/core/models/anthropic/tools/agent-errors.js +2 -0
- package/dist/esm/core/models/anthropic/tools/base.d.ts +24 -0
- package/dist/esm/core/models/anthropic/tools/base.js +59 -0
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +113 -0
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +461 -0
- package/dist/esm/core/ui-control-commands/index.d.ts +2 -0
- package/dist/esm/core/ui-control-commands/index.js +2 -0
- package/dist/esm/execution/dsl.d.ts +7 -4
- package/dist/esm/execution/dsl.js +3 -0
- package/dist/esm/execution/execution-runtime.d.ts +11 -1
- package/dist/esm/execution/execution-runtime.js +5 -0
- package/dist/esm/execution/inference-client.d.ts +9 -0
- package/dist/esm/execution/inference-client.js +13 -5
- package/dist/esm/execution/ui-control-client-dependency-builder.js +1 -1
- package/dist/esm/execution/ui-control-client.d.ts +50 -0
- package/dist/esm/execution/ui-control-client.js +61 -3
- package/dist/esm/lib/interactive_cli/create-example-project.d.ts +1 -0
- package/dist/esm/lib/interactive_cli/create-example-project.js +20 -3
- package/dist/esm/main.d.ts +1 -0
- package/dist/esm/main.js +1 -0
- package/dist/esm/utils/base_64_image/base-64-image.d.ts +2 -1
- package/dist/esm/utils/base_64_image/base-64-image.js +17 -2
- package/dist/example_projects_templates/configs/vscode-settings.json +41 -0
- package/package.json +2 -1
|
@@ -0,0 +1,461 @@
|
|
|
1
|
+
/**
 * Drives a generator function to completion and exposes the outcome as a promise.
 *
 * Reuses `this.__awaiter` when one already exists on `this`; otherwise defines the
 * helper below. Every value the generator yields is wrapped in a `P` promise
 * (unless it already is an instance of `P`), and the generator is resumed with the
 * fulfilled value or has the rejection reason thrown into it. The returned promise
 * resolves with the generator's return value and rejects with anything it throws.
 *
 * NOTE(review): this looks like the helper the TypeScript compiler emits for
 * down-levelled `async` functions — confirm before editing it by hand.
 *
 * @param {*} thisArg - `this` value the generator function is applied with.
 * @param {Array|undefined} _arguments - Arguments for the generator function; `[]` when falsy.
 * @param {Function|undefined} P - Promise constructor to use; falls back to `Promise` when falsy.
 * @param {Function} generator - Generator function containing the asynchronous body.
 * @returns {Promise<*>} Promise (an instance of `P`) settling with the generator's outcome.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { PC_KEY_VALUES, MODIFIER_KEY_VALUES, ANDROID_KEY_VALUES, } from '../../../../execution/dsl';
|
|
11
|
+
import { BaseAgentTool, ToolError } from './base';
|
|
12
|
+
import { ControlCommand, ControlCommandCode, InputEvent, Action, } from '../../../ui-control-commands';
|
|
13
|
+
import { Base64Image } from '../../../../utils/base_64_image/base-64-image';
|
|
14
|
+
import { AgentError } from './agent-errors';
|
|
15
|
+
/**
 * Mediates between the agent tools and the OS controller client.
 *
 * It keeps two coordinate spaces in sync: the real screen size (taken from the
 * most recent screenshot) and the "target resolution" that screenshots are
 * resized to before they are handed to the model (1280x800 by default).
 */
export class OsAgentHandler {
    /**
     * @param {object} AgentOsClient - Client providing `getScreenshot()` and `requestControl()`.
     * @param {{width: number, height: number}} screenDimensions - Size of the real screen in pixels.
     */
    constructor(AgentOsClient, screenDimensions) {
        this.AgentOsClient = AgentOsClient;
        this.TargetResolution = { width: 1280, height: 800 };
        this.screenDimensions = screenDimensions;
    }

    /**
     * Builds a handler whose screen dimensions are read from a fresh screenshot.
     *
     * @param {object} AgentOsClient - Client providing `getScreenshot()` and `requestControl()`.
     * @returns {Promise<OsAgentHandler>} Handler initialised with the screenshot's width and height.
     */
    static createInstance(AgentOsClient) {
        return __awaiter(this, void 0, void 0, function* () {
            const base64ImageString = yield AgentOsClient.getScreenshot();
            const screenshot = yield Base64Image.fromString(base64ImageString);
            const { width, height } = yield screenshot.getInfo();
            return new OsAgentHandler(AgentOsClient, { width, height });
        });
    }

    /**
     * @returns {{width: number, height: number}} Resolution screenshots are resized to.
     */
    getTargetResolution() {
        return this.TargetResolution;
    }

    /**
     * Replaces the resolution screenshots are resized to.
     *
     * @param {number} width - Target width in pixels.
     * @param {number} height - Target height in pixels.
     */
    setTargetResolution(width, height) {
        this.TargetResolution = { width, height };
    }

    /**
     * Takes a screenshot, refreshes the stored screen dimensions from it and
     * returns the image resized to the target resolution.
     *
     * @returns {Promise<string>} Result of `toString(false)` on the resized image.
     */
    takeScreenshot() {
        return __awaiter(this, void 0, void 0, function* () {
            const base64ImageString = yield this.AgentOsClient.getScreenshot();
            const screenshot = yield Base64Image.fromString(base64ImageString);
            const { width, height } = yield screenshot.getInfo();
            // The screen size can change between calls, so refresh it on every screenshot.
            this.screenDimensions = { width, height };
            const resized = yield screenshot.resizeWithSameAspectRatio(this.TargetResolution.width, this.TargetResolution.height);
            return resized.toString(false);
        });
    }

    /**
     * Converts a point between target-resolution space and real-screen space.
     *
     * With `source === 'api'` the point is expected in target-resolution space
     * and is converted to screen space; any other `source` converts the other
     * way round, without validation.
     *
     * @param {string} source - `'api'` for model-provided coordinates, anything else for screen coordinates.
     * @param {number} x - Horizontal coordinate.
     * @param {number} y - Vertical coordinate.
     * @returns {number[]} The converted `[x, y]` pair, rounded to integers.
     * @throws {ToolError} For `'api'` coordinates that are not finite numbers or lie outside the target resolution.
     */
    scaleCoordinates(source, x, y) {
        const { width: targetWidth, height: targetHeight } = this.TargetResolution;
        const xScalingFactor = targetWidth / this.screenDimensions.width;
        const yScalingFactor = targetHeight / this.screenDimensions.height;
        if (source !== 'api') {
            return [
                Math.round(x * xScalingFactor),
                Math.round(y * yScalingFactor),
            ];
        }
        // NaN/undefined compare false against every bound, so without this guard
        // they would slip through the range check and yield NaN coordinates.
        if (!Number.isFinite(x) || !Number.isFinite(y)) {
            throw new ToolError(`Coordinates ${x}, ${y} must be finite numbers`);
        }
        if (x > targetWidth || y > targetHeight || x < 0 || y < 0) {
            throw new ToolError(`Coordinates ${x}, ${y} are outside screen bounds (${targetWidth}x${targetHeight})`);
        }
        return [
            Math.round(x / xScalingFactor),
            Math.round(y / yScalingFactor),
        ];
    }

    /**
     * Rescales the position of every action from target-resolution space to
     * screen space (mutating the actions in place) and forwards the command to
     * the OS controller client.
     *
     * NOTE(review): the rescaling and bounds check are applied to every action,
     * including ones whose `position` carries scroll deltas rather than a screen
     * point, so negative deltas are rejected with a ToolError — confirm intended.
     *
     * @param {object} controlCommand - Command whose `actions` each carry a `position` with `x` and `y`.
     * @returns {Promise<void>}
     * @throws {ToolError} When an action position is invalid (see `scaleCoordinates`).
     */
    requestControl(controlCommand) {
        return __awaiter(this, void 0, void 0, function* () {
            for (const action of controlCommand.actions) {
                const [scaledX, scaledY] = this.scaleCoordinates('api', action.position.x, action.position.y);
                action.position.x = scaledX;
                action.position.y = scaledY;
            }
            yield this.AgentOsClient.requestControl(controlCommand);
        });
    }
}
|
|
77
|
+
/**
 * Agent tool that captures the screen through the OS agent handler.
 */
export class ScreenShotTool extends BaseAgentTool {
    /**
     * @param {OsAgentHandler} osAgentHandler - Handler used to take the screenshot.
     */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }

    /**
     * Takes a screenshot and reports the resolution it was resized to.
     *
     * @returns {Promise<{base64Images: string[], output: string}>} The screenshot and a status message.
     */
    execute() {
        return __awaiter(this, void 0, void 0, function* () {
            const encodedScreenshot = yield this.osAgentHandler.takeScreenshot();
            const { width, height } = this.osAgentHandler.getTargetResolution();
            const output = `Screenshot was taken, with resolution width ${width} and height ${height}`;
            return { base64Images: [encodedScreenshot], output };
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema); the tool takes no input.
     */
    toParams() {
        const input_schema = { type: 'object', properties: {}, required: [] };
        return {
            name: 'screenshot_tool',
            description: 'Takes a screenshot of the current screen and returns it as a base64 image',
            input_schema,
        };
    }
}
|
|
99
|
+
/**
 * Agent tool that moves the mouse pointer to absolute coordinates.
 */
export class MouseMoveTool extends BaseAgentTool {
    /**
     * @param {OsAgentHandler} osAgentHandler - Handler used to send the control command.
     */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }

    /**
     * Sends a MOUSE_MOVE action for the given point.
     *
     * @param {{x: number, y: number}} command - Target point.
     * @returns {Promise<{output: string}>} Status message naming the requested point.
     * @throws {ToolError} When `x` or `y` is missing or not a finite number.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { x, y } = command;
            // Reject missing/non-numeric coordinates here instead of forwarding NaN downstream.
            if (!Number.isFinite(x) || !Number.isFinite(y)) {
                throw new ToolError(`Invalid input parameter for mouse move tool: (${x}, ${y})`);
            }
            const moveAction = new Action(InputEvent.MOUSE_MOVE, { x, y }, '', {});
            const controlCommand = new ControlCommand(ControlCommandCode.OK, [moveAction]);
            yield this.osAgentHandler.requestControl(controlCommand);
            return {
                output: `Moved mouse to (${x}, ${y})`,
            };
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema) with both coordinates required.
     */
    toParams() {
        return {
            name: 'mouse_move_tool',
            description: 'Moves the mouse to the specified absolute coordinates. The top left corner of the screen is (0,0)',
            input_schema: {
                type: 'object',
                properties: {
                    x: {
                        type: 'number',
                        description: 'The x coordinate of the element to click on',
                    },
                    y: {
                        type: 'number',
                        description: 'The y coordinate of the element to click on',
                    },
                },
                // Both coordinates are read unconditionally by execute(), so declare them
                // required like the other tools in this file do for their inputs.
                required: ['x', 'y'],
            },
        };
    }
}
|
|
133
|
+
/**
 * Agent tool that performs a single or double click with a chosen mouse button.
 */
export class MouseClickTool extends BaseAgentTool {
    /**
     * @param {OsAgentHandler} osAgentHandler - Handler used to send the control command.
     */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }

    /**
     * Sends the click action matching `command.button` and `command.doubleClick`.
     *
     * @param {{button: string, doubleClick: boolean}} command - Button (`left`, `right` or `middle`) and click type.
     * @returns {Promise<{output: string}>} Status message naming the button that was clicked.
     * @throws {ToolError} When `command.button` is none of the supported buttons.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { button, doubleClick } = command;
            // Pick the event table for the click type, then look the button up in it.
            const eventsByButton = doubleClick
                ? new Map([
                    ['left', InputEvent.MOUSE_CLICK_DOUBLE_LEFT],
                    ['right', InputEvent.MOUSE_CLICK_DOUBLE_RIGHT],
                    ['middle', InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE],
                ])
                : new Map([
                    ['left', InputEvent.MOUSE_CLICK_LEFT],
                    ['right', InputEvent.MOUSE_CLICK_RIGHT],
                    ['middle', InputEvent.MOUSE_CLICK_MIDDLE],
                ]);
            if (!eventsByButton.has(button)) {
                throw new ToolError('Invalid input parameter for mouse click tool');
            }
            const clickAction = new Action(eventsByButton.get(button), { x: 0, y: 0 }, '', {});
            const controlCommand = new ControlCommand(ControlCommandCode.OK, [clickAction]);
            yield this.osAgentHandler.requestControl(controlCommand);
            let returnedMessage;
            if (doubleClick) {
                returnedMessage = `Double clicked ${button} button`;
            }
            else {
                returnedMessage = `Clicked ${button} button`;
            }
            return { output: returnedMessage };
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema).
     */
    toParams() {
        const properties = {
            button: {
                type: 'string',
                enum: ['left', 'right', 'middle'],
                description: 'The button to click',
            },
            doubleClick: {
                type: 'boolean',
                description: 'Whether to double click the button',
            },
        };
        return {
            name: 'mouse_click_tool',
            description: 'Clicks the specified button on the mouse',
            input_schema: {
                type: 'object',
                properties,
                required: ['button', 'doubleClick'],
            },
        };
    }
}
|
|
195
|
+
/**
 * Agent tool that scrolls by a horizontal and vertical amount.
 */
export class MouseScrollTool extends BaseAgentTool {
    /**
     * @param {OsAgentHandler} osAgentHandler - Handler used to send the control command.
     */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }

    /**
     * Sends a MOUSE_SCROLL action whose position carries the scroll amounts.
     *
     * NOTE(review): the handler's requestControl passes every action position
     * through scaleCoordinates('api', ...), which rejects negative values —
     * verify that negative scroll amounts are meant to be unsupported.
     *
     * @param {{dx: number, dy: number}} command - Horizontal and vertical scroll amounts.
     * @returns {Promise<{output: string}>} Status message naming the requested amounts.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const scrollAmounts = { x: command.dx, y: command.dy };
            const scrollAction = new Action(InputEvent.MOUSE_SCROLL, scrollAmounts, '', {});
            const controlCommand = new ControlCommand(ControlCommandCode.OK, [scrollAction]);
            yield this.osAgentHandler.requestControl(controlCommand);
            const output = `Scrolled by (${command.dx}, ${command.dy})`;
            return { output };
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema).
     */
    toParams() {
        const properties = {
            dx: {
                type: 'number',
                description: 'The amount to scroll horizontally',
            },
            dy: {
                type: 'number',
                description: 'The amount to scroll vertically',
            },
        };
        return {
            name: 'mouse_scroll_tool',
            description: 'Scrolls the mouse by the specified amount',
            input_schema: {
                type: 'object',
                properties,
                required: ['dx', 'dy'],
            },
        };
    }
}
|
|
230
|
+
/**
 * Agent tool that presses a desktop key together with up to two modifier keys.
 */
export class DesktopKeyPressSequenceTool extends BaseAgentTool {
    /**
     * @param {OsAgentHandler} osAgentHandler - Handler used to send the control command.
     */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }

    /**
     * Sends a PRESS_KEY_SEQUENCE action for the key and its modifiers.
     *
     * @param {{key: string, firstModifier?: string, secondModifier?: string}} command - Key and optional modifiers.
     * @returns {Promise<{output: string}>} Status message naming the key and modifiers.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            // Missing modifiers are sent as empty strings.
            const firstModifier = command.firstModifier || '';
            const secondModifier = command.secondModifier || '';
            const keyAction = new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, { firstModifier, secondModifier });
            const controlCommand = new ControlCommand(ControlCommandCode.OK, [keyAction]);
            yield this.osAgentHandler.requestControl(controlCommand);
            return {
                output: `Pressed key ${command.key} with modifiers ${firstModifier} ${secondModifier}`,
            };
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema); only `key` is required.
     */
    toParams() {
        const modifierProperty = (description) => ({
            type: 'string',
            enum: MODIFIER_KEY_VALUES,
            description,
        });
        return {
            name: 'desktop_key_press_sequence_tool',
            description: 'Presses a key with optional modifiers',
            input_schema: {
                type: 'object',
                properties: {
                    key: {
                        type: 'string',
                        enum: PC_KEY_VALUES,
                        description: 'The key to press',
                    },
                    firstModifier: modifierProperty('The first modifier key'),
                    secondModifier: modifierProperty('The second modifier key'),
                },
                required: ['key'],
            },
        };
    }
}
|
|
275
|
+
/**
 * Agent tool that presses a single desktop key (regular or modifier).
 */
export class DesktopSingleKeyPressTool extends BaseAgentTool {
    /**
     * @param {OsAgentHandler} osAgentHandler - Handler used to send the control command.
     */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }

    /**
     * Sends a PRESS_KEY_SEQUENCE action containing just the given key.
     *
     * @param {{key: string}} command - Key to press.
     * @returns {Promise<{output: string}>} Status message naming the key.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const keyAction = new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, {});
            const controlCommand = new ControlCommand(ControlCommandCode.OK, [keyAction]);
            yield this.osAgentHandler.requestControl(controlCommand);
            const output = `Pressed key ${command.key}`;
            return { output };
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema).
     */
    toParams() {
        // Regular keys first, then modifier keys.
        const allowedKeys = [...PC_KEY_VALUES, ...MODIFIER_KEY_VALUES];
        const properties = {
            key: {
                type: 'string',
                enum: allowedKeys,
                description: 'The key to press',
            },
        };
        return {
            name: 'desktop_single_key_press_tool',
            description: 'Presses a single key',
            input_schema: {
                type: 'object',
                properties,
                required: ['key'],
            },
        };
    }
}
|
|
307
|
+
/**
 * Agent tool that types a piece of text.
 */
export class TypeTool extends BaseAgentTool {
    /**
     * @param {OsAgentHandler} osAgentHandler - Handler used to send the control command.
     */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }

    /**
     * Sends a TYPE action carrying the given text.
     *
     * @param {{text: string}} command - Text to type.
     * @returns {Promise<{output: string}>} Status message echoing the text.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const typeAction = new Action(InputEvent.TYPE, { x: 0, y: 0 }, command.text, {});
            const controlCommand = new ControlCommand(ControlCommandCode.OK, [typeAction]);
            yield this.osAgentHandler.requestControl(controlCommand);
            const output = `Typed text: ${command.text}`;
            return { output };
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema).
     */
    toParams() {
        const properties = {
            text: {
                type: 'string',
                description: 'The text to type',
            },
        };
        return {
            name: 'type_tool',
            description: 'Types the specified text',
            input_schema: {
                type: 'object',
                properties,
                required: ['text'],
            },
        };
    }
}
|
|
338
|
+
/**
 * Agent tool that presses a single Android key.
 */
export class AndroidSingleKeyPressTool extends BaseAgentTool {
    /**
     * @param {OsAgentHandler} osAgentHandler - Handler used to send the control command.
     */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }

    /**
     * Sends a PRESS_KEY_SEQUENCE action containing just the given key.
     *
     * @param {{key: string}} command - Android key to press.
     * @returns {Promise<{output: string}>} Status message naming the key.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const keyAction = new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, {});
            const controlCommand = new ControlCommand(ControlCommandCode.OK, [keyAction]);
            yield this.osAgentHandler.requestControl(controlCommand);
            const output = `Pressed Android key ${command.key}`;
            return { output };
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema).
     */
    toParams() {
        const properties = {
            key: {
                type: 'string',
                enum: ANDROID_KEY_VALUES,
                description: 'The Android key to press',
            },
        };
        return {
            name: 'android_single_key_press_tool',
            description: 'Presses a single Android key',
            input_schema: {
                type: 'object',
                properties,
                required: ['key'],
            },
        };
    }
}
|
|
370
|
+
/**
 * Agent tool that presses several Android keys as one sequence.
 */
export class AndroidSequenceKeyPressTool extends BaseAgentTool {
    /**
     * @param {OsAgentHandler} osAgentHandler - Handler used to send the control command.
     */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }

    /**
     * Sends one PRESS_KEY_SEQUENCE action whose text is the keys joined by single spaces.
     *
     * @param {{keys: string[]}} command - Android keys to press, in order.
     * @returns {Promise<{output: string}>} Status message listing the keys.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const keySequence = command.keys.join(' ');
            const sequenceAction = new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, keySequence, {});
            const controlCommand = new ControlCommand(ControlCommandCode.OK, [sequenceAction]);
            yield this.osAgentHandler.requestControl(controlCommand);
            const pressedKeys = command.keys.join(', ');
            return {
                output: `Pressed Android keys: ${pressedKeys}`,
            };
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema).
     */
    toParams() {
        const keyItemSchema = {
            type: 'string',
            enum: ANDROID_KEY_VALUES,
        };
        return {
            name: 'android_sequence_key_press_tool',
            description: 'Presses a sequence of Android keys',
            input_schema: {
                type: 'object',
                properties: {
                    keys: {
                        type: 'array',
                        items: keyItemSchema,
                        description: 'The sequence of Android keys to press',
                    },
                },
                required: ['keys'],
            },
        };
    }
}
|
|
405
|
+
/**
 * Agent tool that raises an AgentError carrying a message supplied by the agent.
 */
export class AgentErrorTool extends BaseAgentTool {
    constructor() {
        super();
    }

    /**
     * Always fails: the returned promise rejects with an AgentError built from `command.error`.
     *
     * @param {{error: string}} command - Error message to raise.
     * @returns {Promise<never>} Promise that rejects with the AgentError.
     * @throws {AgentError} Always (delivered as a rejection of the returned promise).
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { error } = command;
            throw new AgentError(error);
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema).
     */
    toParams() {
        const properties = {
            error: {
                type: 'string',
                description: 'The error message to raise',
            },
        };
        return {
            name: 'agent_error_tool',
            description: 'Raises an error in the agent',
            input_schema: {
                type: 'object',
                properties,
                required: ['error'],
            },
        };
    }
}
|
|
431
|
+
/**
 * Agent tool that asks the OS controller to execute a shell command.
 */
export class ExecuteShellCommandTool extends BaseAgentTool {
    /**
     * @param {OsAgentHandler} osAgentHandler - Handler used to send the control command.
     */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }

    /**
     * Sends an EXECUTE_COMMAND action carrying the command string.
     *
     * The command string is forwarded exactly as received; nothing in this tool
     * validates or escapes it.
     *
     * @param {{command: string}} command - Wrapper holding the shell command to run.
     * @returns {Promise<{output: string}>} Status message echoing the command.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const shellCommand = command.command;
            const shellAction = new Action(InputEvent.EXECUTE_COMMAND, { x: 0, y: 0 }, shellCommand, {});
            const controlCommand = new ControlCommand(ControlCommandCode.OK, [shellAction]);
            yield this.osAgentHandler.requestControl(controlCommand);
            return {
                output: `Executed shell command: ${command.command}`,
            };
        });
    }

    /**
     * @returns {object} Tool definition (name, description, input schema).
     */
    toParams() {
        const properties = {
            command: {
                type: 'string',
                description: 'The shell command to execute',
            },
        };
        return {
            name: 'execute_shell_command_tool',
            description: 'Executes a shell command',
            input_schema: {
                type: 'object',
                properties,
                required: ['command'],
            },
        };
    }
}
|
|
@@ -1,3 +1,5 @@
|
|
|
1
1
|
export { ControlCommand } from './control-command';
|
|
2
2
|
export { ControlCommandCode } from './control-command-code';
|
|
3
3
|
export { InferenceResponse } from '../inference-response/inference-response';
|
|
4
|
+
export { Action } from './action';
|
|
5
|
+
export { InputEvent } from './input-event';
|
|
@@ -1,3 +1,5 @@
|
|
|
1
1
|
export { ControlCommand } from './control-command';
|
|
2
2
|
export { ControlCommandCode } from './control-command-code';
|
|
3
3
|
export { InferenceResponse } from '../inference-response/inference-response';
|
|
4
|
+
export { Action } from './action';
|
|
5
|
+
export { InputEvent } from './input-event';
|
|
@@ -5,11 +5,14 @@ export declare enum Separators {
|
|
|
5
5
|
STRING = "<|string|>"
|
|
6
6
|
}
|
|
7
7
|
export type INTERSECTION_AREA = 'element_center_line' | 'element_edge_area' | 'display_edge_area';
|
|
8
|
-
export
|
|
9
|
-
export type
|
|
10
|
-
export
|
|
8
|
+
export declare const PC_KEY_VALUES: readonly ["backspace", "delete", "enter", "tab", "escape", "up", "down", "right", "left", "home", "end", "pageup", "pagedown", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "space", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_", "`", "{", "|", "}", "~ "];
|
|
9
|
+
export type PC_KEY = typeof PC_KEY_VALUES[number];
|
|
10
|
+
export declare const ANDROID_KEY_VALUES: readonly ["backspace", "delete", "enter", "tab", "escape", "up", "down", "right", "left", "home", "end", "pageup", "pagedown", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "space", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_", "`", "{", "|", "}", "~ "];
|
|
11
|
+
export type ANDROID_KEY = typeof ANDROID_KEY_VALUES[number];
|
|
12
|
+
export declare const MODIFIER_KEY_VALUES: readonly ["command", "alt", "control", "shift", "right_shift"];
|
|
13
|
+
export type MODIFIER_KEY = typeof MODIFIER_KEY_VALUES[number];
|
|
14
|
+
export type PC_AND_MODIFIER_KEY = PC_KEY | MODIFIER_KEY;
|
|
11
15
|
export type COLOR = 'black' | 'white' | 'red' | 'green' | 'yellow green' | 'orange' | 'yellow' | 'purple' | 'pink' | 'gray' | 'lime green' | 'royal blue';
|
|
12
|
-
export type PC_AND_MODIFIER_KEY = 'command' | 'alt' | 'control' | 'shift' | 'right_shift' | 'backspace' | 'delete' | 'enter' | 'tab' | 'escape' | 'up' | 'down' | 'right' | 'left' | 'home' | 'end' | 'pageup' | 'pagedown' | 'f1' | 'f2' | 'f3' | 'f4' | 'f5' | 'f6' | 'f7' | 'f8' | 'f9' | 'f10' | 'f11' | 'f12' | 'space' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '!' | '"' | '#' | '$' | '%' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\\' | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~ ';
|
|
13
16
|
export interface CommandExecutorContext {
|
|
14
17
|
customElementsJson: CustomElementJson[];
|
|
15
18
|
aiElementNames: string[];
|
|
@@ -34,6 +34,9 @@ export var Separators;
|
|
|
34
34
|
(function (Separators) {
|
|
35
35
|
Separators["STRING"] = "<|string|>";
|
|
36
36
|
})(Separators || (Separators = {}));
|
|
37
|
+
// Runtime list of PC key names: navigation/editing keys, f1-f12, 'space', digits,
// lower- and upper-case letters and punctuation. The last entry is '~ ' (a tilde
// followed by a space) — NOTE(review): confirm the trailing space is intentional.
export const PC_KEY_VALUES = ['backspace', 'delete', 'enter', 'tab', 'escape', 'up', 'down', 'right', 'left', 'home', 'end', 'pageup', 'pagedown', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'space', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~ '];
|
|
38
|
+
// Runtime list of Android key names.
// NOTE(review): this appears to contain exactly the same entries as PC_KEY_VALUES
// (including the final '~ ' with a trailing space) — confirm that no
// Android-specific keys are missing.
export const ANDROID_KEY_VALUES = ['backspace', 'delete', 'enter', 'tab', 'escape', 'up', 'down', 'right', 'left', 'home', 'end', 'pageup', 'pagedown', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'space', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~ '];
|
|
39
|
+
// Runtime list of modifier key names.
export const MODIFIER_KEY_VALUES = ['command', 'alt', 'control', 'shift', 'right_shift'];
|
|
37
40
|
class FluentBase {
|
|
38
41
|
constructor(prev) {
|
|
39
42
|
this.prev = prev;
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { BetaMessage, BetaMessageParam } from '@anthropic-ai/sdk/resources/beta/messages';
|
|
2
|
+
import { ControlCommand } from '../core/ui-control-commands';
|
|
1
3
|
import { UiControllerClient } from './ui-controller-client';
|
|
2
4
|
import { InferenceClient } from './inference-client';
|
|
3
5
|
import { Annotation } from '../core/annotation/annotation';
|
|
@@ -18,7 +20,7 @@ export declare class ExecutionRuntime {
|
|
|
18
20
|
startVideoRecording(): Promise<void>;
|
|
19
21
|
stopVideoRecording(): Promise<void>;
|
|
20
22
|
readVideoRecording(): Promise<string>;
|
|
21
|
-
|
|
23
|
+
requestControl(controlCommand: ControlCommand): Promise<void>;
|
|
22
24
|
executeInstruction(instruction: Instruction, modelComposition: ModelCompositionBranch[]): Promise<void>;
|
|
23
25
|
private readonly EXEC_REPETITION_COUNT;
|
|
24
26
|
private executeCommandRepeatedly;
|
|
@@ -38,4 +40,12 @@ export declare class ExecutionRuntime {
|
|
|
38
40
|
getDetectedElements(instruction: string, customElementJson?: CustomElementJson[]): Promise<DetectedElement[]>;
|
|
39
41
|
annotateImage(imagePath?: string, customElementJson?: CustomElementJson[], elements?: DetectedElement[]): Promise<Annotation>;
|
|
40
42
|
predictVQA(prompt: string, config?: object): Promise<any>;
|
|
43
|
+
predictActResponse(params: {
|
|
44
|
+
max_tokens: number;
|
|
45
|
+
messages: BetaMessageParam[];
|
|
46
|
+
model: string;
|
|
47
|
+
system?: string;
|
|
48
|
+
tools?: object[];
|
|
49
|
+
betas?: string[];
|
|
50
|
+
}): Promise<BetaMessage>;
|
|
41
51
|
}
|
|
@@ -212,4 +212,9 @@ export class ExecutionRuntime {
|
|
|
212
212
|
return this.inferenceClient.predictVQAAnswer(prompt, base64Image, config);
|
|
213
213
|
});
|
|
214
214
|
}
|
|
215
|
+
predictActResponse(params) {
|
|
216
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
217
|
+
return this.inferenceClient.predictActResponse(params);
|
|
218
|
+
});
|
|
219
|
+
}
|
|
215
220
|
}
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { BetaMessage, BetaMessageParam } from '@anthropic-ai/sdk/resources/beta/messages';
|
|
1
2
|
import { HttpClientGot } from '../utils/http/http-client-got';
|
|
2
3
|
import { ControlCommand } from '../core/ui-control-commands';
|
|
3
4
|
import { CustomElement } from '../core/model/custom-element';
|
|
@@ -23,4 +24,12 @@ export declare class InferenceClient {
|
|
|
23
24
|
getDetectedElements(instruction: string, image: string, customElements?: CustomElement[]): Promise<DetectedElement[]>;
|
|
24
25
|
predictImageAnnotation(image: string, customElements?: CustomElement[]): Promise<Annotation>;
|
|
25
26
|
predictVQAAnswer(prompt: string, image: string, config?: object): Promise<any>;
|
|
27
|
+
predictActResponse(params: {
|
|
28
|
+
max_tokens: number;
|
|
29
|
+
messages: BetaMessageParam[];
|
|
30
|
+
model: string;
|
|
31
|
+
system?: string;
|
|
32
|
+
tools?: object[];
|
|
33
|
+
betas?: string[];
|
|
34
|
+
}): Promise<BetaMessage>;
|
|
26
35
|
}
|
|
@@ -27,6 +27,7 @@ export class InferenceClient {
|
|
|
27
27
|
? urljoin(versionedBaseUrl, 'workspaces', workspaceId)
|
|
28
28
|
: versionedBaseUrl;
|
|
29
29
|
this.urls = {
|
|
30
|
+
actEndpoint: urljoin(url, 'act', 'inference'),
|
|
30
31
|
inference: urljoin(url, 'inference'),
|
|
31
32
|
isImageRequired: urljoin(url, 'instruction', 'is-image-required'),
|
|
32
33
|
vqaInference: urljoin(url, 'vqa', 'inference'),
|
|
@@ -68,7 +69,7 @@ export class InferenceClient {
|
|
|
68
69
|
instruction,
|
|
69
70
|
modelComposition: modelComposition.length > 0 ? modelComposition : this.modelComposition,
|
|
70
71
|
});
|
|
71
|
-
InferenceClient.logMetaInformation(response);
|
|
72
|
+
InferenceClient.logMetaInformation(response.headers);
|
|
72
73
|
return InferenceResponse.fromJson(response.body, resizedImage.resizeRatio, image);
|
|
73
74
|
});
|
|
74
75
|
}
|
|
@@ -79,13 +80,13 @@ export class InferenceClient {
|
|
|
79
80
|
image,
|
|
80
81
|
prompt,
|
|
81
82
|
});
|
|
82
|
-
InferenceClient.logMetaInformation(response);
|
|
83
|
+
InferenceClient.logMetaInformation(response.headers);
|
|
83
84
|
return response.body;
|
|
84
85
|
});
|
|
85
86
|
}
|
|
86
|
-
static logMetaInformation(
|
|
87
|
-
if (
|
|
88
|
-
logger.warn(
|
|
87
|
+
static logMetaInformation(headers) {
|
|
88
|
+
if (headers['askui-usage-warnings'] !== undefined) {
|
|
89
|
+
logger.warn(headers['askui-usage-warnings']);
|
|
89
90
|
}
|
|
90
91
|
}
|
|
91
92
|
predictControlCommand(instruction_1, modelComposition_1) {
|
|
@@ -128,4 +129,11 @@ export class InferenceClient {
|
|
|
128
129
|
return response;
|
|
129
130
|
});
|
|
130
131
|
}
|
|
132
|
+
predictActResponse(params) {
|
|
133
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
134
|
+
const response = yield this.httpClient.post(this.urls.actEndpoint, params);
|
|
135
|
+
InferenceClient.logMetaInformation(response.headers);
|
|
136
|
+
return response.body;
|
|
137
|
+
});
|
|
138
|
+
}
|
|
131
139
|
}
|
|
@@ -59,7 +59,7 @@ export class UiControlClientDependencyBuilder {
|
|
|
59
59
|
onLocationNotExist: (_d = (_c = clientArgs.aiElementArgs) === null || _c === void 0 ? void 0 : _c.onLocationNotExist) !== null && _d !== void 0 ? _d : 'error',
|
|
60
60
|
}, context: {
|
|
61
61
|
isCi: (_f = (_e = clientArgs.context) === null || _e === void 0 ? void 0 : _e.isCi) !== null && _f !== void 0 ? _f : isCI,
|
|
62
|
-
}, credentials: readCredentials(clientArgs), inferenceServerApiVersion: (_g = clientArgs.inferenceServerApiVersion) !== null && _g !== void 0 ? _g : '
|
|
62
|
+
}, credentials: readCredentials(clientArgs), inferenceServerApiVersion: (_g = clientArgs.inferenceServerApiVersion) !== null && _g !== void 0 ? _g : 'v1', inferenceServerUrl: (_h = clientArgs.inferenceServerUrl) !== null && _h !== void 0 ? _h : 'https://inference.askui.com', proxyAgents: (_j = clientArgs.proxyAgents) !== null && _j !== void 0 ? _j : (yield envProxyAgents()), uiControllerUrl: (_k = clientArgs.uiControllerUrl) !== null && _k !== void 0 ? _k : 'http://127.0.0.1:6769' });
|
|
63
63
|
});
|
|
64
64
|
}
|
|
65
65
|
}
|