askui 0.24.1 → 0.25.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/core/models/anthropic/askui-agent.d.ts +13 -0
- package/dist/cjs/core/models/anthropic/askui-agent.js +195 -0
- package/dist/cjs/core/models/anthropic/claude-agent.d.ts +40 -0
- package/dist/cjs/core/models/anthropic/claude-agent.js +200 -0
- package/dist/cjs/core/models/anthropic/index.d.ts +4 -0
- package/dist/cjs/core/models/anthropic/index.js +9 -0
- package/dist/cjs/core/models/anthropic/tools/agent-errors.d.ts +2 -0
- package/dist/cjs/core/models/anthropic/tools/agent-errors.js +6 -0
- package/dist/cjs/core/models/anthropic/tools/base.d.ts +24 -0
- package/dist/cjs/core/models/anthropic/tools/base.js +66 -0
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +113 -0
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +476 -0
- package/dist/cjs/core/ui-control-commands/index.d.ts +2 -0
- package/dist/cjs/core/ui-control-commands/index.js +5 -1
- package/dist/cjs/execution/dsl.d.ts +7 -4
- package/dist/cjs/execution/dsl.js +4 -1
- package/dist/cjs/execution/execution-runtime.d.ts +11 -1
- package/dist/cjs/execution/execution-runtime.js +5 -0
- package/dist/cjs/execution/inference-client.d.ts +9 -0
- package/dist/cjs/execution/inference-client.js +13 -5
- package/dist/cjs/execution/ui-control-client-dependency-builder.js +1 -1
- package/dist/cjs/execution/ui-control-client.d.ts +50 -0
- package/dist/cjs/execution/ui-control-client.js +61 -3
- package/dist/cjs/lib/interactive_cli/create-example-project.d.ts +1 -0
- package/dist/cjs/lib/interactive_cli/create-example-project.js +17 -3
- package/dist/cjs/main.d.ts +1 -0
- package/dist/cjs/main.js +5 -1
- package/dist/cjs/utils/base_64_image/base-64-image.d.ts +2 -1
- package/dist/cjs/utils/base_64_image/base-64-image.js +17 -2
- package/dist/esm/core/models/anthropic/askui-agent.d.ts +13 -0
- package/dist/esm/core/models/anthropic/askui-agent.js +191 -0
- package/dist/esm/core/models/anthropic/claude-agent.d.ts +40 -0
- package/dist/esm/core/models/anthropic/claude-agent.js +196 -0
- package/dist/esm/core/models/anthropic/index.d.ts +4 -0
- package/dist/esm/core/models/anthropic/index.js +2 -0
- package/dist/esm/core/models/anthropic/tools/agent-errors.d.ts +2 -0
- package/dist/esm/core/models/anthropic/tools/agent-errors.js +2 -0
- package/dist/esm/core/models/anthropic/tools/base.d.ts +24 -0
- package/dist/esm/core/models/anthropic/tools/base.js +59 -0
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +113 -0
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +461 -0
- package/dist/esm/core/ui-control-commands/index.d.ts +2 -0
- package/dist/esm/core/ui-control-commands/index.js +2 -0
- package/dist/esm/execution/dsl.d.ts +7 -4
- package/dist/esm/execution/dsl.js +3 -0
- package/dist/esm/execution/execution-runtime.d.ts +11 -1
- package/dist/esm/execution/execution-runtime.js +5 -0
- package/dist/esm/execution/inference-client.d.ts +9 -0
- package/dist/esm/execution/inference-client.js +13 -5
- package/dist/esm/execution/ui-control-client-dependency-builder.js +1 -1
- package/dist/esm/execution/ui-control-client.d.ts +50 -0
- package/dist/esm/execution/ui-control-client.js +61 -3
- package/dist/esm/lib/interactive_cli/create-example-project.d.ts +1 -0
- package/dist/esm/lib/interactive_cli/create-example-project.js +17 -3
- package/dist/esm/main.d.ts +1 -0
- package/dist/esm/main.js +1 -0
- package/dist/esm/utils/base_64_image/base-64-image.d.ts +2 -1
- package/dist/esm/utils/base_64_image/base-64-image.js +17 -2
- package/dist/example_projects_templates/configs/vscode-settings.json +41 -0
- package/dist/example_projects_templates/typescript/askui_example/my-first-askui-test-suite.test.ts +7 -17
- package/package.json +2 -1
|
@@ -0,0 +1,476 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopSingleKeyPressTool = exports.DesktopKeyPressSequenceTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
|
|
13
|
+
const dsl_1 = require("../../../../execution/dsl");
|
|
14
|
+
const base_1 = require("./base");
|
|
15
|
+
const ui_control_commands_1 = require("../../../ui-control-commands");
|
|
16
|
+
const base_64_image_1 = require("../../../../utils/base_64_image/base-64-image");
|
|
17
|
+
const agent_errors_1 = require("./agent-errors");
|
|
18
|
+
class OsAgentHandler {
|
|
19
|
+
constructor(AgentOsClient, screenDimensions) {
|
|
20
|
+
this.AgentOsClient = AgentOsClient;
|
|
21
|
+
this.TargetResolution = { width: 1280, height: 800 };
|
|
22
|
+
this.screenDimensions = screenDimensions;
|
|
23
|
+
}
|
|
24
|
+
static createInstance(AgentOsClient) {
|
|
25
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
26
|
+
const base64ImageString = yield AgentOsClient.getScreenshot();
|
|
27
|
+
const image_info = yield (yield base_64_image_1.Base64Image.fromString(base64ImageString)).getInfo();
|
|
28
|
+
return new OsAgentHandler(AgentOsClient, {
|
|
29
|
+
width: image_info.width,
|
|
30
|
+
height: image_info.height,
|
|
31
|
+
});
|
|
32
|
+
});
|
|
33
|
+
}
|
|
34
|
+
getTargetResolution() {
|
|
35
|
+
return this.TargetResolution;
|
|
36
|
+
}
|
|
37
|
+
setTargetResolution(width, height) {
|
|
38
|
+
this.TargetResolution = { width, height };
|
|
39
|
+
}
|
|
40
|
+
takeScreenshot() {
|
|
41
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
42
|
+
const base64ImageString = yield this.AgentOsClient.getScreenshot();
|
|
43
|
+
const base64Image = yield base_64_image_1.Base64Image.fromString(base64ImageString);
|
|
44
|
+
const image_info = yield base64Image.getInfo();
|
|
45
|
+
this.screenDimensions = {
|
|
46
|
+
width: image_info.width,
|
|
47
|
+
height: image_info.height,
|
|
48
|
+
};
|
|
49
|
+
const resized_image = yield base64Image.resizeWithSameAspectRatio(this.TargetResolution.width, this.TargetResolution.height);
|
|
50
|
+
return resized_image.toString(false);
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
scaleCoordinates(source, x, y) {
|
|
54
|
+
const xScalingFactor = this.TargetResolution.width / this.screenDimensions.width;
|
|
55
|
+
const yScalingFactor = this.TargetResolution.height / this.screenDimensions.height;
|
|
56
|
+
if (source === 'api') {
|
|
57
|
+
if (x > this.TargetResolution.width || y > this.TargetResolution.height || x < 0 || y < 0) {
|
|
58
|
+
throw new base_1.ToolError(`Coordinates ${x}, ${y} are outside screen bounds `
|
|
59
|
+
+ `(${this.TargetResolution.width}x${this.TargetResolution.height})`);
|
|
60
|
+
}
|
|
61
|
+
return [
|
|
62
|
+
Math.round(x / xScalingFactor),
|
|
63
|
+
Math.round(y / yScalingFactor),
|
|
64
|
+
];
|
|
65
|
+
}
|
|
66
|
+
return [
|
|
67
|
+
Math.round(x * xScalingFactor),
|
|
68
|
+
Math.round(y * yScalingFactor),
|
|
69
|
+
];
|
|
70
|
+
}
|
|
71
|
+
requestControl(controlCommand) {
|
|
72
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
73
|
+
for (const action of controlCommand.actions) {
|
|
74
|
+
[action.position.x, action.position.y] = this.scaleCoordinates('api', action.position.x, action.position.y);
|
|
75
|
+
}
|
|
76
|
+
yield this.AgentOsClient.requestControl(controlCommand);
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
exports.OsAgentHandler = OsAgentHandler;
|
|
81
|
+
/** Agent tool that captures the screen through the shared OsAgentHandler. */
class ScreenShotTool extends base_1.BaseAgentTool {
    /** @param osAgentHandler handler used to take the screenshot. */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Takes a screenshot and reports the handler's target resolution.
     *
     * @returns a promise resolving to `{ base64Images, output }`.
     */
    execute() {
        return __awaiter(this, void 0, void 0, function* () {
            const image = yield this.osAgentHandler.takeScreenshot();
            const resolution = this.osAgentHandler.getTargetResolution();
            const summary = `Screenshot was taken, with resolution width ${resolution.width} and height ${resolution.height}`;
            return {
                base64Images: [image],
                output: summary,
            };
        });
    }
    /** @returns the tool definition (name, description, input schema); the tool takes no input. */
    toParams() {
        const emptySchema = { type: 'object', properties: {}, required: [] };
        return {
            name: 'screenshot_tool',
            description: 'Takes a screenshot of the current screen and returns it as a base64 image',
            input_schema: emptySchema,
        };
    }
}
|
|
103
|
+
exports.ScreenShotTool = ScreenShotTool;
|
|
104
|
+
/** Agent tool that moves the mouse pointer to an absolute position. */
class MouseMoveTool extends base_1.BaseAgentTool {
    /** @param osAgentHandler handler that rescales and forwards the control command. */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends a MOUSE_MOVE action for the given point.
     *
     * @param command `{ x, y }`; the handler rescales the point before forwarding it.
     * @returns a promise resolving to `{ output }` describing the move.
     * @throws ToolError (as a rejection) when `x` or `y` is not a finite number.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { x, y } = command;
            // Without this guard a missing coordinate slips past the bounds check
            // (comparisons with undefined are false) and reaches the controller as NaN.
            if (typeof x !== 'number' || typeof y !== 'number' || !Number.isFinite(x) || !Number.isFinite(y)) {
                throw new base_1.ToolError('Invalid input parameter for mouse move tool');
            }
            const moveAction = new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.MOUSE_MOVE, { x, y }, '', {});
            const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [moveAction]);
            yield this.osAgentHandler.requestControl(controlCommand);
            return {
                output: `Moved mouse to (${x}, ${y})`,
            };
        });
    }
    /** @returns the tool definition (name, description, input schema). */
    toParams() {
        return {
            name: 'mouse_move_tool',
            description: 'Moves the mouse to the specified absolute coordinates. The top left corner of the screen is (0,0)',
            input_schema: {
                type: 'object',
                properties: {
                    x: {
                        type: 'number',
                        description: 'The x coordinate to move the mouse to',
                    },
                    y: {
                        type: 'number',
                        description: 'The y coordinate to move the mouse to',
                    },
                },
                // Both coordinates are mandatory, matching the other tools' schemas.
                required: ['x', 'y'],
            },
        };
    }
}
|
|
138
|
+
exports.MouseMoveTool = MouseMoveTool;
|
|
139
|
+
/** Agent tool that performs a single or double click with a chosen mouse button. */
class MouseClickTool extends base_1.BaseAgentTool {
    /** @param osAgentHandler handler that forwards the control command. */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends the click action matching `command.button` and `command.doubleClick`.
     *
     * @param command `{ button, doubleClick }` where button is 'left', 'right' or 'middle'.
     * @returns a promise resolving to `{ output }` describing the click.
     * @throws ToolError (as a rejection) when the button is not one of the three known values.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { Action, ControlCommand, ControlCommandCode, InputEvent } = ui_control_commands_1;
            const isDoubleClick = Boolean(command.doubleClick);
            // Lookup table replaces the per-button branches; a Map keeps the match strict.
            const eventByButton = isDoubleClick
                ? new Map([
                    ['left', InputEvent.MOUSE_CLICK_DOUBLE_LEFT],
                    ['right', InputEvent.MOUSE_CLICK_DOUBLE_RIGHT],
                    ['middle', InputEvent.MOUSE_CLICK_DOUBLE_MIDDLE],
                ])
                : new Map([
                    ['left', InputEvent.MOUSE_CLICK_LEFT],
                    ['right', InputEvent.MOUSE_CLICK_RIGHT],
                    ['middle', InputEvent.MOUSE_CLICK_MIDDLE],
                ]);
            if (!eventByButton.has(command.button)) {
                throw new base_1.ToolError('Invalid input parameter for mouse click tool');
            }
            const clickAction = new Action(eventByButton.get(command.button), { x: 0, y: 0 }, '', {});
            yield this.osAgentHandler.requestControl(new ControlCommand(ControlCommandCode.OK, [clickAction]));
            const verb = isDoubleClick ? 'Double clicked' : 'Clicked';
            return {
                output: `${verb} ${command.button} button`,
            };
        });
    }
    /** @returns the tool definition (name, description, input schema). */
    toParams() {
        const buttonSchema = {
            type: 'string',
            enum: ['left', 'right', 'middle'],
            description: 'The button to click',
        };
        const doubleClickSchema = {
            type: 'boolean',
            description: 'Whether to double click the button',
        };
        return {
            name: 'mouse_click_tool',
            description: 'Clicks the specified button on the mouse',
            input_schema: {
                type: 'object',
                properties: {
                    button: buttonSchema,
                    doubleClick: doubleClickSchema,
                },
                required: ['button', 'doubleClick'],
            },
        };
    }
}
|
|
201
|
+
exports.MouseClickTool = MouseClickTool;
|
|
202
|
+
/** Agent tool that scrolls by a horizontal and vertical amount. */
class MouseScrollTool extends base_1.BaseAgentTool {
    /** @param osAgentHandler handler that forwards the control command. */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends a MOUSE_SCROLL action carrying the scroll amounts in its position field.
     *
     * @param command `{ dx, dy }` scroll amounts.
     * @returns a promise resolving to `{ output }` describing the scroll.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { Action, ControlCommand, ControlCommandCode, InputEvent } = ui_control_commands_1;
            // The scroll amounts travel in the action's position slot.
            const scrollAction = new Action(InputEvent.MOUSE_SCROLL, { x: command.dx, y: command.dy }, '', {});
            yield this.osAgentHandler.requestControl(new ControlCommand(ControlCommandCode.OK, [scrollAction]));
            const summary = `Scrolled by (${command.dx}, ${command.dy})`;
            return { output: summary };
        });
    }
    /** @returns the tool definition (name, description, input schema). */
    toParams() {
        const properties = {
            dx: {
                type: 'number',
                description: 'The amount to scroll horizontally',
            },
            dy: {
                type: 'number',
                description: 'The amount to scroll vertically',
            },
        };
        return {
            name: 'mouse_scroll_tool',
            description: 'Scrolls the mouse by the specified amount',
            input_schema: {
                type: 'object',
                properties,
                required: ['dx', 'dy'],
            },
        };
    }
}
|
|
237
|
+
exports.MouseScrollTool = MouseScrollTool;
|
|
238
|
+
/** Agent tool that presses a desktop key together with up to two modifier keys. */
class DesktopKeyPressSequenceTool extends base_1.BaseAgentTool {
    /** @param osAgentHandler handler that forwards the control command. */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends a PRESS_KEY_SEQUENCE action for the key, with missing modifiers sent as ''.
     *
     * @param command `{ key, firstModifier?, secondModifier? }`.
     * @returns a promise resolving to `{ output }` describing the key press.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { Action, ControlCommand, ControlCommandCode, InputEvent } = ui_control_commands_1;
            const modifiers = {
                firstModifier: command.firstModifier || '',
                secondModifier: command.secondModifier || '',
            };
            const keyAction = new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, modifiers);
            yield this.osAgentHandler.requestControl(new ControlCommand(ControlCommandCode.OK, [keyAction]));
            const summary = `Pressed key ${command.key} with modifiers ${command.firstModifier || ''} ${command.secondModifier || ''}`;
            return { output: summary };
        });
    }
    /** @returns the tool definition (name, description, input schema); only `key` is required. */
    toParams() {
        const modifierSchema = (description) => ({
            type: 'string',
            enum: dsl_1.MODIFIER_KEY_VALUES,
            description,
        });
        return {
            name: 'desktop_key_press_sequence_tool',
            description: 'Presses a key with optional modifiers',
            input_schema: {
                type: 'object',
                properties: {
                    key: {
                        type: 'string',
                        enum: dsl_1.PC_KEY_VALUES,
                        description: 'The key to press',
                    },
                    firstModifier: modifierSchema('The first modifier key'),
                    secondModifier: modifierSchema('The second modifier key'),
                },
                required: ['key'],
            },
        };
    }
}
|
|
283
|
+
exports.DesktopKeyPressSequenceTool = DesktopKeyPressSequenceTool;
|
|
284
|
+
/** Agent tool that presses one desktop key (regular or modifier). */
class DesktopSingleKeyPressTool extends base_1.BaseAgentTool {
    /** @param osAgentHandler handler that forwards the control command. */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends a PRESS_KEY_SEQUENCE action for the key, without modifier parameters.
     *
     * @param command `{ key }`.
     * @returns a promise resolving to `{ output }` describing the key press.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { Action, ControlCommand, ControlCommandCode, InputEvent } = ui_control_commands_1;
            const keyAction = new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, {});
            yield this.osAgentHandler.requestControl(new ControlCommand(ControlCommandCode.OK, [keyAction]));
            return { output: `Pressed key ${command.key}` };
        });
    }
    /** @returns the tool definition; the key enum is the PC keys followed by the modifier keys. */
    toParams() {
        const allowedKeys = [...dsl_1.PC_KEY_VALUES, ...dsl_1.MODIFIER_KEY_VALUES];
        return {
            name: 'desktop_single_key_press_tool',
            description: 'Presses a single key',
            input_schema: {
                type: 'object',
                properties: {
                    key: {
                        type: 'string',
                        enum: allowedKeys,
                        description: 'The key to press',
                    },
                },
                required: ['key'],
            },
        };
    }
}
|
|
316
|
+
exports.DesktopSingleKeyPressTool = DesktopSingleKeyPressTool;
|
|
317
|
+
/** Agent tool that types a piece of text. */
class TypeTool extends base_1.BaseAgentTool {
    /** @param osAgentHandler handler that forwards the control command. */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends a TYPE action carrying the text.
     *
     * @param command `{ text }`.
     * @returns a promise resolving to `{ output }` echoing the typed text.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { Action, ControlCommand, ControlCommandCode, InputEvent } = ui_control_commands_1;
            const typeAction = new Action(InputEvent.TYPE, { x: 0, y: 0 }, command.text, {});
            yield this.osAgentHandler.requestControl(new ControlCommand(ControlCommandCode.OK, [typeAction]));
            return { output: `Typed text: ${command.text}` };
        });
    }
    /** @returns the tool definition (name, description, input schema). */
    toParams() {
        const textSchema = {
            type: 'string',
            description: 'The text to type',
        };
        return {
            name: 'type_tool',
            description: 'Types the specified text',
            input_schema: {
                type: 'object',
                properties: { text: textSchema },
                required: ['text'],
            },
        };
    }
}
|
|
348
|
+
exports.TypeTool = TypeTool;
|
|
349
|
+
/** Agent tool that presses one Android key. */
class AndroidSingleKeyPressTool extends base_1.BaseAgentTool {
    /** @param osAgentHandler handler that forwards the control command. */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends a PRESS_KEY_SEQUENCE action for the key.
     *
     * @param command `{ key }`.
     * @returns a promise resolving to `{ output }` describing the key press.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { Action, ControlCommand, ControlCommandCode, InputEvent } = ui_control_commands_1;
            const keyAction = new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, {});
            yield this.osAgentHandler.requestControl(new ControlCommand(ControlCommandCode.OK, [keyAction]));
            return { output: `Pressed Android key ${command.key}` };
        });
    }
    /** @returns the tool definition (name, description, input schema). */
    toParams() {
        const keySchema = {
            type: 'string',
            enum: dsl_1.ANDROID_KEY_VALUES,
            description: 'The Android key to press',
        };
        return {
            name: 'android_single_key_press_tool',
            description: 'Presses a single Android key',
            input_schema: {
                type: 'object',
                properties: { key: keySchema },
                required: ['key'],
            },
        };
    }
}
|
|
381
|
+
exports.AndroidSingleKeyPressTool = AndroidSingleKeyPressTool;
|
|
382
|
+
/** Agent tool that presses several Android keys in one request. */
class AndroidSequenceKeyPressTool extends base_1.BaseAgentTool {
    /** @param osAgentHandler handler that forwards the control command. */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends one PRESS_KEY_SEQUENCE action whose text is the keys joined by spaces.
     *
     * @param command `{ keys }`, an array of key names.
     * @returns a promise resolving to `{ output }` listing the keys, comma separated.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { Action, ControlCommand, ControlCommandCode, InputEvent } = ui_control_commands_1;
            const joinedKeys = command.keys.join(' ');
            const sequenceAction = new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, joinedKeys, {});
            yield this.osAgentHandler.requestControl(new ControlCommand(ControlCommandCode.OK, [sequenceAction]));
            return { output: `Pressed Android keys: ${command.keys.join(', ')}` };
        });
    }
    /** @returns the tool definition (name, description, input schema). */
    toParams() {
        const keysSchema = {
            type: 'array',
            items: {
                type: 'string',
                enum: dsl_1.ANDROID_KEY_VALUES,
            },
            description: 'The sequence of Android keys to press',
        };
        return {
            name: 'android_sequence_key_press_tool',
            description: 'Presses a sequence of Android keys',
            input_schema: {
                type: 'object',
                properties: { keys: keysSchema },
                required: ['keys'],
            },
        };
    }
}
|
|
417
|
+
exports.AndroidSequenceKeyPressTool = AndroidSequenceKeyPressTool;
|
|
418
|
+
/** Agent tool whose execution always fails with an AgentError carrying the given message. */
class AgentErrorTool extends base_1.BaseAgentTool {
    // Explicit zero-argument constructor kept so no arguments are forwarded to the base class.
    constructor() {
        super();
    }
    /**
     * Raises the supplied error.
     *
     * @param command `{ error }`, the message to raise.
     * @returns a promise that always rejects.
     * @throws AgentError (as a rejection) built from `command.error`.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const message = command.error;
            throw new agent_errors_1.AgentError(message);
        });
    }
    /** @returns the tool definition (name, description, input schema). */
    toParams() {
        const errorSchema = {
            type: 'string',
            description: 'The error message to raise',
        };
        return {
            name: 'agent_error_tool',
            description: 'Raises an error in the agent',
            input_schema: {
                type: 'object',
                properties: { error: errorSchema },
                required: ['error'],
            },
        };
    }
}
|
|
444
|
+
exports.AgentErrorTool = AgentErrorTool;
|
|
445
|
+
/** Agent tool that forwards a shell command string as an EXECUTE_COMMAND action. */
class ExecuteShellCommandTool extends base_1.BaseAgentTool {
    /** @param osAgentHandler handler that forwards the control command. */
    constructor(osAgentHandler) {
        super();
        this.osAgentHandler = osAgentHandler;
    }
    /**
     * Sends an EXECUTE_COMMAND action whose text is the command, passed through unmodified.
     *
     * @param command `{ command }`, the shell command string.
     * @returns a promise resolving to `{ output }` echoing the command; no command result is returned.
     */
    execute(command) {
        return __awaiter(this, void 0, void 0, function* () {
            const { Action, ControlCommand, ControlCommandCode, InputEvent } = ui_control_commands_1;
            const shellAction = new Action(InputEvent.EXECUTE_COMMAND, { x: 0, y: 0 }, command.command, {});
            yield this.osAgentHandler.requestControl(new ControlCommand(ControlCommandCode.OK, [shellAction]));
            return { output: `Executed shell command: ${command.command}` };
        });
    }
    /** @returns the tool definition (name, description, input schema). */
    toParams() {
        const commandSchema = {
            type: 'string',
            description: 'The shell command to execute',
        };
        return {
            name: 'execute_shell_command_tool',
            description: 'Executes a shell command',
            input_schema: {
                type: 'object',
                properties: { command: commandSchema },
                required: ['command'],
            },
        };
    }
}
|
|
476
|
+
exports.ExecuteShellCommandTool = ExecuteShellCommandTool;
|
|
@@ -1,3 +1,5 @@
|
|
|
1
1
|
export { ControlCommand } from './control-command';
|
|
2
2
|
export { ControlCommandCode } from './control-command-code';
|
|
3
3
|
export { InferenceResponse } from '../inference-response/inference-response';
|
|
4
|
+
export { Action } from './action';
|
|
5
|
+
export { InputEvent } from './input-event';
|
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.InferenceResponse = exports.ControlCommandCode = exports.ControlCommand = void 0;
|
|
3
|
+
exports.InputEvent = exports.Action = exports.InferenceResponse = exports.ControlCommandCode = exports.ControlCommand = void 0;
|
|
4
4
|
var control_command_1 = require("./control-command");
|
|
5
5
|
Object.defineProperty(exports, "ControlCommand", { enumerable: true, get: function () { return control_command_1.ControlCommand; } });
|
|
6
6
|
var control_command_code_1 = require("./control-command-code");
|
|
7
7
|
Object.defineProperty(exports, "ControlCommandCode", { enumerable: true, get: function () { return control_command_code_1.ControlCommandCode; } });
|
|
8
8
|
var inference_response_1 = require("../inference-response/inference-response");
|
|
9
9
|
Object.defineProperty(exports, "InferenceResponse", { enumerable: true, get: function () { return inference_response_1.InferenceResponse; } });
|
|
10
|
+
var action_1 = require("./action");
|
|
11
|
+
Object.defineProperty(exports, "Action", { enumerable: true, get: function () { return action_1.Action; } });
|
|
12
|
+
var input_event_1 = require("./input-event");
|
|
13
|
+
Object.defineProperty(exports, "InputEvent", { enumerable: true, get: function () { return input_event_1.InputEvent; } });
|
|
@@ -5,11 +5,14 @@ export declare enum Separators {
|
|
|
5
5
|
STRING = "<|string|>"
|
|
6
6
|
}
|
|
7
7
|
export type INTERSECTION_AREA = 'element_center_line' | 'element_edge_area' | 'display_edge_area';
|
|
8
|
-
export
|
|
9
|
-
export type
|
|
10
|
-
export
|
|
8
|
+
export declare const PC_KEY_VALUES: readonly ["backspace", "delete", "enter", "tab", "escape", "up", "down", "right", "left", "home", "end", "pageup", "pagedown", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "space", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_", "`", "{", "|", "}", "~ "];
|
|
9
|
+
export type PC_KEY = typeof PC_KEY_VALUES[number];
|
|
10
|
+
export declare const ANDROID_KEY_VALUES: readonly ["backspace", "delete", "enter", "tab", "escape", "up", "down", "right", "left", "home", "end", "pageup", "pagedown", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "f11", "f12", "space", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_", "`", "{", "|", "}", "~ "];
|
|
11
|
+
export type ANDROID_KEY = typeof ANDROID_KEY_VALUES[number];
|
|
12
|
+
export declare const MODIFIER_KEY_VALUES: readonly ["command", "alt", "control", "shift", "right_shift"];
|
|
13
|
+
export type MODIFIER_KEY = typeof MODIFIER_KEY_VALUES[number];
|
|
14
|
+
export type PC_AND_MODIFIER_KEY = PC_KEY | MODIFIER_KEY;
|
|
11
15
|
export type COLOR = 'black' | 'white' | 'red' | 'green' | 'yellow green' | 'orange' | 'yellow' | 'purple' | 'pink' | 'gray' | 'lime green' | 'royal blue';
|
|
12
|
-
export type PC_AND_MODIFIER_KEY = 'command' | 'alt' | 'control' | 'shift' | 'right_shift' | 'backspace' | 'delete' | 'enter' | 'tab' | 'escape' | 'up' | 'down' | 'right' | 'left' | 'home' | 'end' | 'pageup' | 'pagedown' | 'f1' | 'f2' | 'f3' | 'f4' | 'f5' | 'f6' | 'f7' | 'f8' | 'f9' | 'f10' | 'f11' | 'f12' | 'space' | '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | '!' | '"' | '#' | '$' | '%' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' | '[' | '\\' | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~ ';
|
|
13
16
|
export interface CommandExecutorContext {
|
|
14
17
|
customElementsJson: CustomElementJson[];
|
|
15
18
|
aiElementNames: string[];
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
/* eslint-disable max-len */
|
|
6
6
|
// Autogenerated from typescript.template file
|
|
7
7
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
-
exports.ApiCommands = exports.Getter = exports.FluentFiltersOrRelationsGetter = exports.FluentFiltersGetter = exports.ExecGetter = exports.FluentCommand = exports.FluentFiltersOrRelationsCondition = exports.FluentFiltersCondition = exports.FluentFiltersOrRelations = exports.FluentFilters = exports.Exec = exports.Separators = void 0;
|
|
8
|
+
exports.ApiCommands = exports.Getter = exports.FluentFiltersOrRelationsGetter = exports.FluentFiltersGetter = exports.ExecGetter = exports.FluentCommand = exports.FluentFiltersOrRelationsCondition = exports.FluentFiltersCondition = exports.FluentFiltersOrRelations = exports.FluentFilters = exports.Exec = exports.MODIFIER_KEY_VALUES = exports.ANDROID_KEY_VALUES = exports.PC_KEY_VALUES = exports.Separators = void 0;
|
|
9
9
|
function isStackTraceCodeline(line) {
|
|
10
10
|
return /[ \t]+at .+/.test(line);
|
|
11
11
|
}
|
|
@@ -37,6 +37,9 @@ var Separators;
|
|
|
37
37
|
(function (Separators) {
|
|
38
38
|
Separators["STRING"] = "<|string|>";
|
|
39
39
|
})(Separators || (exports.Separators = Separators = {}));
|
|
40
|
+
exports.PC_KEY_VALUES = ['backspace', 'delete', 'enter', 'tab', 'escape', 'up', 'down', 'right', 'left', 'home', 'end', 'pageup', 'pagedown', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'space', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~ '];
|
|
41
|
+
exports.ANDROID_KEY_VALUES = ['backspace', 'delete', 'enter', 'tab', 'escape', 'up', 'down', 'right', 'left', 'home', 'end', 'pageup', 'pagedown', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'space', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~ '];
|
|
42
|
+
exports.MODIFIER_KEY_VALUES = ['command', 'alt', 'control', 'shift', 'right_shift'];
|
|
40
43
|
class FluentBase {
|
|
41
44
|
constructor(prev) {
|
|
42
45
|
this.prev = prev;
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { BetaMessage, BetaMessageParam } from '@anthropic-ai/sdk/resources/beta/messages';
|
|
2
|
+
import { ControlCommand } from '../core/ui-control-commands';
|
|
1
3
|
import { UiControllerClient } from './ui-controller-client';
|
|
2
4
|
import { InferenceClient } from './inference-client';
|
|
3
5
|
import { Annotation } from '../core/annotation/annotation';
|
|
@@ -18,7 +20,7 @@ export declare class ExecutionRuntime {
|
|
|
18
20
|
startVideoRecording(): Promise<void>;
|
|
19
21
|
stopVideoRecording(): Promise<void>;
|
|
20
22
|
readVideoRecording(): Promise<string>;
|
|
21
|
-
|
|
23
|
+
requestControl(controlCommand: ControlCommand): Promise<void>;
|
|
22
24
|
executeInstruction(instruction: Instruction, modelComposition: ModelCompositionBranch[]): Promise<void>;
|
|
23
25
|
private readonly EXEC_REPETITION_COUNT;
|
|
24
26
|
private executeCommandRepeatedly;
|
|
@@ -38,4 +40,12 @@ export declare class ExecutionRuntime {
|
|
|
38
40
|
getDetectedElements(instruction: string, customElementJson?: CustomElementJson[]): Promise<DetectedElement[]>;
|
|
39
41
|
annotateImage(imagePath?: string, customElementJson?: CustomElementJson[], elements?: DetectedElement[]): Promise<Annotation>;
|
|
40
42
|
predictVQA(prompt: string, config?: object): Promise<any>;
|
|
43
|
+
predictActResponse(params: {
|
|
44
|
+
max_tokens: number;
|
|
45
|
+
messages: BetaMessageParam[];
|
|
46
|
+
model: string;
|
|
47
|
+
system?: string;
|
|
48
|
+
tools?: object[];
|
|
49
|
+
betas?: string[];
|
|
50
|
+
}): Promise<BetaMessage>;
|
|
41
51
|
}
|
|
@@ -215,5 +215,10 @@ class ExecutionRuntime {
|
|
|
215
215
|
return this.inferenceClient.predictVQAAnswer(prompt, base64Image, config);
|
|
216
216
|
});
|
|
217
217
|
}
|
|
218
|
+
predictActResponse(params) {
|
|
219
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
220
|
+
return this.inferenceClient.predictActResponse(params);
|
|
221
|
+
});
|
|
222
|
+
}
|
|
218
223
|
}
|
|
219
224
|
exports.ExecutionRuntime = ExecutionRuntime;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { BetaMessage, BetaMessageParam } from '@anthropic-ai/sdk/resources/beta/messages';
|
|
1
2
|
import { HttpClientGot } from '../utils/http/http-client-got';
|
|
2
3
|
import { ControlCommand } from '../core/ui-control-commands';
|
|
3
4
|
import { CustomElement } from '../core/model/custom-element';
|
|
@@ -23,4 +24,12 @@ export declare class InferenceClient {
|
|
|
23
24
|
getDetectedElements(instruction: string, image: string, customElements?: CustomElement[]): Promise<DetectedElement[]>;
|
|
24
25
|
predictImageAnnotation(image: string, customElements?: CustomElement[]): Promise<Annotation>;
|
|
25
26
|
predictVQAAnswer(prompt: string, image: string, config?: object): Promise<any>;
|
|
27
|
+
predictActResponse(params: {
|
|
28
|
+
max_tokens: number;
|
|
29
|
+
messages: BetaMessageParam[];
|
|
30
|
+
model: string;
|
|
31
|
+
system?: string;
|
|
32
|
+
tools?: object[];
|
|
33
|
+
betas?: string[];
|
|
34
|
+
}): Promise<BetaMessage>;
|
|
26
35
|
}
|