askui 0.26.0 → 0.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/core/ai-element/ai-element-collection.d.ts +1 -0
- package/dist/cjs/core/ai-element/ai-element-collection.js +3 -0
- package/dist/cjs/core/models/anthropic/askui-agent.d.ts +2 -0
- package/dist/cjs/core/models/anthropic/askui-agent.js +7 -0
- package/dist/cjs/core/models/anthropic/claude-agent.js +1 -1
- package/dist/cjs/core/models/anthropic/index.d.ts +1 -1
- package/dist/cjs/core/models/anthropic/tools/askui-api-tools.d.ts +19 -0
- package/dist/cjs/core/models/anthropic/tools/askui-api-tools.js +81 -0
- package/dist/cjs/core/models/anthropic/tools/base.d.ts +2 -0
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +11 -1
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +54 -10
- package/dist/cjs/execution/ui-control-client.d.ts +29 -0
- package/dist/cjs/execution/ui-control-client.js +110 -6
- package/dist/cjs/lib/interactive_cli/create-example-project.js +1 -1
- package/dist/cjs/main.d.ts +1 -1
- package/dist/esm/core/ai-element/ai-element-collection.d.ts +1 -0
- package/dist/esm/core/ai-element/ai-element-collection.js +3 -0
- package/dist/esm/core/models/anthropic/askui-agent.d.ts +2 -0
- package/dist/esm/core/models/anthropic/askui-agent.js +8 -1
- package/dist/esm/core/models/anthropic/claude-agent.js +1 -1
- package/dist/esm/core/models/anthropic/index.d.ts +1 -1
- package/dist/esm/core/models/anthropic/index.js +1 -1
- package/dist/esm/core/models/anthropic/tools/askui-api-tools.d.ts +19 -0
- package/dist/esm/core/models/anthropic/tools/askui-api-tools.js +76 -0
- package/dist/esm/core/models/anthropic/tools/base.d.ts +2 -0
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +11 -1
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +52 -9
- package/dist/esm/execution/ui-control-client.d.ts +29 -0
- package/dist/esm/execution/ui-control-client.js +110 -6
- package/dist/esm/lib/interactive_cli/create-example-project.js +1 -1
- package/dist/esm/main.d.ts +1 -1
- package/dist/esm/main.js +1 -1
- package/package.json +1 -1
|
@@ -8,5 +8,6 @@ export declare class AIElementCollection {
|
|
|
8
8
|
static collectAIElements(workspaceId: string | undefined, aiElementArgs: AIElementArgs): Promise<AIElementCollection>;
|
|
9
9
|
getByName(name: string): CustomElementJson[];
|
|
10
10
|
getByNames(names: string[]): CustomElementJson[];
|
|
11
|
+
getNames(): string[];
|
|
11
12
|
private static CollectAiElementsFromLocation;
|
|
12
13
|
}
|
|
@@ -73,6 +73,9 @@ class AIElementCollection {
|
|
|
73
73
|
}
|
|
74
74
|
return names.flatMap((name) => this.getByName(name));
|
|
75
75
|
}
|
|
76
|
+
getNames() {
|
|
77
|
+
return [...new Set(this.elements.map((element) => element.name))];
|
|
78
|
+
}
|
|
76
79
|
static CollectAiElementsFromLocation(aiElementLocation) {
|
|
77
80
|
const files = fs_extra_1.default.readdirSync(aiElementLocation);
|
|
78
81
|
if (files.length === 0) {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { OsAgentHandler } from './tools/os-agent-tools';
|
|
1
2
|
import { ClaudeAgent } from './claude-agent';
|
|
2
3
|
import { ExecutionRuntime } from '../../../execution/execution-runtime';
|
|
3
4
|
export declare class AskUIAgent extends ClaudeAgent {
|
|
@@ -6,6 +7,7 @@ export declare class AskUIAgent extends ClaudeAgent {
|
|
|
6
7
|
constructor(executionRuntime: ExecutionRuntime);
|
|
7
8
|
isConnected(): boolean;
|
|
8
9
|
initializeOsAgentHandler(): Promise<void>;
|
|
10
|
+
getOsAgentHandler(): OsAgentHandler;
|
|
9
11
|
configureAsDesktopAgent(): Promise<void>;
|
|
10
12
|
configureAsAndroidAgent(): Promise<void>;
|
|
11
13
|
private static DesktopSystemPrompt;
|
|
@@ -26,6 +26,12 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
|
|
|
26
26
|
this.osAgentHandler = yield os_agent_tools_1.OsAgentHandler.createInstance(this.executionRuntime);
|
|
27
27
|
});
|
|
28
28
|
}
|
|
29
|
+
getOsAgentHandler() {
|
|
30
|
+
if (!this.osAgentHandler) {
|
|
31
|
+
throw new Error('Agent OS client is not connected');
|
|
32
|
+
}
|
|
33
|
+
return this.osAgentHandler;
|
|
34
|
+
}
|
|
29
35
|
configureAsDesktopAgent() {
|
|
30
36
|
return __awaiter(this, void 0, void 0, function* () {
|
|
31
37
|
if (!this.osAgentHandler) {
|
|
@@ -33,6 +39,7 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
|
|
|
33
39
|
}
|
|
34
40
|
const tools = [
|
|
35
41
|
new os_agent_tools_1.AgentErrorTool(),
|
|
42
|
+
new os_agent_tools_1.PrintTool(),
|
|
36
43
|
new os_agent_tools_1.ScreenShotTool(this.osAgentHandler),
|
|
37
44
|
new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
|
|
38
45
|
new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { DetectedElement } from '../../../model/annotation-result/detected-element';
|
|
2
|
+
import { BaseAgentTool, ToolResult, BetaTool } from './base';
|
|
3
|
+
import { OsAgentHandler } from './os-agent-tools';
|
|
4
|
+
export declare class AskUIGetAskUIElementTool extends BaseAgentTool {
|
|
5
|
+
private osAgentHandler;
|
|
6
|
+
private locatorFunction;
|
|
7
|
+
private elementType;
|
|
8
|
+
constructor(osAgentHandler: OsAgentHandler, locatorFunction: (aiElementName: string) => Promise<DetectedElement[]>, elementType: string);
|
|
9
|
+
execute(params: {
|
|
10
|
+
elementName: string;
|
|
11
|
+
}): Promise<ToolResult>;
|
|
12
|
+
toParams(): BetaTool;
|
|
13
|
+
}
|
|
14
|
+
export declare class AskUIListAIElementTool extends BaseAgentTool {
|
|
15
|
+
private listFunction;
|
|
16
|
+
constructor(listFunction: () => Promise<string[]>);
|
|
17
|
+
execute(): Promise<ToolResult>;
|
|
18
|
+
toParams(): BetaTool;
|
|
19
|
+
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
3
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
4
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
5
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
6
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
7
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
8
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
9
|
+
});
|
|
10
|
+
};
|
|
11
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
+
exports.AskUIListAIElementTool = exports.AskUIGetAskUIElementTool = void 0;
|
|
13
|
+
const base_1 = require("./base");
|
|
14
|
+
class AskUIGetAskUIElementTool extends base_1.BaseAgentTool {
|
|
15
|
+
constructor(osAgentHandler, locatorFunction, elementType) {
|
|
16
|
+
super();
|
|
17
|
+
this.osAgentHandler = osAgentHandler;
|
|
18
|
+
this.locatorFunction = locatorFunction;
|
|
19
|
+
this.elementType = elementType;
|
|
20
|
+
}
|
|
21
|
+
execute(params) {
|
|
22
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
23
|
+
const { elementName } = params;
|
|
24
|
+
const detectedElements = yield this.locatorFunction(elementName);
|
|
25
|
+
const scaledElementsBoundingBoxes = detectedElements.map((element) => {
|
|
26
|
+
const xMid = (element.bndbox.xmin + element.bndbox.xmax) / 2;
|
|
27
|
+
const yMid = (element.bndbox.ymin + element.bndbox.ymax) / 2;
|
|
28
|
+
const [x, y] = this.osAgentHandler.scaleCoordinates('computer', xMid, yMid);
|
|
29
|
+
return {
|
|
30
|
+
x, y,
|
|
31
|
+
};
|
|
32
|
+
});
|
|
33
|
+
return {
|
|
34
|
+
output: `Found ${scaledElementsBoundingBoxes.length} elements of type ${this.elementType}. center coordinates: ${JSON.stringify(scaledElementsBoundingBoxes)}`,
|
|
35
|
+
};
|
|
36
|
+
});
|
|
37
|
+
}
|
|
38
|
+
toParams() {
|
|
39
|
+
return {
|
|
40
|
+
description: `Locates and retrieves the bounding box coordinates of AskUI ${this.elementType} elements on the screen. ` +
|
|
41
|
+
`This tool is essential for UI automation as it provides the exact pixel coordinates needed to interact with UI elements. ` +
|
|
42
|
+
`The coordinates returned can be used for clicking, hovering, or other mouse interactions. ` +
|
|
43
|
+
`Use this tool when you need to find and interact with specific ${this.elementType} UI elements by their semantic names.`,
|
|
44
|
+
input_schema: {
|
|
45
|
+
properties: {
|
|
46
|
+
elementName: {
|
|
47
|
+
type: 'string',
|
|
48
|
+
description: `The semantic name or identifier of the ${this.elementType} element to locate on the screen. `
|
|
49
|
+
},
|
|
50
|
+
},
|
|
51
|
+
required: ['elementName'],
|
|
52
|
+
type: 'object',
|
|
53
|
+
},
|
|
54
|
+
name: `get_askui_${this.elementType}_element_tool`,
|
|
55
|
+
};
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
exports.AskUIGetAskUIElementTool = AskUIGetAskUIElementTool;
|
|
59
|
+
class AskUIListAIElementTool extends base_1.BaseAgentTool {
|
|
60
|
+
constructor(listFunction) {
|
|
61
|
+
super();
|
|
62
|
+
this.listFunction = listFunction;
|
|
63
|
+
}
|
|
64
|
+
execute() {
|
|
65
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
66
|
+
const elementNames = yield this.listFunction();
|
|
67
|
+
return {
|
|
68
|
+
output: `Found ${elementNames.length} element names that can be used to retrieve bounding boxes. Names: ${JSON.stringify(elementNames)}`,
|
|
69
|
+
};
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
toParams() {
|
|
73
|
+
return {
|
|
74
|
+
description: 'Retrieves a comprehensive list of all valid AskUI AI element names that can be used for element location and interaction. ' +
|
|
75
|
+
'The returned names can be used as input for the get_askui_aiElement_element_tool to locate specific ai elements. ',
|
|
76
|
+
input_schema: { type: 'object', properties: {}, required: [] },
|
|
77
|
+
name: 'list_ai_element_names_tool',
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
exports.AskUIListAIElementTool = AskUIListAIElementTool;
|
|
@@ -24,7 +24,7 @@ export declare class OsAgentHandler {
|
|
|
24
24
|
};
|
|
25
25
|
setTargetResolution(width: number, height: number): void;
|
|
26
26
|
takeScreenshot(): Promise<string>;
|
|
27
|
-
|
|
27
|
+
scaleCoordinates(source: 'api' | 'computer', x: number, y: number): [number, number];
|
|
28
28
|
requestControl(controlCommand: ControlCommand): Promise<void>;
|
|
29
29
|
mouseMove(x: number, y: number): Promise<void>;
|
|
30
30
|
mouseClick(button: "left" | "right" | "middle", doubleClick: boolean): Promise<void>;
|
|
@@ -35,6 +35,9 @@ export declare class OsAgentHandler {
|
|
|
35
35
|
desktopKeyHoldDown(key: PC_AND_MODIFIER_KEY, modifiers?: MODIFIER_KEY[]): Promise<void>;
|
|
36
36
|
desktopKeyRelease(key: PC_AND_MODIFIER_KEY, modifiers?: MODIFIER_KEY[]): Promise<void>;
|
|
37
37
|
typeText(text: string): Promise<void>;
|
|
38
|
+
androidKeyPress(key: ANDROID_KEY): Promise<void>;
|
|
39
|
+
androidKeySequencePress(keys: ANDROID_KEY[]): Promise<void>;
|
|
40
|
+
executeShellCommand(command: string): Promise<void>;
|
|
38
41
|
}
|
|
39
42
|
export declare class ScreenShotTool extends BaseAgentTool {
|
|
40
43
|
private osAgentHandler;
|
|
@@ -165,3 +168,10 @@ export declare class WaitTool extends BaseAgentTool {
|
|
|
165
168
|
}): Promise<ToolResult>;
|
|
166
169
|
toParams(): BetaTool;
|
|
167
170
|
}
|
|
171
|
+
export declare class PrintTool extends BaseAgentTool {
|
|
172
|
+
constructor();
|
|
173
|
+
execute(command: {
|
|
174
|
+
text: string;
|
|
175
|
+
}): Promise<ToolResult>;
|
|
176
|
+
toParams(): BetaTool;
|
|
177
|
+
}
|
|
@@ -9,7 +9,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
9
9
|
});
|
|
10
10
|
};
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
|
-
exports.WaitTool = exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopKeyReleaseTool = exports.DesktopKeyHoldDownTool = exports.DesktopPressAndReleaseKeysTool = exports.MouseReleaseLeftButtonTool = exports.MouseHoldLeftButtonDownTool = exports.MouseDragAndDropTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
|
|
12
|
+
exports.PrintTool = exports.WaitTool = exports.ExecuteShellCommandTool = exports.AgentErrorTool = exports.AndroidSequenceKeyPressTool = exports.AndroidSingleKeyPressTool = exports.TypeTool = exports.DesktopKeyReleaseTool = exports.DesktopKeyHoldDownTool = exports.DesktopPressAndReleaseKeysTool = exports.MouseReleaseLeftButtonTool = exports.MouseHoldLeftButtonDownTool = exports.MouseDragAndDropTool = exports.MouseScrollTool = exports.MouseClickTool = exports.MouseMoveTool = exports.ScreenShotTool = exports.OsAgentHandler = void 0;
|
|
13
13
|
const dsl_1 = require("../../../../execution/dsl");
|
|
14
14
|
const base_1 = require("./base");
|
|
15
15
|
const ui_control_commands_1 = require("../../../ui-control-commands");
|
|
@@ -205,6 +205,24 @@ class OsAgentHandler {
|
|
|
205
205
|
yield this.requestControl(controlCommand);
|
|
206
206
|
});
|
|
207
207
|
}
|
|
208
|
+
androidKeyPress(key) {
|
|
209
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
210
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.PRESS_ANDROID_SINGLE_KEY, { x: 0, y: 0 }, key, {})]);
|
|
211
|
+
yield this.requestControl(controlCommand);
|
|
212
|
+
});
|
|
213
|
+
}
|
|
214
|
+
androidKeySequencePress(keys) {
|
|
215
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
216
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.PRESS_ANDROID_KEY_SEQUENCE, { x: 0, y: 0 }, keys.join(' '), {})]);
|
|
217
|
+
yield this.requestControl(controlCommand);
|
|
218
|
+
});
|
|
219
|
+
}
|
|
220
|
+
executeShellCommand(command) {
|
|
221
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
222
|
+
const controlCommand = new ui_control_commands_1.ControlCommand(ui_control_commands_1.ControlCommandCode.OK, [new ui_control_commands_1.Action(ui_control_commands_1.InputEvent.EXECUTE_COMMAND, { x: 0, y: 0 }, command, {})]);
|
|
223
|
+
yield this.requestControl(controlCommand);
|
|
224
|
+
});
|
|
225
|
+
}
|
|
208
226
|
}
|
|
209
227
|
exports.OsAgentHandler = OsAgentHandler;
|
|
210
228
|
class ScreenShotTool extends base_1.BaseAgentTool {
|
|
@@ -556,8 +574,7 @@ class TypeTool extends base_1.BaseAgentTool {
|
|
|
556
574
|
}
|
|
557
575
|
execute(command) {
|
|
558
576
|
return __awaiter(this, void 0, void 0, function* () {
|
|
559
|
-
|
|
560
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
577
|
+
yield this.osAgentHandler.typeText(command.text);
|
|
561
578
|
return {
|
|
562
579
|
output: `Typed text: ${command.text}`,
|
|
563
580
|
};
|
|
@@ -588,8 +605,7 @@ class AndroidSingleKeyPressTool extends base_1.BaseAgentTool {
|
|
|
588
605
|
}
|
|
589
606
|
execute(command) {
|
|
590
607
|
return __awaiter(this, void 0, void 0, function* () {
|
|
591
|
-
|
|
592
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
608
|
+
yield this.osAgentHandler.androidKeyPress(command.key);
|
|
593
609
|
return {
|
|
594
610
|
output: `Pressed Android key ${command.key}`,
|
|
595
611
|
};
|
|
@@ -621,8 +637,7 @@ class AndroidSequenceKeyPressTool extends base_1.BaseAgentTool {
|
|
|
621
637
|
}
|
|
622
638
|
execute(command) {
|
|
623
639
|
return __awaiter(this, void 0, void 0, function* () {
|
|
624
|
-
|
|
625
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
640
|
+
yield this.osAgentHandler.androidKeySequencePress(command.keys);
|
|
626
641
|
return {
|
|
627
642
|
output: `Pressed Android keys: ${command.keys.join(', ')}`,
|
|
628
643
|
};
|
|
@@ -662,7 +677,7 @@ class AgentErrorTool extends base_1.BaseAgentTool {
|
|
|
662
677
|
toParams() {
|
|
663
678
|
return {
|
|
664
679
|
name: 'agent_error_tool',
|
|
665
|
-
description: '
|
|
680
|
+
description: 'Intentionally raises an error to signal that the agent cannot proceed with the current task. Use this when the agent encounters an unsolvable problem, gets stuck in a loop, or needs to communicate a critical failure that prevents further automation.',
|
|
666
681
|
input_schema: {
|
|
667
682
|
type: 'object',
|
|
668
683
|
properties: {
|
|
@@ -684,8 +699,7 @@ class ExecuteShellCommandTool extends base_1.BaseAgentTool {
|
|
|
684
699
|
}
|
|
685
700
|
execute(command) {
|
|
686
701
|
return __awaiter(this, void 0, void 0, function* () {
|
|
687
|
-
|
|
688
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
702
|
+
yield this.osAgentHandler.executeShellCommand(command.command);
|
|
689
703
|
return {
|
|
690
704
|
output: `Executed shell command: ${command.command}`,
|
|
691
705
|
};
|
|
@@ -739,3 +753,33 @@ class WaitTool extends base_1.BaseAgentTool {
|
|
|
739
753
|
}
|
|
740
754
|
}
|
|
741
755
|
exports.WaitTool = WaitTool;
|
|
756
|
+
class PrintTool extends base_1.BaseAgentTool {
|
|
757
|
+
constructor() {
|
|
758
|
+
super();
|
|
759
|
+
}
|
|
760
|
+
execute(command) {
|
|
761
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
762
|
+
console.log(command.text);
|
|
763
|
+
return {
|
|
764
|
+
output: `Printed text: ${command.text}`,
|
|
765
|
+
};
|
|
766
|
+
});
|
|
767
|
+
}
|
|
768
|
+
toParams() {
|
|
769
|
+
return {
|
|
770
|
+
name: 'print_tool',
|
|
771
|
+
description: 'Outputs text to the console for debugging, status updates, or user communication. Useful for providing feedback about automation progress, errors, or important information during test execution.',
|
|
772
|
+
input_schema: {
|
|
773
|
+
type: 'object',
|
|
774
|
+
properties: {
|
|
775
|
+
text: {
|
|
776
|
+
type: 'string',
|
|
777
|
+
description: 'The text to output to the console.',
|
|
778
|
+
},
|
|
779
|
+
},
|
|
780
|
+
required: ['text'],
|
|
781
|
+
},
|
|
782
|
+
};
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
exports.PrintTool = PrintTool;
|
|
@@ -55,6 +55,7 @@ export declare class UiControlClient extends ApiCommands {
|
|
|
55
55
|
stopVideoRecording(): Promise<void>;
|
|
56
56
|
readVideoRecording(): Promise<string>;
|
|
57
57
|
private shouldAnnotateAfterCommandExecution;
|
|
58
|
+
private beforeNoneInferenceCallCommandExecution;
|
|
58
59
|
private afterCommandExecution;
|
|
59
60
|
annotate(annotationRequest?: AnnotationRequest): Promise<Annotation>;
|
|
60
61
|
annotateInteractively(): Promise<void>;
|
|
@@ -475,6 +476,28 @@ export declare class UiControlClient extends ApiCommands {
|
|
|
475
476
|
* @returns {ExpectAllExistResult.elements} - ExpectExistenceElement[].
|
|
476
477
|
*/
|
|
477
478
|
expectAllExist(query: ElementExistsQuery[]): Promise<ExpectAllExistResult>;
|
|
479
|
+
/**
|
|
480
|
+
* Holds down a key on the keyboard.
|
|
481
|
+
*
|
|
482
|
+
* **Examples:**
|
|
483
|
+
* ```typescript
|
|
484
|
+
* await aui.keyDown('a').exec();
|
|
485
|
+
* ```
|
|
486
|
+
*
|
|
487
|
+
* @param {PC_AND_MODIFIER_KEY} key - The key to hold down.
|
|
488
|
+
*/
|
|
489
|
+
keyDown(key: PC_AND_MODIFIER_KEY): Executable;
|
|
490
|
+
/**
|
|
491
|
+
* Releases a key up that was previously held down.
|
|
492
|
+
*
|
|
493
|
+
* **Examples:**
|
|
494
|
+
* ```typescript
|
|
495
|
+
* await aui.keyUp('a').exec();
|
|
496
|
+
* ```
|
|
497
|
+
*
|
|
498
|
+
* @param {PC_AND_MODIFIER_KEY} key - The key to release up.
|
|
499
|
+
*/
|
|
500
|
+
keyUp(key: PC_AND_MODIFIER_KEY): Executable;
|
|
478
501
|
/**
|
|
479
502
|
* Instructs the agent to autonomously achieve a specified goal through UI interactions.
|
|
480
503
|
*
|
|
@@ -573,4 +596,10 @@ export declare class UiControlClient extends ApiCommands {
|
|
|
573
596
|
*/
|
|
574
597
|
act(goal: string, options?: ActOptions): Promise<AgentHistory>;
|
|
575
598
|
act(goal: string, imagePathOrBase64String: string, options?: ActOptions): Promise<AgentHistory>;
|
|
599
|
+
/**
|
|
600
|
+
* Adds tools to the agent that allow it to interact with AI elements.
|
|
601
|
+
*
|
|
602
|
+
* @returns {Promise<void>} - A promise that resolves when the tools are added to the agent.
|
|
603
|
+
*/
|
|
604
|
+
addAIElementsToolsToAgent(): Promise<void>;
|
|
576
605
|
}
|
|
@@ -19,6 +19,7 @@ const ui_control_client_dependency_builder_1 = require("./ui-control-client-depe
|
|
|
19
19
|
const ai_element_collection_1 = require("../core/ai-element/ai-element-collection");
|
|
20
20
|
const retry_strategies_1 = require("./retry-strategies");
|
|
21
21
|
const anthropic_1 = require("../core/models/anthropic");
|
|
22
|
+
const askui_api_tools_1 = require("../core/models/anthropic/tools/askui-api-tools");
|
|
22
23
|
class UiControlClient extends dsl_1.ApiCommands {
|
|
23
24
|
constructor(workspaceId, executionRuntime, stepReporter, aiElementArgs, agent) {
|
|
24
25
|
super();
|
|
@@ -82,6 +83,22 @@ class UiControlClient extends dsl_1.ApiCommands {
|
|
|
82
83
|
return (this.stepReporter.config.withDetectedElements === 'onFailure' && error !== undefined)
|
|
83
84
|
|| (this.stepReporter.config.withDetectedElements === 'always');
|
|
84
85
|
}
|
|
86
|
+
beforeNoneInferenceCallCommandExecution(instruction) {
|
|
87
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
88
|
+
this.stepReporter.resetStep(instruction);
|
|
89
|
+
let annotation;
|
|
90
|
+
if (this.stepReporter.config.withDetectedElements === 'begin'
|
|
91
|
+
|| this.stepReporter.config.withDetectedElements === 'always') {
|
|
92
|
+
annotation = yield this.executionRuntime.annotateImage();
|
|
93
|
+
}
|
|
94
|
+
const createdAt = new Date();
|
|
95
|
+
yield this.stepReporter.onStepBegin({
|
|
96
|
+
createdAt,
|
|
97
|
+
detectedElements: annotation === null || annotation === void 0 ? void 0 : annotation.detected_elements,
|
|
98
|
+
screenshot: annotation === null || annotation === void 0 ? void 0 : annotation.image,
|
|
99
|
+
});
|
|
100
|
+
});
|
|
101
|
+
}
|
|
85
102
|
afterCommandExecution(instruction, error) {
|
|
86
103
|
return __awaiter(this, void 0, void 0, function* () {
|
|
87
104
|
var _a;
|
|
@@ -151,7 +168,7 @@ class UiControlClient extends dsl_1.ApiCommands {
|
|
|
151
168
|
]);
|
|
152
169
|
logger_1.logger.debug(instruction);
|
|
153
170
|
try {
|
|
154
|
-
|
|
171
|
+
this.stepReporter.resetStep(instruction);
|
|
155
172
|
yield this.executionRuntime.executeInstruction(instruction, modelComposition);
|
|
156
173
|
yield this.afterCommandExecution(instruction);
|
|
157
174
|
return yield Promise.resolve();
|
|
@@ -340,10 +357,14 @@ class UiControlClient extends dsl_1.ApiCommands {
|
|
|
340
357
|
// eslint-disable-next-line class-methods-use-this
|
|
341
358
|
waitFor(delayInMs) {
|
|
342
359
|
return {
|
|
343
|
-
exec() {
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
360
|
+
exec: () => __awaiter(this, void 0, void 0, function* () {
|
|
361
|
+
const stepTitle = `Wait for ${delayInMs} ms`;
|
|
362
|
+
const instruction = yield this.buildInstruction(stepTitle, []);
|
|
363
|
+
yield this.beforeNoneInferenceCallCommandExecution(instruction);
|
|
364
|
+
yield new Promise((resolve) => { setTimeout(resolve, delayInMs); });
|
|
365
|
+
yield this.afterCommandExecution(instruction);
|
|
366
|
+
return Promise.resolve();
|
|
367
|
+
}),
|
|
347
368
|
};
|
|
348
369
|
}
|
|
349
370
|
/**
|
|
@@ -738,12 +759,95 @@ class UiControlClient extends dsl_1.ApiCommands {
|
|
|
738
759
|
};
|
|
739
760
|
});
|
|
740
761
|
}
|
|
762
|
+
/**
|
|
763
|
+
* Holds down a key on the keyboard.
|
|
764
|
+
*
|
|
765
|
+
* **Examples:**
|
|
766
|
+
* ```typescript
|
|
767
|
+
* await aui.keyDown('a').exec();
|
|
768
|
+
* ```
|
|
769
|
+
*
|
|
770
|
+
* @param {PC_AND_MODIFIER_KEY} key - The key to hold down.
|
|
771
|
+
*/
|
|
772
|
+
keyDown(key) {
|
|
773
|
+
return {
|
|
774
|
+
exec: () => __awaiter(this, void 0, void 0, function* () {
|
|
775
|
+
const stepTitle = `Hold down key ${key}`;
|
|
776
|
+
const instruction = yield this.buildInstruction(stepTitle, []);
|
|
777
|
+
try {
|
|
778
|
+
yield this.beforeNoneInferenceCallCommandExecution(instruction);
|
|
779
|
+
yield this.agent.getOsAgentHandler().desktopKeyHoldDown(key, []);
|
|
780
|
+
yield this.afterCommandExecution(instruction);
|
|
781
|
+
}
|
|
782
|
+
catch (error) {
|
|
783
|
+
yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
|
|
784
|
+
return Promise.reject(error);
|
|
785
|
+
}
|
|
786
|
+
return Promise.resolve();
|
|
787
|
+
}),
|
|
788
|
+
};
|
|
789
|
+
}
|
|
790
|
+
/**
|
|
791
|
+
* Releases a key up that was previously held down.
|
|
792
|
+
*
|
|
793
|
+
* **Examples:**
|
|
794
|
+
* ```typescript
|
|
795
|
+
* await aui.keyUp('a').exec();
|
|
796
|
+
* ```
|
|
797
|
+
*
|
|
798
|
+
* @param {PC_AND_MODIFIER_KEY} key - The key to release up.
|
|
799
|
+
*/
|
|
800
|
+
keyUp(key) {
|
|
801
|
+
return {
|
|
802
|
+
exec: () => __awaiter(this, void 0, void 0, function* () {
|
|
803
|
+
const stepTitle = `Release key ${key}`;
|
|
804
|
+
const instruction = yield this.buildInstruction(stepTitle, []);
|
|
805
|
+
try {
|
|
806
|
+
yield this.beforeNoneInferenceCallCommandExecution(instruction);
|
|
807
|
+
yield this.agent.getOsAgentHandler().desktopKeyRelease(key, []);
|
|
808
|
+
yield this.afterCommandExecution(instruction);
|
|
809
|
+
}
|
|
810
|
+
catch (error) {
|
|
811
|
+
yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
|
|
812
|
+
return Promise.reject(error);
|
|
813
|
+
}
|
|
814
|
+
return Promise.resolve();
|
|
815
|
+
}),
|
|
816
|
+
};
|
|
817
|
+
}
|
|
741
818
|
act(goal, imageOrOptions, options) {
|
|
742
819
|
return __awaiter(this, void 0, void 0, function* () {
|
|
743
820
|
if (typeof imageOrOptions === 'string') {
|
|
744
821
|
return this.agent.act(goal, imageOrOptions, options);
|
|
745
822
|
}
|
|
746
|
-
|
|
823
|
+
const fullTitle = `Act: ${goal}`;
|
|
824
|
+
const stepTitle = fullTitle.length > 50 ? `${fullTitle.substring(0, 47)}...` : fullTitle;
|
|
825
|
+
const instruction = yield this.buildInstruction(stepTitle, []);
|
|
826
|
+
try {
|
|
827
|
+
yield this.beforeNoneInferenceCallCommandExecution(instruction);
|
|
828
|
+
const result = yield this.agent.act(goal, undefined, imageOrOptions);
|
|
829
|
+
yield this.afterCommandExecution(instruction);
|
|
830
|
+
return result;
|
|
831
|
+
}
|
|
832
|
+
catch (error) {
|
|
833
|
+
yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
|
|
834
|
+
return Promise.reject(error);
|
|
835
|
+
}
|
|
836
|
+
});
|
|
837
|
+
}
|
|
838
|
+
/**
|
|
839
|
+
* Adds tools to the agent that allow it to interact with AI elements.
|
|
840
|
+
*
|
|
841
|
+
* @returns {Promise<void>} - A promise that resolves when the tools are added to the agent.
|
|
842
|
+
*/
|
|
843
|
+
addAIElementsToolsToAgent() {
|
|
844
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
845
|
+
const aiElementLocator = (aiElementName) => this.get().aiElement(aiElementName).exec();
|
|
846
|
+
const askUIGetAskUIElementTool = new askui_api_tools_1.AskUIGetAskUIElementTool(this.agent.getOsAgentHandler(), aiElementLocator, 'aiElement');
|
|
847
|
+
this.agent.addTool(askUIGetAskUIElementTool);
|
|
848
|
+
const listAIElementNamesFunction = () => (ai_element_collection_1.AIElementCollection.collectAIElements(this.workspaceId, this.aiElementArgs)).then((aiElementCollection) => aiElementCollection.getNames());
|
|
849
|
+
const askUIListAIElementTool = new askui_api_tools_1.AskUIListAIElementTool(listAIElementNamesFunction);
|
|
850
|
+
this.agent.addTool(askUIListAIElementTool);
|
|
747
851
|
});
|
|
748
852
|
}
|
|
749
853
|
}
|
|
@@ -185,7 +185,7 @@ class CreateExampleProject {
|
|
|
185
185
|
return __awaiter(this, void 0, void 0, function* () {
|
|
186
186
|
const runCommand = (0, util_1.promisify)(child_process_1.exec);
|
|
187
187
|
const frameworkDependencies = {
|
|
188
|
-
jest: 'npm i -D @askui/askui-reporters typescript ts-node @types/jest ts-jest jest @askui/jest-allure-circus eslint @typescript-eslint/parser @typescript-eslint/eslint-plugin eslint-plugin-import @askui/eslint-plugin-askui hpagent',
|
|
188
|
+
jest: 'npm i -D @askui/askui-reporters typescript ts-node @types/jest@30.0.0 ts-jest@29.4.0 jest@29.7.0 @askui/jest-allure-circus eslint @typescript-eslint/parser @typescript-eslint/eslint-plugin eslint-plugin-import @askui/eslint-plugin-askui hpagent',
|
|
189
189
|
};
|
|
190
190
|
yield runCommand(frameworkDependencies.jest);
|
|
191
191
|
});
|
package/dist/cjs/main.d.ts
CHANGED
|
@@ -4,4 +4,4 @@ export { Instruction, Reporter, ReporterConfig, Snapshot, SnapshotDetailLevel, S
|
|
|
4
4
|
export { Annotation } from './core/annotation/annotation';
|
|
5
5
|
export { DetectedElement } from './core/model/annotation-result/detected-element';
|
|
6
6
|
export { LogLevels } from './shared';
|
|
7
|
-
export { ToolFailure, ToolError, BaseAgentTool } from './core/models/anthropic';
|
|
7
|
+
export { ToolFailure, ToolError, BaseAgentTool, BetaTool, ToolResult, } from './core/models/anthropic';
|
|
@@ -8,5 +8,6 @@ export declare class AIElementCollection {
|
|
|
8
8
|
static collectAIElements(workspaceId: string | undefined, aiElementArgs: AIElementArgs): Promise<AIElementCollection>;
|
|
9
9
|
getByName(name: string): CustomElementJson[];
|
|
10
10
|
getByNames(names: string[]): CustomElementJson[];
|
|
11
|
+
getNames(): string[];
|
|
11
12
|
private static CollectAiElementsFromLocation;
|
|
12
13
|
}
|
|
@@ -67,6 +67,9 @@ export class AIElementCollection {
|
|
|
67
67
|
}
|
|
68
68
|
return names.flatMap((name) => this.getByName(name));
|
|
69
69
|
}
|
|
70
|
+
getNames() {
|
|
71
|
+
return [...new Set(this.elements.map((element) => element.name))];
|
|
72
|
+
}
|
|
70
73
|
static CollectAiElementsFromLocation(aiElementLocation) {
|
|
71
74
|
const files = fs.readdirSync(aiElementLocation);
|
|
72
75
|
if (files.length === 0) {
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { OsAgentHandler } from './tools/os-agent-tools';
|
|
1
2
|
import { ClaudeAgent } from './claude-agent';
|
|
2
3
|
import { ExecutionRuntime } from '../../../execution/execution-runtime';
|
|
3
4
|
export declare class AskUIAgent extends ClaudeAgent {
|
|
@@ -6,6 +7,7 @@ export declare class AskUIAgent extends ClaudeAgent {
|
|
|
6
7
|
constructor(executionRuntime: ExecutionRuntime);
|
|
7
8
|
isConnected(): boolean;
|
|
8
9
|
initializeOsAgentHandler(): Promise<void>;
|
|
10
|
+
getOsAgentHandler(): OsAgentHandler;
|
|
9
11
|
configureAsDesktopAgent(): Promise<void>;
|
|
10
12
|
configureAsAndroidAgent(): Promise<void>;
|
|
11
13
|
private static DesktopSystemPrompt;
|
|
@@ -7,7 +7,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
7
7
|
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
8
|
});
|
|
9
9
|
};
|
|
10
|
-
import { DesktopPressAndReleaseKeysTool, MouseClickTool, MouseMoveTool, MouseScrollTool, OsAgentHandler, ScreenShotTool, TypeTool, AgentErrorTool, AndroidSequenceKeyPressTool, AndroidSingleKeyPressTool, ExecuteShellCommandTool, DesktopKeyHoldDownTool, DesktopKeyReleaseTool, MouseReleaseLeftButtonTool, MouseHoldLeftButtonDownTool, MouseDragAndDropTool, WaitTool, } from './tools/os-agent-tools';
|
|
10
|
+
import { DesktopPressAndReleaseKeysTool, MouseClickTool, MouseMoveTool, MouseScrollTool, OsAgentHandler, ScreenShotTool, TypeTool, AgentErrorTool, AndroidSequenceKeyPressTool, AndroidSingleKeyPressTool, ExecuteShellCommandTool, DesktopKeyHoldDownTool, DesktopKeyReleaseTool, MouseReleaseLeftButtonTool, MouseHoldLeftButtonDownTool, MouseDragAndDropTool, WaitTool, PrintTool, } from './tools/os-agent-tools';
|
|
11
11
|
import { ClaudeAgent } from './claude-agent';
|
|
12
12
|
export class AskUIAgent extends ClaudeAgent {
|
|
13
13
|
constructor(executionRuntime) {
|
|
@@ -23,6 +23,12 @@ export class AskUIAgent extends ClaudeAgent {
|
|
|
23
23
|
this.osAgentHandler = yield OsAgentHandler.createInstance(this.executionRuntime);
|
|
24
24
|
});
|
|
25
25
|
}
|
|
26
|
+
getOsAgentHandler() {
|
|
27
|
+
if (!this.osAgentHandler) {
|
|
28
|
+
throw new Error('Agent OS client is not connected');
|
|
29
|
+
}
|
|
30
|
+
return this.osAgentHandler;
|
|
31
|
+
}
|
|
26
32
|
configureAsDesktopAgent() {
|
|
27
33
|
return __awaiter(this, void 0, void 0, function* () {
|
|
28
34
|
if (!this.osAgentHandler) {
|
|
@@ -30,6 +36,7 @@ export class AskUIAgent extends ClaudeAgent {
|
|
|
30
36
|
}
|
|
31
37
|
const tools = [
|
|
32
38
|
new AgentErrorTool(),
|
|
39
|
+
new PrintTool(),
|
|
33
40
|
new ScreenShotTool(this.osAgentHandler),
|
|
34
41
|
new MouseMoveTool(this.osAgentHandler),
|
|
35
42
|
new MouseClickTool(this.osAgentHandler),
|
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
export { AskUIAgent } from './askui-agent';
|
|
2
|
-
export { ToolFailure, ToolError, BaseAgentTool } from './tools/base';
|
|
2
|
+
export { ToolFailure, ToolError, BaseAgentTool, } from './tools/base';
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { DetectedElement } from '../../../model/annotation-result/detected-element';
|
|
2
|
+
import { BaseAgentTool, ToolResult, BetaTool } from './base';
|
|
3
|
+
import { OsAgentHandler } from './os-agent-tools';
|
|
4
|
+
export declare class AskUIGetAskUIElementTool extends BaseAgentTool {
|
|
5
|
+
private osAgentHandler;
|
|
6
|
+
private locatorFunction;
|
|
7
|
+
private elementType;
|
|
8
|
+
constructor(osAgentHandler: OsAgentHandler, locatorFunction: (aiElementName: string) => Promise<DetectedElement[]>, elementType: string);
|
|
9
|
+
execute(params: {
|
|
10
|
+
elementName: string;
|
|
11
|
+
}): Promise<ToolResult>;
|
|
12
|
+
toParams(): BetaTool;
|
|
13
|
+
}
|
|
14
|
+
export declare class AskUIListAIElementTool extends BaseAgentTool {
|
|
15
|
+
private listFunction;
|
|
16
|
+
constructor(listFunction: () => Promise<string[]>);
|
|
17
|
+
execute(): Promise<ToolResult>;
|
|
18
|
+
toParams(): BetaTool;
|
|
19
|
+
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { BaseAgentTool } from './base';
|
|
11
|
+
export class AskUIGetAskUIElementTool extends BaseAgentTool {
|
|
12
|
+
constructor(osAgentHandler, locatorFunction, elementType) {
|
|
13
|
+
super();
|
|
14
|
+
this.osAgentHandler = osAgentHandler;
|
|
15
|
+
this.locatorFunction = locatorFunction;
|
|
16
|
+
this.elementType = elementType;
|
|
17
|
+
}
|
|
18
|
+
execute(params) {
|
|
19
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
20
|
+
const { elementName } = params;
|
|
21
|
+
const detectedElements = yield this.locatorFunction(elementName);
|
|
22
|
+
const scaledElementsBoundingBoxes = detectedElements.map((element) => {
|
|
23
|
+
const xMid = (element.bndbox.xmin + element.bndbox.xmax) / 2;
|
|
24
|
+
const yMid = (element.bndbox.ymin + element.bndbox.ymax) / 2;
|
|
25
|
+
const [x, y] = this.osAgentHandler.scaleCoordinates('computer', xMid, yMid);
|
|
26
|
+
return {
|
|
27
|
+
x, y,
|
|
28
|
+
};
|
|
29
|
+
});
|
|
30
|
+
return {
|
|
31
|
+
output: `Found ${scaledElementsBoundingBoxes.length} elements of type ${this.elementType}. center coordinates: ${JSON.stringify(scaledElementsBoundingBoxes)}`,
|
|
32
|
+
};
|
|
33
|
+
});
|
|
34
|
+
}
|
|
35
|
+
toParams() {
|
|
36
|
+
return {
|
|
37
|
+
description: `Locates and retrieves the bounding box coordinates of AskUI ${this.elementType} elements on the screen. ` +
|
|
38
|
+
`This tool is essential for UI automation as it provides the exact pixel coordinates needed to interact with UI elements. ` +
|
|
39
|
+
`The coordinates returned can be used for clicking, hovering, or other mouse interactions. ` +
|
|
40
|
+
`Use this tool when you need to find and interact with specific ${this.elementType} UI elements by their semantic names.`,
|
|
41
|
+
input_schema: {
|
|
42
|
+
properties: {
|
|
43
|
+
elementName: {
|
|
44
|
+
type: 'string',
|
|
45
|
+
description: `The semantic name or identifier of the ${this.elementType} element to locate on the screen. `
|
|
46
|
+
},
|
|
47
|
+
},
|
|
48
|
+
required: ['elementName'],
|
|
49
|
+
type: 'object',
|
|
50
|
+
},
|
|
51
|
+
name: `get_askui_${this.elementType}_element_tool`,
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
export class AskUIListAIElementTool extends BaseAgentTool {
|
|
56
|
+
constructor(listFunction) {
|
|
57
|
+
super();
|
|
58
|
+
this.listFunction = listFunction;
|
|
59
|
+
}
|
|
60
|
+
execute() {
|
|
61
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
62
|
+
const elementNames = yield this.listFunction();
|
|
63
|
+
return {
|
|
64
|
+
output: `Found ${elementNames.length} element names that can be used to retrieve bounding boxes. Names: ${JSON.stringify(elementNames)}`,
|
|
65
|
+
};
|
|
66
|
+
});
|
|
67
|
+
}
|
|
68
|
+
toParams() {
|
|
69
|
+
return {
|
|
70
|
+
description: 'Retrieves a comprehensive list of all valid AskUI AI element names that can be used for element location and interaction. ' +
|
|
71
|
+
'The returned names can be used as input for the get_askui_aiElement_element_tool to locate specific ai elements. ',
|
|
72
|
+
input_schema: { type: 'object', properties: {}, required: [] },
|
|
73
|
+
name: 'list_ai_element_names_tool',
|
|
74
|
+
};
|
|
75
|
+
}
|
|
76
|
+
}
|
|
@@ -24,7 +24,7 @@ export declare class OsAgentHandler {
|
|
|
24
24
|
};
|
|
25
25
|
setTargetResolution(width: number, height: number): void;
|
|
26
26
|
takeScreenshot(): Promise<string>;
|
|
27
|
-
|
|
27
|
+
scaleCoordinates(source: 'api' | 'computer', x: number, y: number): [number, number];
|
|
28
28
|
requestControl(controlCommand: ControlCommand): Promise<void>;
|
|
29
29
|
mouseMove(x: number, y: number): Promise<void>;
|
|
30
30
|
mouseClick(button: "left" | "right" | "middle", doubleClick: boolean): Promise<void>;
|
|
@@ -35,6 +35,9 @@ export declare class OsAgentHandler {
|
|
|
35
35
|
desktopKeyHoldDown(key: PC_AND_MODIFIER_KEY, modifiers?: MODIFIER_KEY[]): Promise<void>;
|
|
36
36
|
desktopKeyRelease(key: PC_AND_MODIFIER_KEY, modifiers?: MODIFIER_KEY[]): Promise<void>;
|
|
37
37
|
typeText(text: string): Promise<void>;
|
|
38
|
+
androidKeyPress(key: ANDROID_KEY): Promise<void>;
|
|
39
|
+
androidKeySequencePress(keys: ANDROID_KEY[]): Promise<void>;
|
|
40
|
+
executeShellCommand(command: string): Promise<void>;
|
|
38
41
|
}
|
|
39
42
|
export declare class ScreenShotTool extends BaseAgentTool {
|
|
40
43
|
private osAgentHandler;
|
|
@@ -165,3 +168,10 @@ export declare class WaitTool extends BaseAgentTool {
|
|
|
165
168
|
}): Promise<ToolResult>;
|
|
166
169
|
toParams(): BetaTool;
|
|
167
170
|
}
|
|
171
|
+
export declare class PrintTool extends BaseAgentTool {
|
|
172
|
+
constructor();
|
|
173
|
+
execute(command: {
|
|
174
|
+
text: string;
|
|
175
|
+
}): Promise<ToolResult>;
|
|
176
|
+
toParams(): BetaTool;
|
|
177
|
+
}
|
|
@@ -202,6 +202,24 @@ export class OsAgentHandler {
|
|
|
202
202
|
yield this.requestControl(controlCommand);
|
|
203
203
|
});
|
|
204
204
|
}
|
|
205
|
+
androidKeyPress(key) {
|
|
206
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
207
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.PRESS_ANDROID_SINGLE_KEY, { x: 0, y: 0 }, key, {})]);
|
|
208
|
+
yield this.requestControl(controlCommand);
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
androidKeySequencePress(keys) {
|
|
212
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
213
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.PRESS_ANDROID_KEY_SEQUENCE, { x: 0, y: 0 }, keys.join(' '), {})]);
|
|
214
|
+
yield this.requestControl(controlCommand);
|
|
215
|
+
});
|
|
216
|
+
}
|
|
217
|
+
executeShellCommand(command) {
|
|
218
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
219
|
+
const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.EXECUTE_COMMAND, { x: 0, y: 0 }, command, {})]);
|
|
220
|
+
yield this.requestControl(controlCommand);
|
|
221
|
+
});
|
|
222
|
+
}
|
|
205
223
|
}
|
|
206
224
|
export class ScreenShotTool extends BaseAgentTool {
|
|
207
225
|
constructor(osAgentHandler) {
|
|
@@ -542,8 +560,7 @@ export class TypeTool extends BaseAgentTool {
|
|
|
542
560
|
}
|
|
543
561
|
execute(command) {
|
|
544
562
|
return __awaiter(this, void 0, void 0, function* () {
|
|
545
|
-
|
|
546
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
563
|
+
yield this.osAgentHandler.typeText(command.text);
|
|
547
564
|
return {
|
|
548
565
|
output: `Typed text: ${command.text}`,
|
|
549
566
|
};
|
|
@@ -573,8 +590,7 @@ export class AndroidSingleKeyPressTool extends BaseAgentTool {
|
|
|
573
590
|
}
|
|
574
591
|
execute(command) {
|
|
575
592
|
return __awaiter(this, void 0, void 0, function* () {
|
|
576
|
-
|
|
577
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
593
|
+
yield this.osAgentHandler.androidKeyPress(command.key);
|
|
578
594
|
return {
|
|
579
595
|
output: `Pressed Android key ${command.key}`,
|
|
580
596
|
};
|
|
@@ -605,8 +621,7 @@ export class AndroidSequenceKeyPressTool extends BaseAgentTool {
|
|
|
605
621
|
}
|
|
606
622
|
execute(command) {
|
|
607
623
|
return __awaiter(this, void 0, void 0, function* () {
|
|
608
|
-
|
|
609
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
624
|
+
yield this.osAgentHandler.androidKeySequencePress(command.keys);
|
|
610
625
|
return {
|
|
611
626
|
output: `Pressed Android keys: ${command.keys.join(', ')}`,
|
|
612
627
|
};
|
|
@@ -645,7 +660,7 @@ export class AgentErrorTool extends BaseAgentTool {
|
|
|
645
660
|
toParams() {
|
|
646
661
|
return {
|
|
647
662
|
name: 'agent_error_tool',
|
|
648
|
-
description: '
|
|
663
|
+
description: 'Intentionally raises an error to signal that the agent cannot proceed with the current task. Use this when the agent encounters an unsolvable problem, gets stuck in a loop, or needs to communicate a critical failure that prevents further automation.',
|
|
649
664
|
input_schema: {
|
|
650
665
|
type: 'object',
|
|
651
666
|
properties: {
|
|
@@ -666,8 +681,7 @@ export class ExecuteShellCommandTool extends BaseAgentTool {
|
|
|
666
681
|
}
|
|
667
682
|
execute(command) {
|
|
668
683
|
return __awaiter(this, void 0, void 0, function* () {
|
|
669
|
-
|
|
670
|
-
yield this.osAgentHandler.requestControl(controlCommand);
|
|
684
|
+
yield this.osAgentHandler.executeShellCommand(command.command);
|
|
671
685
|
return {
|
|
672
686
|
output: `Executed shell command: ${command.command}`,
|
|
673
687
|
};
|
|
@@ -719,3 +733,32 @@ export class WaitTool extends BaseAgentTool {
|
|
|
719
733
|
};
|
|
720
734
|
}
|
|
721
735
|
}
|
|
736
|
+
export class PrintTool extends BaseAgentTool {
|
|
737
|
+
constructor() {
|
|
738
|
+
super();
|
|
739
|
+
}
|
|
740
|
+
execute(command) {
|
|
741
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
742
|
+
console.log(command.text);
|
|
743
|
+
return {
|
|
744
|
+
output: `Printed text: ${command.text}`,
|
|
745
|
+
};
|
|
746
|
+
});
|
|
747
|
+
}
|
|
748
|
+
toParams() {
|
|
749
|
+
return {
|
|
750
|
+
name: 'print_tool',
|
|
751
|
+
description: 'Outputs text to the console for debugging, status updates, or user communication. Useful for providing feedback about automation progress, errors, or important information during test execution.',
|
|
752
|
+
input_schema: {
|
|
753
|
+
type: 'object',
|
|
754
|
+
properties: {
|
|
755
|
+
text: {
|
|
756
|
+
type: 'string',
|
|
757
|
+
description: 'The text to output to the console.',
|
|
758
|
+
},
|
|
759
|
+
},
|
|
760
|
+
required: ['text'],
|
|
761
|
+
},
|
|
762
|
+
};
|
|
763
|
+
}
|
|
764
|
+
}
|
|
@@ -55,6 +55,7 @@ export declare class UiControlClient extends ApiCommands {
|
|
|
55
55
|
stopVideoRecording(): Promise<void>;
|
|
56
56
|
readVideoRecording(): Promise<string>;
|
|
57
57
|
private shouldAnnotateAfterCommandExecution;
|
|
58
|
+
private beforeNoneInferenceCallCommandExecution;
|
|
58
59
|
private afterCommandExecution;
|
|
59
60
|
annotate(annotationRequest?: AnnotationRequest): Promise<Annotation>;
|
|
60
61
|
annotateInteractively(): Promise<void>;
|
|
@@ -475,6 +476,28 @@ export declare class UiControlClient extends ApiCommands {
|
|
|
475
476
|
* @returns {ExpectAllExistResult.elements} - ExpectExistenceElement[].
|
|
476
477
|
*/
|
|
477
478
|
expectAllExist(query: ElementExistsQuery[]): Promise<ExpectAllExistResult>;
|
|
479
|
+
/**
|
|
480
|
+
* Holds down a key on the keyboard.
|
|
481
|
+
*
|
|
482
|
+
* **Examples:**
|
|
483
|
+
* ```typescript
|
|
484
|
+
* await aui.keyDown('a').exec();
|
|
485
|
+
* ```
|
|
486
|
+
*
|
|
487
|
+
* @param {PC_AND_MODIFIER_KEY} key - The key to hold down.
|
|
488
|
+
*/
|
|
489
|
+
keyDown(key: PC_AND_MODIFIER_KEY): Executable;
|
|
490
|
+
/**
|
|
491
|
+
* Releases a key up that was previously held down.
|
|
492
|
+
*
|
|
493
|
+
* **Examples:**
|
|
494
|
+
* ```typescript
|
|
495
|
+
* await aui.keyUp('a').exec();
|
|
496
|
+
* ```
|
|
497
|
+
*
|
|
498
|
+
* @param {PC_AND_MODIFIER_KEY} key - The key to release up.
|
|
499
|
+
*/
|
|
500
|
+
keyUp(key: PC_AND_MODIFIER_KEY): Executable;
|
|
478
501
|
/**
|
|
479
502
|
* Instructs the agent to autonomously achieve a specified goal through UI interactions.
|
|
480
503
|
*
|
|
@@ -573,4 +596,10 @@ export declare class UiControlClient extends ApiCommands {
|
|
|
573
596
|
*/
|
|
574
597
|
act(goal: string, options?: ActOptions): Promise<AgentHistory>;
|
|
575
598
|
act(goal: string, imagePathOrBase64String: string, options?: ActOptions): Promise<AgentHistory>;
|
|
599
|
+
/**
|
|
600
|
+
* Adds tools to the agent that allow it to interact with AI elements.
|
|
601
|
+
*
|
|
602
|
+
* @returns {Promise<void>} - A promise that resolves when the tools are added to the agent.
|
|
603
|
+
*/
|
|
604
|
+
addAIElementsToolsToAgent(): Promise<void>;
|
|
576
605
|
}
|
|
@@ -16,6 +16,7 @@ import { UiControlClientDependencyBuilder } from './ui-control-client-dependency
|
|
|
16
16
|
import { AIElementCollection } from '../core/ai-element/ai-element-collection';
|
|
17
17
|
import { NoRetryStrategy } from './retry-strategies';
|
|
18
18
|
import { AskUIAgent } from '../core/models/anthropic';
|
|
19
|
+
import { AskUIGetAskUIElementTool, AskUIListAIElementTool } from '../core/models/anthropic/tools/askui-api-tools';
|
|
19
20
|
export class UiControlClient extends ApiCommands {
|
|
20
21
|
constructor(workspaceId, executionRuntime, stepReporter, aiElementArgs, agent) {
|
|
21
22
|
super();
|
|
@@ -79,6 +80,22 @@ export class UiControlClient extends ApiCommands {
|
|
|
79
80
|
return (this.stepReporter.config.withDetectedElements === 'onFailure' && error !== undefined)
|
|
80
81
|
|| (this.stepReporter.config.withDetectedElements === 'always');
|
|
81
82
|
}
|
|
83
|
+
beforeNoneInferenceCallCommandExecution(instruction) {
|
|
84
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
85
|
+
this.stepReporter.resetStep(instruction);
|
|
86
|
+
let annotation;
|
|
87
|
+
if (this.stepReporter.config.withDetectedElements === 'begin'
|
|
88
|
+
|| this.stepReporter.config.withDetectedElements === 'always') {
|
|
89
|
+
annotation = yield this.executionRuntime.annotateImage();
|
|
90
|
+
}
|
|
91
|
+
const createdAt = new Date();
|
|
92
|
+
yield this.stepReporter.onStepBegin({
|
|
93
|
+
createdAt,
|
|
94
|
+
detectedElements: annotation === null || annotation === void 0 ? void 0 : annotation.detected_elements,
|
|
95
|
+
screenshot: annotation === null || annotation === void 0 ? void 0 : annotation.image,
|
|
96
|
+
});
|
|
97
|
+
});
|
|
98
|
+
}
|
|
82
99
|
afterCommandExecution(instruction, error) {
|
|
83
100
|
return __awaiter(this, void 0, void 0, function* () {
|
|
84
101
|
var _a;
|
|
@@ -148,7 +165,7 @@ export class UiControlClient extends ApiCommands {
|
|
|
148
165
|
]);
|
|
149
166
|
logger.debug(instruction);
|
|
150
167
|
try {
|
|
151
|
-
|
|
168
|
+
this.stepReporter.resetStep(instruction);
|
|
152
169
|
yield this.executionRuntime.executeInstruction(instruction, modelComposition);
|
|
153
170
|
yield this.afterCommandExecution(instruction);
|
|
154
171
|
return yield Promise.resolve();
|
|
@@ -337,10 +354,14 @@ export class UiControlClient extends ApiCommands {
|
|
|
337
354
|
// eslint-disable-next-line class-methods-use-this
|
|
338
355
|
waitFor(delayInMs) {
|
|
339
356
|
return {
|
|
340
|
-
exec() {
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
357
|
+
exec: () => __awaiter(this, void 0, void 0, function* () {
|
|
358
|
+
const stepTitle = `Wait for ${delayInMs} ms`;
|
|
359
|
+
const instruction = yield this.buildInstruction(stepTitle, []);
|
|
360
|
+
yield this.beforeNoneInferenceCallCommandExecution(instruction);
|
|
361
|
+
yield new Promise((resolve) => { setTimeout(resolve, delayInMs); });
|
|
362
|
+
yield this.afterCommandExecution(instruction);
|
|
363
|
+
return Promise.resolve();
|
|
364
|
+
}),
|
|
344
365
|
};
|
|
345
366
|
}
|
|
346
367
|
/**
|
|
@@ -735,12 +756,95 @@ export class UiControlClient extends ApiCommands {
|
|
|
735
756
|
};
|
|
736
757
|
});
|
|
737
758
|
}
|
|
759
|
+
/**
|
|
760
|
+
* Holds down a key on the keyboard.
|
|
761
|
+
*
|
|
762
|
+
* **Examples:**
|
|
763
|
+
* ```typescript
|
|
764
|
+
* await aui.keyDown('a').exec();
|
|
765
|
+
* ```
|
|
766
|
+
*
|
|
767
|
+
* @param {PC_AND_MODIFIER_KEY} key - The key to hold down.
|
|
768
|
+
*/
|
|
769
|
+
keyDown(key) {
|
|
770
|
+
return {
|
|
771
|
+
exec: () => __awaiter(this, void 0, void 0, function* () {
|
|
772
|
+
const stepTitle = `Hold down key ${key}`;
|
|
773
|
+
const instruction = yield this.buildInstruction(stepTitle, []);
|
|
774
|
+
try {
|
|
775
|
+
yield this.beforeNoneInferenceCallCommandExecution(instruction);
|
|
776
|
+
yield this.agent.getOsAgentHandler().desktopKeyHoldDown(key, []);
|
|
777
|
+
yield this.afterCommandExecution(instruction);
|
|
778
|
+
}
|
|
779
|
+
catch (error) {
|
|
780
|
+
yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
|
|
781
|
+
return Promise.reject(error);
|
|
782
|
+
}
|
|
783
|
+
return Promise.resolve();
|
|
784
|
+
}),
|
|
785
|
+
};
|
|
786
|
+
}
|
|
787
|
+
/**
|
|
788
|
+
* Releases a key up that was previously held down.
|
|
789
|
+
*
|
|
790
|
+
* **Examples:**
|
|
791
|
+
* ```typescript
|
|
792
|
+
* await aui.keyUp('a').exec();
|
|
793
|
+
* ```
|
|
794
|
+
*
|
|
795
|
+
* @param {PC_AND_MODIFIER_KEY} key - The key to release up.
|
|
796
|
+
*/
|
|
797
|
+
keyUp(key) {
|
|
798
|
+
return {
|
|
799
|
+
exec: () => __awaiter(this, void 0, void 0, function* () {
|
|
800
|
+
const stepTitle = `Release key ${key}`;
|
|
801
|
+
const instruction = yield this.buildInstruction(stepTitle, []);
|
|
802
|
+
try {
|
|
803
|
+
yield this.beforeNoneInferenceCallCommandExecution(instruction);
|
|
804
|
+
yield this.agent.getOsAgentHandler().desktopKeyRelease(key, []);
|
|
805
|
+
yield this.afterCommandExecution(instruction);
|
|
806
|
+
}
|
|
807
|
+
catch (error) {
|
|
808
|
+
yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
|
|
809
|
+
return Promise.reject(error);
|
|
810
|
+
}
|
|
811
|
+
return Promise.resolve();
|
|
812
|
+
}),
|
|
813
|
+
};
|
|
814
|
+
}
|
|
738
815
|
act(goal, imageOrOptions, options) {
|
|
739
816
|
return __awaiter(this, void 0, void 0, function* () {
|
|
740
817
|
if (typeof imageOrOptions === 'string') {
|
|
741
818
|
return this.agent.act(goal, imageOrOptions, options);
|
|
742
819
|
}
|
|
743
|
-
|
|
820
|
+
const fullTitle = `Act: ${goal}`;
|
|
821
|
+
const stepTitle = fullTitle.length > 50 ? `${fullTitle.substring(0, 47)}...` : fullTitle;
|
|
822
|
+
const instruction = yield this.buildInstruction(stepTitle, []);
|
|
823
|
+
try {
|
|
824
|
+
yield this.beforeNoneInferenceCallCommandExecution(instruction);
|
|
825
|
+
const result = yield this.agent.act(goal, undefined, imageOrOptions);
|
|
826
|
+
yield this.afterCommandExecution(instruction);
|
|
827
|
+
return result;
|
|
828
|
+
}
|
|
829
|
+
catch (error) {
|
|
830
|
+
yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
|
|
831
|
+
return Promise.reject(error);
|
|
832
|
+
}
|
|
833
|
+
});
|
|
834
|
+
}
|
|
835
|
+
/**
|
|
836
|
+
* Adds tools to the agent that allow it to interact with AI elements.
|
|
837
|
+
*
|
|
838
|
+
* @returns {Promise<void>} - A promise that resolves when the tools are added to the agent.
|
|
839
|
+
*/
|
|
840
|
+
addAIElementsToolsToAgent() {
|
|
841
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
842
|
+
const aiElementLocator = (aiElementName) => this.get().aiElement(aiElementName).exec();
|
|
843
|
+
const askUIGetAskUIElementTool = new AskUIGetAskUIElementTool(this.agent.getOsAgentHandler(), aiElementLocator, 'aiElement');
|
|
844
|
+
this.agent.addTool(askUIGetAskUIElementTool);
|
|
845
|
+
const listAIElementNamesFunction = () => (AIElementCollection.collectAIElements(this.workspaceId, this.aiElementArgs)).then((aiElementCollection) => aiElementCollection.getNames());
|
|
846
|
+
const askUIListAIElementTool = new AskUIListAIElementTool(listAIElementNamesFunction);
|
|
847
|
+
this.agent.addTool(askUIListAIElementTool);
|
|
744
848
|
});
|
|
745
849
|
}
|
|
746
850
|
}
|
|
@@ -179,7 +179,7 @@ export class CreateExampleProject {
|
|
|
179
179
|
return __awaiter(this, void 0, void 0, function* () {
|
|
180
180
|
const runCommand = promisify(exec);
|
|
181
181
|
const frameworkDependencies = {
|
|
182
|
-
jest: 'npm i -D @askui/askui-reporters typescript ts-node @types/jest ts-jest jest @askui/jest-allure-circus eslint @typescript-eslint/parser @typescript-eslint/eslint-plugin eslint-plugin-import @askui/eslint-plugin-askui hpagent',
|
|
182
|
+
jest: 'npm i -D @askui/askui-reporters typescript ts-node @types/jest@30.0.0 ts-jest@29.4.0 jest@29.7.0 @askui/jest-allure-circus eslint @typescript-eslint/parser @typescript-eslint/eslint-plugin eslint-plugin-import @askui/eslint-plugin-askui hpagent',
|
|
183
183
|
};
|
|
184
184
|
yield runCommand(frameworkDependencies.jest);
|
|
185
185
|
});
|
package/dist/esm/main.d.ts
CHANGED
|
@@ -4,4 +4,4 @@ export { Instruction, Reporter, ReporterConfig, Snapshot, SnapshotDetailLevel, S
|
|
|
4
4
|
export { Annotation } from './core/annotation/annotation';
|
|
5
5
|
export { DetectedElement } from './core/model/annotation-result/detected-element';
|
|
6
6
|
export { LogLevels } from './shared';
|
|
7
|
-
export { ToolFailure, ToolError, BaseAgentTool } from './core/models/anthropic';
|
|
7
|
+
export { ToolFailure, ToolError, BaseAgentTool, BetaTool, ToolResult, } from './core/models/anthropic';
|
package/dist/esm/main.js
CHANGED
|
@@ -3,4 +3,4 @@ export * from './execution';
|
|
|
3
3
|
export { Annotation } from './core/annotation/annotation';
|
|
4
4
|
export { DetectedElement } from './core/model/annotation-result/detected-element';
|
|
5
5
|
export { LogLevels } from './shared';
|
|
6
|
-
export { ToolFailure, ToolError, BaseAgentTool } from './core/models/anthropic';
|
|
6
|
+
export { ToolFailure, ToolError, BaseAgentTool, } from './core/models/anthropic';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "askui",
|
|
3
|
-
"version": "0.26.0",
|
|
3
|
+
"version": "0.27.0",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": "askui GmbH <info@askui.com> (http://www.askui.com/)",
|
|
6
6
|
"description": "Reliable, automated end-to-end-testing that depends on what is shown on your screen instead of the technology you are running on",
|