askui 0.24.0 → 0.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/core/models/anthropic/askui-agent.d.ts +13 -0
- package/dist/cjs/core/models/anthropic/askui-agent.js +195 -0
- package/dist/cjs/core/models/anthropic/claude-agent.d.ts +40 -0
- package/dist/cjs/core/models/anthropic/claude-agent.js +200 -0
- package/dist/cjs/core/models/anthropic/index.d.ts +4 -0
- package/dist/cjs/core/models/anthropic/index.js +9 -0
- package/dist/cjs/core/models/anthropic/tools/agent-errors.d.ts +2 -0
- package/dist/cjs/core/models/anthropic/tools/agent-errors.js +6 -0
- package/dist/cjs/core/models/anthropic/tools/base.d.ts +24 -0
- package/dist/cjs/core/models/anthropic/tools/base.js +66 -0
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +113 -0
- package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +476 -0
- package/dist/cjs/core/ui-control-commands/index.d.ts +2 -0
- package/dist/cjs/core/ui-control-commands/index.js +5 -1
- package/dist/cjs/execution/dsl.d.ts +7 -4
- package/dist/cjs/execution/dsl.js +4 -1
- package/dist/cjs/execution/execution-runtime.d.ts +11 -1
- package/dist/cjs/execution/execution-runtime.js +5 -0
- package/dist/cjs/execution/inference-client.d.ts +9 -0
- package/dist/cjs/execution/inference-client.js +13 -5
- package/dist/cjs/execution/ui-control-client-dependency-builder.js +1 -1
- package/dist/cjs/execution/ui-control-client.d.ts +50 -0
- package/dist/cjs/execution/ui-control-client.js +61 -3
- package/dist/cjs/lib/interactive_cli/create-example-project.d.ts +1 -0
- package/dist/cjs/lib/interactive_cli/create-example-project.js +20 -3
- package/dist/cjs/main.d.ts +1 -0
- package/dist/cjs/main.js +5 -1
- package/dist/cjs/utils/base_64_image/base-64-image.d.ts +2 -1
- package/dist/cjs/utils/base_64_image/base-64-image.js +17 -2
- package/dist/esm/core/models/anthropic/askui-agent.d.ts +13 -0
- package/dist/esm/core/models/anthropic/askui-agent.js +191 -0
- package/dist/esm/core/models/anthropic/claude-agent.d.ts +40 -0
- package/dist/esm/core/models/anthropic/claude-agent.js +196 -0
- package/dist/esm/core/models/anthropic/index.d.ts +4 -0
- package/dist/esm/core/models/anthropic/index.js +2 -0
- package/dist/esm/core/models/anthropic/tools/agent-errors.d.ts +2 -0
- package/dist/esm/core/models/anthropic/tools/agent-errors.js +2 -0
- package/dist/esm/core/models/anthropic/tools/base.d.ts +24 -0
- package/dist/esm/core/models/anthropic/tools/base.js +59 -0
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +113 -0
- package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +461 -0
- package/dist/esm/core/ui-control-commands/index.d.ts +2 -0
- package/dist/esm/core/ui-control-commands/index.js +2 -0
- package/dist/esm/execution/dsl.d.ts +7 -4
- package/dist/esm/execution/dsl.js +3 -0
- package/dist/esm/execution/execution-runtime.d.ts +11 -1
- package/dist/esm/execution/execution-runtime.js +5 -0
- package/dist/esm/execution/inference-client.d.ts +9 -0
- package/dist/esm/execution/inference-client.js +13 -5
- package/dist/esm/execution/ui-control-client-dependency-builder.js +1 -1
- package/dist/esm/execution/ui-control-client.d.ts +50 -0
- package/dist/esm/execution/ui-control-client.js +61 -3
- package/dist/esm/lib/interactive_cli/create-example-project.d.ts +1 -0
- package/dist/esm/lib/interactive_cli/create-example-project.js +20 -3
- package/dist/esm/main.d.ts +1 -0
- package/dist/esm/main.js +1 -0
- package/dist/esm/utils/base_64_image/base-64-image.d.ts +2 -1
- package/dist/esm/utils/base_64_image/base-64-image.js +17 -2
- package/dist/example_projects_templates/configs/vscode-settings.json +41 -0
- package/package.json +2 -1
|
@@ -33,6 +33,7 @@ class InferenceClient {
|
|
|
33
33
|
? (0, url_join_1.default)(versionedBaseUrl, 'workspaces', workspaceId)
|
|
34
34
|
: versionedBaseUrl;
|
|
35
35
|
this.urls = {
|
|
36
|
+
actEndpoint: (0, url_join_1.default)(url, 'act', 'inference'),
|
|
36
37
|
inference: (0, url_join_1.default)(url, 'inference'),
|
|
37
38
|
isImageRequired: (0, url_join_1.default)(url, 'instruction', 'is-image-required'),
|
|
38
39
|
vqaInference: (0, url_join_1.default)(url, 'vqa', 'inference'),
|
|
@@ -74,7 +75,7 @@ class InferenceClient {
|
|
|
74
75
|
instruction,
|
|
75
76
|
modelComposition: modelComposition.length > 0 ? modelComposition : this.modelComposition,
|
|
76
77
|
});
|
|
77
|
-
InferenceClient.logMetaInformation(response);
|
|
78
|
+
InferenceClient.logMetaInformation(response.headers);
|
|
78
79
|
return ui_control_commands_1.InferenceResponse.fromJson(response.body, resizedImage.resizeRatio, image);
|
|
79
80
|
});
|
|
80
81
|
}
|
|
@@ -85,13 +86,13 @@ class InferenceClient {
|
|
|
85
86
|
image,
|
|
86
87
|
prompt,
|
|
87
88
|
});
|
|
88
|
-
InferenceClient.logMetaInformation(response);
|
|
89
|
+
InferenceClient.logMetaInformation(response.headers);
|
|
89
90
|
return response.body;
|
|
90
91
|
});
|
|
91
92
|
}
|
|
92
|
-
static logMetaInformation(
|
|
93
|
-
if (
|
|
94
|
-
logger_1.logger.warn(
|
|
93
|
+
static logMetaInformation(headers) {
|
|
94
|
+
if (headers['askui-usage-warnings'] !== undefined) {
|
|
95
|
+
logger_1.logger.warn(headers['askui-usage-warnings']);
|
|
95
96
|
}
|
|
96
97
|
}
|
|
97
98
|
predictControlCommand(instruction_1, modelComposition_1) {
|
|
@@ -134,5 +135,12 @@ class InferenceClient {
|
|
|
134
135
|
return response;
|
|
135
136
|
});
|
|
136
137
|
}
|
|
138
|
+
predictActResponse(params) {
|
|
139
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
140
|
+
const response = yield this.httpClient.post(this.urls.actEndpoint, params);
|
|
141
|
+
InferenceClient.logMetaInformation(response.headers);
|
|
142
|
+
return response.body;
|
|
143
|
+
});
|
|
144
|
+
}
|
|
137
145
|
}
|
|
138
146
|
exports.InferenceClient = InferenceClient;
|
|
@@ -65,7 +65,7 @@ class UiControlClientDependencyBuilder {
|
|
|
65
65
|
onLocationNotExist: (_d = (_c = clientArgs.aiElementArgs) === null || _c === void 0 ? void 0 : _c.onLocationNotExist) !== null && _d !== void 0 ? _d : 'error',
|
|
66
66
|
}, context: {
|
|
67
67
|
isCi: (_f = (_e = clientArgs.context) === null || _e === void 0 ? void 0 : _e.isCi) !== null && _f !== void 0 ? _f : is_ci_1.default,
|
|
68
|
-
}, credentials: (0, read_credentials_1.readCredentials)(clientArgs), inferenceServerApiVersion: (_g = clientArgs.inferenceServerApiVersion) !== null && _g !== void 0 ? _g : '
|
|
68
|
+
}, credentials: (0, read_credentials_1.readCredentials)(clientArgs), inferenceServerApiVersion: (_g = clientArgs.inferenceServerApiVersion) !== null && _g !== void 0 ? _g : 'v1', inferenceServerUrl: (_h = clientArgs.inferenceServerUrl) !== null && _h !== void 0 ? _h : 'https://inference.askui.com', proxyAgents: (_j = clientArgs.proxyAgents) !== null && _j !== void 0 ? _j : (yield (0, proxy_builder_1.envProxyAgents)()), uiControllerUrl: (_k = clientArgs.uiControllerUrl) !== null && _k !== void 0 ? _k : 'http://127.0.0.1:6769' });
|
|
69
69
|
});
|
|
70
70
|
}
|
|
71
71
|
}
|
|
@@ -5,6 +5,7 @@ import { AnnotationRequest } from '../core/model/annotation-result/annotation-in
|
|
|
5
5
|
import { DetectedElement } from '../core/model/annotation-result/detected-element';
|
|
6
6
|
import { ClientArgs } from './ui-controller-client-interface';
|
|
7
7
|
import { ModelCompositionBranch } from './model-composition-branch';
|
|
8
|
+
import { AskUIAgent, AgentHistory } from '../core/models/anthropic';
|
|
8
9
|
export type RelationsForConvenienceMethods = 'nearestTo' | 'leftOf' | 'above' | 'rightOf' | 'below' | 'contains';
|
|
9
10
|
export type TextMatchingOption = 'similar' | 'exact' | 'regex';
|
|
10
11
|
export type ElementExistsQueryType = 'otherElement' | 'switch' | 'element' | 'container' | 'checkbox' | 'element' | 'button' | 'table' | 'text' | 'icon' | 'image' | 'textfield';
|
|
@@ -33,6 +34,7 @@ export declare class UiControlClient extends ApiCommands {
|
|
|
33
34
|
private executionRuntime;
|
|
34
35
|
private stepReporter;
|
|
35
36
|
private aiElementArgs;
|
|
37
|
+
agent: AskUIAgent;
|
|
36
38
|
private constructor();
|
|
37
39
|
static build(clientArgs?: ClientArgs): Promise<UiControlClient>;
|
|
38
40
|
/**
|
|
@@ -473,4 +475,52 @@ export declare class UiControlClient extends ApiCommands {
|
|
|
473
475
|
* @returns {ExpectAllExistResult.elements} - ExpectExistenceElement[].
|
|
474
476
|
*/
|
|
475
477
|
expectAllExist(query: ElementExistsQuery[]): Promise<ExpectAllExistResult>;
|
|
478
|
+
/**
|
|
479
|
+
* Instructs the agent to achieve a specified goal through autonomous actions.
|
|
480
|
+
*
|
|
481
|
+
* The agent will analyze the screen, determine necessary steps, and perform actions
|
|
482
|
+
* to accomplish the goal. This may include clicking, typing, scrolling, and other
|
|
483
|
+
* interface interactions.
|
|
484
|
+
*
|
|
485
|
+
* The `options` parameter allows the caller to maintain contextual continuity across
|
|
486
|
+
* multiple `act` calls, either from the same or different agent interfaces.
|
|
487
|
+
*
|
|
488
|
+
* **Examples:**
|
|
489
|
+
*
|
|
490
|
+
* ```ts
|
|
491
|
+
* // Use chatId to maintain context across consecutive steps
|
|
492
|
+
* await aui.act("Search online for the current gold price", {
|
|
493
|
+
* chatId: "session-gold-price"
|
|
494
|
+
* });
|
|
495
|
+
* await aui.act("Create a new text file and type the gold price result into it", {
|
|
496
|
+
* chatId: "session-gold-price"
|
|
497
|
+
* });
|
|
498
|
+
*
|
|
499
|
+
* // Share history explicitly between separate agents (e.g., desktop and Android)
|
|
500
|
+
* // By default, the agent operates as a computer agent.
|
|
501
|
+
* // To control an Android device, you must configure it explicitly:
|
|
502
|
+
* await auiAndroid.agent.configureAsAndroidAgent();
|
|
503
|
+
* const history = await auiDesktop.act("Copy username from desktop app");
|
|
504
|
+
* await auiAndroid.act("Paste username into the mobile login screen", {
|
|
505
|
+
* agentHistory: history
|
|
506
|
+
* });
|
|
507
|
+
* ```
|
|
508
|
+
*
|
|
509
|
+
* @param {string} goal - A description of what the agent should achieve.
|
|
510
|
+
* @param {Object} [options] - Optional parameters to maintain or share context.
|
|
511
|
+
* @param {string} [options.chatId] - A session identifier used to persist memory between
|
|
512
|
+
* consecutive `act` calls. When multiple actions share the
|
|
513
|
+
* same `chatId`, the agent retains knowledge of prior steps,
|
|
514
|
+
* such as extracted data or navigation history.
|
|
515
|
+
* @param {AgentHistory} [options.agentHistory] - A shared interaction history object that can be
|
|
516
|
+
* passed between different agent clients (e.g., between
|
|
517
|
+
* `auiDesktop` and `auiAndroid`) to ensure continuity
|
|
518
|
+
* of understanding and task flow.
|
|
519
|
+
* @returns {Promise<AgentHistory>} - Updated action history after executing the goal.
|
|
520
|
+
* @throws {Error} If the agent is not connected when the method is called.
|
|
521
|
+
*/
|
|
522
|
+
act(goal: string, options?: {
|
|
523
|
+
chatId?: string;
|
|
524
|
+
agentHistory?: AgentHistory;
|
|
525
|
+
}): Promise<AgentHistory>;
|
|
476
526
|
}
|
|
@@ -18,13 +18,15 @@ const logger_1 = require("../lib/logger");
|
|
|
18
18
|
const ui_control_client_dependency_builder_1 = require("./ui-control-client-dependency-builder");
|
|
19
19
|
const ai_element_collection_1 = require("../core/ai-element/ai-element-collection");
|
|
20
20
|
const retry_strategies_1 = require("./retry-strategies");
|
|
21
|
+
const anthropic_1 = require("../core/models/anthropic");
|
|
21
22
|
class UiControlClient extends dsl_1.ApiCommands {
|
|
22
|
-
constructor(workspaceId, executionRuntime, stepReporter, aiElementArgs) {
|
|
23
|
+
constructor(workspaceId, executionRuntime, stepReporter, aiElementArgs, agent) {
|
|
23
24
|
super();
|
|
24
25
|
this.workspaceId = workspaceId;
|
|
25
26
|
this.executionRuntime = executionRuntime;
|
|
26
27
|
this.stepReporter = stepReporter;
|
|
27
28
|
this.aiElementArgs = aiElementArgs;
|
|
29
|
+
this.agent = agent;
|
|
28
30
|
this.secretText = undefined;
|
|
29
31
|
}
|
|
30
32
|
static build() {
|
|
@@ -32,7 +34,8 @@ class UiControlClient extends dsl_1.ApiCommands {
|
|
|
32
34
|
const builder = ui_control_client_dependency_builder_1.UiControlClientDependencyBuilder;
|
|
33
35
|
const clientArgsWithDefaults = yield builder.getClientArgsWithDefaults(clientArgs);
|
|
34
36
|
const { workspaceId, executionRuntime, stepReporter, } = yield builder.build(clientArgsWithDefaults);
|
|
35
|
-
|
|
37
|
+
const agent = new anthropic_1.AskUIAgent(executionRuntime);
|
|
38
|
+
return new UiControlClient(workspaceId, executionRuntime, stepReporter, clientArgsWithDefaults.aiElementArgs, agent);
|
|
36
39
|
});
|
|
37
40
|
}
|
|
38
41
|
/**
|
|
@@ -40,7 +43,10 @@ class UiControlClient extends dsl_1.ApiCommands {
|
|
|
40
43
|
*/
|
|
41
44
|
connect() {
|
|
42
45
|
return __awaiter(this, void 0, void 0, function* () {
|
|
43
|
-
|
|
46
|
+
const connectionState = yield this.executionRuntime.connect();
|
|
47
|
+
yield this.agent.initializeOsAgentHandler();
|
|
48
|
+
yield this.agent.configureAsDesktopAgent();
|
|
49
|
+
return connectionState;
|
|
44
50
|
});
|
|
45
51
|
}
|
|
46
52
|
/**
|
|
@@ -732,5 +738,57 @@ class UiControlClient extends dsl_1.ApiCommands {
|
|
|
732
738
|
};
|
|
733
739
|
});
|
|
734
740
|
}
|
|
741
|
+
/**
|
|
742
|
+
* Instructs the agent to achieve a specified goal through autonomous actions.
|
|
743
|
+
*
|
|
744
|
+
* The agent will analyze the screen, determine necessary steps, and perform actions
|
|
745
|
+
* to accomplish the goal. This may include clicking, typing, scrolling, and other
|
|
746
|
+
* interface interactions.
|
|
747
|
+
*
|
|
748
|
+
* The `options` parameter allows the caller to maintain contextual continuity across
|
|
749
|
+
* multiple `act` calls, either from the same or different agent interfaces.
|
|
750
|
+
*
|
|
751
|
+
* **Examples:**
|
|
752
|
+
*
|
|
753
|
+
* ```ts
|
|
754
|
+
* // Use chatId to maintain context across consecutive steps
|
|
755
|
+
* await aui.act("Search online for the current gold price", {
|
|
756
|
+
* chatId: "session-gold-price"
|
|
757
|
+
* });
|
|
758
|
+
* await aui.act("Create a new text file and type the gold price result into it", {
|
|
759
|
+
* chatId: "session-gold-price"
|
|
760
|
+
* });
|
|
761
|
+
*
|
|
762
|
+
* // Share history explicitly between separate agents (e.g., desktop and Android)
|
|
763
|
+
* // By default, the agent operates as a computer agent.
|
|
764
|
+
* // To control an Android device, you must configure it explicitly:
|
|
765
|
+
* await auiAndroid.agent.configureAsAndroidAgent();
|
|
766
|
+
* const history = await auiDesktop.act("Copy username from desktop app");
|
|
767
|
+
* await auiAndroid.act("Paste username into the mobile login screen", {
|
|
768
|
+
* agentHistory: history
|
|
769
|
+
* });
|
|
770
|
+
* ```
|
|
771
|
+
*
|
|
772
|
+
* @param {string} goal - A description of what the agent should achieve.
|
|
773
|
+
* @param {Object} [options] - Optional parameters to maintain or share context.
|
|
774
|
+
* @param {string} [options.chatId] - A session identifier used to persist memory between
|
|
775
|
+
* consecutive `act` calls. When multiple actions share the
|
|
776
|
+
* same `chatId`, the agent retains knowledge of prior steps,
|
|
777
|
+
* such as extracted data or navigation history.
|
|
778
|
+
* @param {AgentHistory} [options.agentHistory] - A shared interaction history object that can be
|
|
779
|
+
* passed between different agent clients (e.g., between
|
|
780
|
+
* `auiDesktop` and `auiAndroid`) to ensure continuity
|
|
781
|
+
* of understanding and task flow.
|
|
782
|
+
* @returns {Promise<AgentHistory>} - Updated action history after executing the goal.
|
|
783
|
+
* @throws {Error} If the agent is not connected when the method is called.
|
|
784
|
+
*/
|
|
785
|
+
act(goal, options) {
|
|
786
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
787
|
+
if (!this.agent.isConnected()) {
|
|
788
|
+
throw new Error('Agent is not connected, Please call connect() first');
|
|
789
|
+
}
|
|
790
|
+
return this.agent.act(goal, options);
|
|
791
|
+
});
|
|
792
|
+
}
|
|
735
793
|
}
|
|
736
794
|
exports.UiControlClient = UiControlClient;
|
|
@@ -176,6 +176,10 @@ class CreateExampleProject {
|
|
|
176
176
|
title: 'Add eslint run command',
|
|
177
177
|
task: () => __awaiter(this, void 0, void 0, function* () { return this.addESLintRunCommand(); }),
|
|
178
178
|
},
|
|
179
|
+
{
|
|
180
|
+
title: 'Add vscode settings',
|
|
181
|
+
task: () => __awaiter(this, void 0, void 0, function* () { return this.addVSCodeSettings(); }),
|
|
182
|
+
},
|
|
179
183
|
]);
|
|
180
184
|
}),
|
|
181
185
|
}];
|
|
@@ -235,23 +239,36 @@ class CreateExampleProject {
|
|
|
235
239
|
];
|
|
236
240
|
});
|
|
237
241
|
}
|
|
242
|
+
addVSCodeSettings() {
|
|
243
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
244
|
+
const vscodeSettingsFilePath = path_1.default.join('example_projects_templates', 'configs', 'vscode-settings.json');
|
|
245
|
+
const vscodeSettingsTargetDirPath = path_1.default.join(this.projectRootDirectoryPath, '.vscode');
|
|
246
|
+
const vscodeSettingsTargetFilePath = path_1.default.join(vscodeSettingsTargetDirPath, 'settings.json');
|
|
247
|
+
return [{
|
|
248
|
+
enabled: () => !fs_extra_1.default.existsSync(vscodeSettingsTargetFilePath),
|
|
249
|
+
task: () => __awaiter(this, void 0, void 0, function* () {
|
|
250
|
+
yield fs_extra_1.default.mkdir(vscodeSettingsTargetDirPath, { recursive: true });
|
|
251
|
+
yield fs_extra_1.default.copyFile(path_1.default.join((0, path_2.getPathToNodeModulesRoot)(), vscodeSettingsFilePath), vscodeSettingsTargetFilePath);
|
|
252
|
+
}),
|
|
253
|
+
title: 'Copy VSCode settings',
|
|
254
|
+
}];
|
|
255
|
+
});
|
|
256
|
+
}
|
|
238
257
|
copyTsConfigFile() {
|
|
239
258
|
return __awaiter(this, void 0, void 0, function* () {
|
|
240
259
|
const tsConfigFilePath = path_1.default.join('example_projects_templates', 'typescript', 'tsconfig.json');
|
|
241
260
|
const tsConfigTargetFilePath = path_1.default.join(this.projectRootDirectoryPath, 'tsconfig.json');
|
|
242
|
-
/* eslint-disable sort-keys */
|
|
243
261
|
return [
|
|
244
262
|
{
|
|
245
|
-
title: 'Copy ts config file',
|
|
246
263
|
enabled: () => this.cliOptions.typescriptConfig || !fs_extra_1.default.existsSync(tsConfigTargetFilePath),
|
|
247
264
|
task: () => __awaiter(this, void 0, void 0, function* () {
|
|
248
265
|
return fs_extra_1.default.copyFile(path_1.default.join((0, path_2.getPathToNodeModulesRoot)(), tsConfigFilePath), tsConfigTargetFilePath);
|
|
249
266
|
}),
|
|
267
|
+
title: 'Copy ts config file',
|
|
250
268
|
},
|
|
251
269
|
];
|
|
252
270
|
});
|
|
253
271
|
}
|
|
254
|
-
/* eslint-enable */
|
|
255
272
|
createExampleProject() {
|
|
256
273
|
return __awaiter(this, void 0, void 0, function* () {
|
|
257
274
|
const tasks = new listr_1.default();
|
package/dist/cjs/main.d.ts
CHANGED
|
@@ -4,3 +4,4 @@ export { Instruction, Reporter, ReporterConfig, Snapshot, SnapshotDetailLevel, S
|
|
|
4
4
|
export { Annotation } from './core/annotation/annotation';
|
|
5
5
|
export { DetectedElement } from './core/model/annotation-result/detected-element';
|
|
6
6
|
export { LogLevels } from './shared';
|
|
7
|
+
export { ToolFailure, ToolError, BaseAgentTool } from './core/models/anthropic';
|
package/dist/cjs/main.js
CHANGED
|
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
|
14
14
|
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
15
|
};
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
-
exports.LogLevels = exports.DetectedElement = exports.Annotation = exports.UiController = void 0;
|
|
17
|
+
exports.BaseAgentTool = exports.ToolError = exports.ToolFailure = exports.LogLevels = exports.DetectedElement = exports.Annotation = exports.UiController = void 0;
|
|
18
18
|
var lib_1 = require("./lib");
|
|
19
19
|
Object.defineProperty(exports, "UiController", { enumerable: true, get: function () { return lib_1.UiController; } });
|
|
20
20
|
__exportStar(require("./execution"), exports);
|
|
@@ -24,3 +24,7 @@ var detected_element_1 = require("./core/model/annotation-result/detected-elemen
|
|
|
24
24
|
Object.defineProperty(exports, "DetectedElement", { enumerable: true, get: function () { return detected_element_1.DetectedElement; } });
|
|
25
25
|
var shared_1 = require("./shared");
|
|
26
26
|
Object.defineProperty(exports, "LogLevels", { enumerable: true, get: function () { return shared_1.LogLevels; } });
|
|
27
|
+
var anthropic_1 = require("./core/models/anthropic");
|
|
28
|
+
Object.defineProperty(exports, "ToolFailure", { enumerable: true, get: function () { return anthropic_1.ToolFailure; } });
|
|
29
|
+
Object.defineProperty(exports, "ToolError", { enumerable: true, get: function () { return anthropic_1.ToolError; } });
|
|
30
|
+
Object.defineProperty(exports, "BaseAgentTool", { enumerable: true, get: function () { return anthropic_1.BaseAgentTool; } });
|
|
@@ -11,5 +11,6 @@ export declare class Base64Image {
|
|
|
11
11
|
private getSharp;
|
|
12
12
|
getInfo(): Promise<sharp.OutputInfo>;
|
|
13
13
|
resizeToFitInto(dimension: number): Promise<Base64Image>;
|
|
14
|
-
|
|
14
|
+
resizeWithSameAspectRatio(width: number, height: number): Promise<Base64Image>;
|
|
15
|
+
toString(withPrefix?: boolean): string;
|
|
15
16
|
}
|
|
@@ -77,8 +77,23 @@ class Base64Image {
|
|
|
77
77
|
return Base64Image.fromBuffer(buffer);
|
|
78
78
|
});
|
|
79
79
|
}
|
|
80
|
-
|
|
81
|
-
return
|
|
80
|
+
resizeWithSameAspectRatio(width, height) {
|
|
81
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
82
|
+
const buffer = yield (yield this.getSharp())
|
|
83
|
+
.resize({
|
|
84
|
+
fit: 'contain',
|
|
85
|
+
height,
|
|
86
|
+
width,
|
|
87
|
+
})
|
|
88
|
+
.toBuffer();
|
|
89
|
+
return Base64Image.fromBuffer(buffer);
|
|
90
|
+
});
|
|
91
|
+
}
|
|
92
|
+
toString(withPrefix = true) {
|
|
93
|
+
if (withPrefix) {
|
|
94
|
+
return `${Base64Image.strPrefix}${this.buffer.toString('base64')}`;
|
|
95
|
+
}
|
|
96
|
+
return this.buffer.toString('base64');
|
|
82
97
|
}
|
|
83
98
|
}
|
|
84
99
|
exports.Base64Image = Base64Image;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { ClaudeAgent } from './claude-agent';
|
|
2
|
+
import { ExecutionRuntime } from '../../../execution/execution-runtime';
|
|
3
|
+
export declare class AskUIAgent extends ClaudeAgent {
|
|
4
|
+
private osAgentHandler;
|
|
5
|
+
private executionRuntime;
|
|
6
|
+
constructor(executionRuntime: ExecutionRuntime);
|
|
7
|
+
isConnected(): boolean;
|
|
8
|
+
initializeOsAgentHandler(): Promise<void>;
|
|
9
|
+
configureAsDesktopAgent(): Promise<void>;
|
|
10
|
+
configureAsAndroidAgent(): Promise<void>;
|
|
11
|
+
private static DesktopSystemPrompt;
|
|
12
|
+
private static AndroidSystemPrompt;
|
|
13
|
+
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
|
|
2
|
+
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
|
|
3
|
+
return new (P || (P = Promise))(function (resolve, reject) {
|
|
4
|
+
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
|
|
5
|
+
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
|
|
6
|
+
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
|
|
7
|
+
step((generator = generator.apply(thisArg, _arguments || [])).next());
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
import { DesktopKeyPressSequenceTool, DesktopSingleKeyPressTool, MouseClickTool, MouseMoveTool, MouseScrollTool, OsAgentHandler, ScreenShotTool, TypeTool, AgentErrorTool, AndroidSequenceKeyPressTool, AndroidSingleKeyPressTool, ExecuteShellCommandTool, } from './tools/os-agent-tools';
|
|
11
|
+
import { ClaudeAgent } from './claude-agent';
|
|
12
|
+
export class AskUIAgent extends ClaudeAgent {
|
|
13
|
+
constructor(executionRuntime) {
|
|
14
|
+
super((params) => executionRuntime.predictActResponse(params));
|
|
15
|
+
this.osAgentHandler = undefined;
|
|
16
|
+
this.executionRuntime = executionRuntime;
|
|
17
|
+
}
|
|
18
|
+
isConnected() {
|
|
19
|
+
return this.osAgentHandler !== undefined;
|
|
20
|
+
}
|
|
21
|
+
initializeOsAgentHandler() {
|
|
22
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
23
|
+
this.osAgentHandler = yield OsAgentHandler.createInstance(this.executionRuntime);
|
|
24
|
+
});
|
|
25
|
+
}
|
|
26
|
+
configureAsDesktopAgent() {
|
|
27
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
28
|
+
if (!this.osAgentHandler) {
|
|
29
|
+
throw new Error('Agent OS client is not connected');
|
|
30
|
+
}
|
|
31
|
+
const tools = [
|
|
32
|
+
new AgentErrorTool(),
|
|
33
|
+
new ScreenShotTool(this.osAgentHandler),
|
|
34
|
+
new MouseMoveTool(this.osAgentHandler),
|
|
35
|
+
new MouseClickTool(this.osAgentHandler),
|
|
36
|
+
new MouseScrollTool(this.osAgentHandler),
|
|
37
|
+
new DesktopKeyPressSequenceTool(this.osAgentHandler),
|
|
38
|
+
new DesktopSingleKeyPressTool(this.osAgentHandler),
|
|
39
|
+
new TypeTool(this.osAgentHandler),
|
|
40
|
+
];
|
|
41
|
+
this.setTools(tools);
|
|
42
|
+
this.setSystemPrompt(AskUIAgent.DesktopSystemPrompt);
|
|
43
|
+
});
|
|
44
|
+
}
|
|
45
|
+
configureAsAndroidAgent() {
|
|
46
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
47
|
+
if (!this.osAgentHandler) {
|
|
48
|
+
throw new Error('Agent OS client is not connected');
|
|
49
|
+
}
|
|
50
|
+
const tools = [
|
|
51
|
+
new AgentErrorTool(),
|
|
52
|
+
new ScreenShotTool(this.osAgentHandler),
|
|
53
|
+
new MouseMoveTool(this.osAgentHandler),
|
|
54
|
+
new MouseClickTool(this.osAgentHandler),
|
|
55
|
+
new MouseScrollTool(this.osAgentHandler),
|
|
56
|
+
new AndroidSingleKeyPressTool(this.osAgentHandler),
|
|
57
|
+
new AndroidSequenceKeyPressTool(this.osAgentHandler),
|
|
58
|
+
new TypeTool(this.osAgentHandler),
|
|
59
|
+
new ExecuteShellCommandTool(this.osAgentHandler),
|
|
60
|
+
];
|
|
61
|
+
this.setTools(tools);
|
|
62
|
+
this.setSystemPrompt(AskUIAgent.AndroidSystemPrompt);
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
AskUIAgent.DesktopSystemPrompt = `
|
|
67
|
+
<SYSTEM_CAPABILITY>
|
|
68
|
+
You are an autonomous AI assistant operating on a ${process.platform} machine with ${process.arch} architecture. You have full access to the system and internet connectivity.
|
|
69
|
+
Your main goal is to mimic a human user interacting with a desktop computer. So you should try to use the tools in a way that a human would use a mouse and keyboard to interact with a computer.
|
|
70
|
+
|
|
71
|
+
Key Capabilities:
|
|
72
|
+
* Full system control through mouse and keyboard interactions
|
|
73
|
+
* Screen capture and analysis
|
|
74
|
+
* Web browser automation and navigation
|
|
75
|
+
* File system access and manipulation
|
|
76
|
+
* PDF document handling and text extraction
|
|
77
|
+
* Error handling and recovery mechanisms
|
|
78
|
+
|
|
79
|
+
Available Tools:
|
|
80
|
+
* Mouse control (move, click, scroll)
|
|
81
|
+
* Keyboard input (single keys, key combinations, typing)
|
|
82
|
+
* Screen capture and analysis
|
|
83
|
+
* Error reporting and recovery
|
|
84
|
+
|
|
85
|
+
Current Date: ${new Date().toUTCString()} UTC
|
|
86
|
+
</SYSTEM_CAPABILITY>
|
|
87
|
+
|
|
88
|
+
<OPERATIONAL_GUIDELINES>
|
|
89
|
+
1. Autonomous Operation:
|
|
90
|
+
* Work independently to achieve user goals
|
|
91
|
+
* Make informed decisions based on available information
|
|
92
|
+
* Chain multiple actions efficiently when possible
|
|
93
|
+
* Verify results after each significant action
|
|
94
|
+
|
|
95
|
+
2. Web Interaction:
|
|
96
|
+
* Launch appropriate browser if not already open
|
|
97
|
+
* Ensure full page visibility through zoom or scrolling
|
|
98
|
+
* Handle browser-specific behaviors (e.g., Firefox startup wizard)
|
|
99
|
+
* Extract and process PDF content when encountered
|
|
100
|
+
|
|
101
|
+
3. Error Handling:
|
|
102
|
+
* Detect and analyze failure points
|
|
103
|
+
* Implement appropriate recovery strategies
|
|
104
|
+
* Report issues with clear diagnostic information
|
|
105
|
+
* Use the error tool when stuck or unable to proceed
|
|
106
|
+
|
|
107
|
+
4. Performance Optimization:
|
|
108
|
+
* Batch related actions when possible
|
|
109
|
+
* Minimize unnecessary screen captures
|
|
110
|
+
* Use efficient navigation patterns
|
|
111
|
+
* Maintain context between actions
|
|
112
|
+
|
|
113
|
+
5. Safety and Validation:
|
|
114
|
+
* Verify coordinates are within screen bounds
|
|
115
|
+
* Validate input parameters before execution
|
|
116
|
+
* Ensure proper cleanup after operations
|
|
117
|
+
* Maintain system stability
|
|
118
|
+
</OPERATIONAL_GUIDELINES>
|
|
119
|
+
|
|
120
|
+
<IMPORTANT_NOTES>
|
|
121
|
+
* When you are stuck or unable to proceed, use the error tool to raise an error.
|
|
122
|
+
* Always verify tool availability before use
|
|
123
|
+
* Use screenshots strategically for state analysis
|
|
124
|
+
* Report issues promptly with clear diagnostic information
|
|
125
|
+
* Maintain awareness of screen boundaries and coordinate validity
|
|
126
|
+
* Adapt to unexpected situations with appropriate fallback strategies
|
|
127
|
+
</IMPORTANT_NOTES>
|
|
128
|
+
`;
|
|
129
|
+
AskUIAgent.AndroidSystemPrompt = `
|
|
130
|
+
<SYSTEM_CAPABILITY>
|
|
131
|
+
You are an autonomous AI assistant operating on an Android device via ADB. The host machine is ${process.platform} with ${process.arch} architecture and internet connectivity.
|
|
132
|
+
Your main goal is to mimic a human user interacting with an Android device. So you should try to use the tools in a way that a human would use a touch screen to interact with an Android device.
|
|
133
|
+
|
|
134
|
+
Key Capabilities:
|
|
135
|
+
* Full Android device control through ADB
|
|
136
|
+
* Screen capture and analysis
|
|
137
|
+
* Touch input simulation
|
|
138
|
+
* Android-specific key events
|
|
139
|
+
* Error handling and recovery mechanisms
|
|
140
|
+
|
|
141
|
+
Available Tools:
|
|
142
|
+
* Touch control (click, swipe, scroll)
|
|
143
|
+
* Android key events (single and sequence)
|
|
144
|
+
* Screen capture and analysis
|
|
145
|
+
* Error reporting and recovery
|
|
146
|
+
|
|
147
|
+
Current Date: ${new Date().toUTCString()} UTC
|
|
148
|
+
</SYSTEM_CAPABILITY>
|
|
149
|
+
|
|
150
|
+
<OPERATIONAL_GUIDELINES>
|
|
151
|
+
1. Autonomous Operation:
|
|
152
|
+
* Work independently to achieve user goals
|
|
153
|
+
* Make informed decisions based on available information
|
|
154
|
+
* Chain multiple actions efficiently when possible
|
|
155
|
+
* Verify results after each significant action
|
|
156
|
+
|
|
157
|
+
2. Screen Interaction:
|
|
158
|
+
* Analyze screen state before interactions
|
|
159
|
+
* Use appropriate input methods (touch, keys)
|
|
160
|
+
* Handle dynamic UI elements effectively
|
|
161
|
+
* Implement efficient navigation patterns
|
|
162
|
+
|
|
163
|
+
3. Error Handling:
|
|
164
|
+
* Detect and analyze failure points
|
|
165
|
+
* Implement appropriate recovery strategies
|
|
166
|
+
* Report issues with clear diagnostic information
|
|
167
|
+
* Use the error tool when stuck or unable to proceed
|
|
168
|
+
|
|
169
|
+
4. Performance Optimization:
|
|
170
|
+
* Batch related actions when possible
|
|
171
|
+
* Minimize unnecessary screen captures
|
|
172
|
+
* Use efficient navigation patterns
|
|
173
|
+
* Maintain context between actions
|
|
174
|
+
|
|
175
|
+
5. Safety and Validation:
|
|
176
|
+
* Verify coordinates are within screen bounds
|
|
177
|
+
* Validate input parameters before execution
|
|
178
|
+
* Ensure proper cleanup after operations
|
|
179
|
+
* Maintain device stability
|
|
180
|
+
</OPERATIONAL_GUIDELINES>
|
|
181
|
+
|
|
182
|
+
<IMPORTANT_NOTES>
|
|
183
|
+
* When you are stuck or unable to proceed, use the error tool to raise an error.
|
|
184
|
+
* Always verify tool availability before use
|
|
185
|
+
* Use screenshots strategically for state analysis
|
|
186
|
+
* Ensure all coordinates are integers and within screen bounds
|
|
187
|
+
* Handle permission issues and device state appropriately
|
|
188
|
+
* Report issues promptly with clear diagnostic information
|
|
189
|
+
* Adapt to unexpected situations with appropriate fallback strategies
|
|
190
|
+
</IMPORTANT_NOTES>
|
|
191
|
+
`;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { Beta } from '@anthropic-ai/sdk/resources';
|
|
2
|
+
import { BaseAgentTool } from './tools/base';
|
|
3
|
+
type PredictActResponseFunction = (params: {
|
|
4
|
+
max_tokens: number;
|
|
5
|
+
messages: Beta.BetaMessageParam[];
|
|
6
|
+
model: string;
|
|
7
|
+
system?: string;
|
|
8
|
+
tools?: any[];
|
|
9
|
+
betas?: string[];
|
|
10
|
+
}) => Promise<Beta.BetaMessage>;
|
|
11
|
+
export declare class ClaudeAgent {
|
|
12
|
+
private predictActResponseFunction;
|
|
13
|
+
private maxTokens;
|
|
14
|
+
private onlyNMostRecentImages;
|
|
15
|
+
private imageTruncationThreshold;
|
|
16
|
+
private systemPrompt;
|
|
17
|
+
private model;
|
|
18
|
+
private betas;
|
|
19
|
+
private _toolCollection;
|
|
20
|
+
private tools;
|
|
21
|
+
private history;
|
|
22
|
+
constructor(predictActResponseFunction: PredictActResponseFunction);
|
|
23
|
+
setTools(tools: BaseAgentTool[]): void;
|
|
24
|
+
addTool(tool: BaseAgentTool): void;
|
|
25
|
+
listToolNames(): string[];
|
|
26
|
+
removeToolByName(toolName: string): void;
|
|
27
|
+
setSystemPrompt(systemPrompt: string): void;
|
|
28
|
+
private IsConfigured;
|
|
29
|
+
private get toolCollection();
|
|
30
|
+
private setHistory;
|
|
31
|
+
private getHistory;
|
|
32
|
+
act(goal: string, options?: {
|
|
33
|
+
chatId?: string;
|
|
34
|
+
agentHistory?: Beta.BetaMessageParam[];
|
|
35
|
+
}): Promise<Beta.BetaMessageParam[]>;
|
|
36
|
+
private makeApiToolResult;
|
|
37
|
+
private maybePrependSystemToolResult;
|
|
38
|
+
private static filterNMostRecentImages;
|
|
39
|
+
}
|
|
40
|
+
export {};
|