askui 0.24.0 → 0.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60):
  1. package/dist/cjs/core/models/anthropic/askui-agent.d.ts +13 -0
  2. package/dist/cjs/core/models/anthropic/askui-agent.js +195 -0
  3. package/dist/cjs/core/models/anthropic/claude-agent.d.ts +40 -0
  4. package/dist/cjs/core/models/anthropic/claude-agent.js +200 -0
  5. package/dist/cjs/core/models/anthropic/index.d.ts +4 -0
  6. package/dist/cjs/core/models/anthropic/index.js +9 -0
  7. package/dist/cjs/core/models/anthropic/tools/agent-errors.d.ts +2 -0
  8. package/dist/cjs/core/models/anthropic/tools/agent-errors.js +6 -0
  9. package/dist/cjs/core/models/anthropic/tools/base.d.ts +24 -0
  10. package/dist/cjs/core/models/anthropic/tools/base.js +66 -0
  11. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +113 -0
  12. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +476 -0
  13. package/dist/cjs/core/ui-control-commands/index.d.ts +2 -0
  14. package/dist/cjs/core/ui-control-commands/index.js +5 -1
  15. package/dist/cjs/execution/dsl.d.ts +7 -4
  16. package/dist/cjs/execution/dsl.js +4 -1
  17. package/dist/cjs/execution/execution-runtime.d.ts +11 -1
  18. package/dist/cjs/execution/execution-runtime.js +5 -0
  19. package/dist/cjs/execution/inference-client.d.ts +9 -0
  20. package/dist/cjs/execution/inference-client.js +13 -5
  21. package/dist/cjs/execution/ui-control-client-dependency-builder.js +1 -1
  22. package/dist/cjs/execution/ui-control-client.d.ts +50 -0
  23. package/dist/cjs/execution/ui-control-client.js +61 -3
  24. package/dist/cjs/lib/interactive_cli/create-example-project.d.ts +1 -0
  25. package/dist/cjs/lib/interactive_cli/create-example-project.js +20 -3
  26. package/dist/cjs/main.d.ts +1 -0
  27. package/dist/cjs/main.js +5 -1
  28. package/dist/cjs/utils/base_64_image/base-64-image.d.ts +2 -1
  29. package/dist/cjs/utils/base_64_image/base-64-image.js +17 -2
  30. package/dist/esm/core/models/anthropic/askui-agent.d.ts +13 -0
  31. package/dist/esm/core/models/anthropic/askui-agent.js +191 -0
  32. package/dist/esm/core/models/anthropic/claude-agent.d.ts +40 -0
  33. package/dist/esm/core/models/anthropic/claude-agent.js +196 -0
  34. package/dist/esm/core/models/anthropic/index.d.ts +4 -0
  35. package/dist/esm/core/models/anthropic/index.js +2 -0
  36. package/dist/esm/core/models/anthropic/tools/agent-errors.d.ts +2 -0
  37. package/dist/esm/core/models/anthropic/tools/agent-errors.js +2 -0
  38. package/dist/esm/core/models/anthropic/tools/base.d.ts +24 -0
  39. package/dist/esm/core/models/anthropic/tools/base.js +59 -0
  40. package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +113 -0
  41. package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +461 -0
  42. package/dist/esm/core/ui-control-commands/index.d.ts +2 -0
  43. package/dist/esm/core/ui-control-commands/index.js +2 -0
  44. package/dist/esm/execution/dsl.d.ts +7 -4
  45. package/dist/esm/execution/dsl.js +3 -0
  46. package/dist/esm/execution/execution-runtime.d.ts +11 -1
  47. package/dist/esm/execution/execution-runtime.js +5 -0
  48. package/dist/esm/execution/inference-client.d.ts +9 -0
  49. package/dist/esm/execution/inference-client.js +13 -5
  50. package/dist/esm/execution/ui-control-client-dependency-builder.js +1 -1
  51. package/dist/esm/execution/ui-control-client.d.ts +50 -0
  52. package/dist/esm/execution/ui-control-client.js +61 -3
  53. package/dist/esm/lib/interactive_cli/create-example-project.d.ts +1 -0
  54. package/dist/esm/lib/interactive_cli/create-example-project.js +20 -3
  55. package/dist/esm/main.d.ts +1 -0
  56. package/dist/esm/main.js +1 -0
  57. package/dist/esm/utils/base_64_image/base-64-image.d.ts +2 -1
  58. package/dist/esm/utils/base_64_image/base-64-image.js +17 -2
  59. package/dist/example_projects_templates/configs/vscode-settings.json +41 -0
  60. package/package.json +2 -1
@@ -33,6 +33,7 @@ class InferenceClient {
33
33
  ? (0, url_join_1.default)(versionedBaseUrl, 'workspaces', workspaceId)
34
34
  : versionedBaseUrl;
35
35
  this.urls = {
36
+ actEndpoint: (0, url_join_1.default)(url, 'act', 'inference'),
36
37
  inference: (0, url_join_1.default)(url, 'inference'),
37
38
  isImageRequired: (0, url_join_1.default)(url, 'instruction', 'is-image-required'),
38
39
  vqaInference: (0, url_join_1.default)(url, 'vqa', 'inference'),
@@ -74,7 +75,7 @@ class InferenceClient {
74
75
  instruction,
75
76
  modelComposition: modelComposition.length > 0 ? modelComposition : this.modelComposition,
76
77
  });
77
- InferenceClient.logMetaInformation(response);
78
+ InferenceClient.logMetaInformation(response.headers);
78
79
  return ui_control_commands_1.InferenceResponse.fromJson(response.body, resizedImage.resizeRatio, image);
79
80
  });
80
81
  }
@@ -85,13 +86,13 @@ class InferenceClient {
85
86
  image,
86
87
  prompt,
87
88
  });
88
- InferenceClient.logMetaInformation(response);
89
+ InferenceClient.logMetaInformation(response.headers);
89
90
  return response.body;
90
91
  });
91
92
  }
92
- static logMetaInformation(response) {
93
- if (response.headers['askui-usage-warnings'] !== undefined) {
94
- logger_1.logger.warn(response.headers['askui-usage-warnings']);
93
+ static logMetaInformation(headers) {
94
+ if (headers['askui-usage-warnings'] !== undefined) {
95
+ logger_1.logger.warn(headers['askui-usage-warnings']);
95
96
  }
96
97
  }
97
98
  predictControlCommand(instruction_1, modelComposition_1) {
@@ -134,5 +135,12 @@ class InferenceClient {
134
135
  return response;
135
136
  });
136
137
  }
138
+ predictActResponse(params) {
139
+ return __awaiter(this, void 0, void 0, function* () {
140
+ const response = yield this.httpClient.post(this.urls.actEndpoint, params);
141
+ InferenceClient.logMetaInformation(response.headers);
142
+ return response.body;
143
+ });
144
+ }
137
145
  }
138
146
  exports.InferenceClient = InferenceClient;
@@ -65,7 +65,7 @@ class UiControlClientDependencyBuilder {
65
65
  onLocationNotExist: (_d = (_c = clientArgs.aiElementArgs) === null || _c === void 0 ? void 0 : _c.onLocationNotExist) !== null && _d !== void 0 ? _d : 'error',
66
66
  }, context: {
67
67
  isCi: (_f = (_e = clientArgs.context) === null || _e === void 0 ? void 0 : _e.isCi) !== null && _f !== void 0 ? _f : is_ci_1.default,
68
- }, credentials: (0, read_credentials_1.readCredentials)(clientArgs), inferenceServerApiVersion: (_g = clientArgs.inferenceServerApiVersion) !== null && _g !== void 0 ? _g : 'v3', inferenceServerUrl: (_h = clientArgs.inferenceServerUrl) !== null && _h !== void 0 ? _h : 'https://inference.askui.com', proxyAgents: (_j = clientArgs.proxyAgents) !== null && _j !== void 0 ? _j : (yield (0, proxy_builder_1.envProxyAgents)()), uiControllerUrl: (_k = clientArgs.uiControllerUrl) !== null && _k !== void 0 ? _k : 'http://127.0.0.1:6769' });
68
+ }, credentials: (0, read_credentials_1.readCredentials)(clientArgs), inferenceServerApiVersion: (_g = clientArgs.inferenceServerApiVersion) !== null && _g !== void 0 ? _g : 'v1', inferenceServerUrl: (_h = clientArgs.inferenceServerUrl) !== null && _h !== void 0 ? _h : 'https://inference.askui.com', proxyAgents: (_j = clientArgs.proxyAgents) !== null && _j !== void 0 ? _j : (yield (0, proxy_builder_1.envProxyAgents)()), uiControllerUrl: (_k = clientArgs.uiControllerUrl) !== null && _k !== void 0 ? _k : 'http://127.0.0.1:6769' });
69
69
  });
70
70
  }
71
71
  }
@@ -5,6 +5,7 @@ import { AnnotationRequest } from '../core/model/annotation-result/annotation-in
5
5
  import { DetectedElement } from '../core/model/annotation-result/detected-element';
6
6
  import { ClientArgs } from './ui-controller-client-interface';
7
7
  import { ModelCompositionBranch } from './model-composition-branch';
8
+ import { AskUIAgent, AgentHistory } from '../core/models/anthropic';
8
9
  export type RelationsForConvenienceMethods = 'nearestTo' | 'leftOf' | 'above' | 'rightOf' | 'below' | 'contains';
9
10
  export type TextMatchingOption = 'similar' | 'exact' | 'regex';
10
11
  export type ElementExistsQueryType = 'otherElement' | 'switch' | 'element' | 'container' | 'checkbox' | 'element' | 'button' | 'table' | 'text' | 'icon' | 'image' | 'textfield';
@@ -33,6 +34,7 @@ export declare class UiControlClient extends ApiCommands {
33
34
  private executionRuntime;
34
35
  private stepReporter;
35
36
  private aiElementArgs;
37
+ agent: AskUIAgent;
36
38
  private constructor();
37
39
  static build(clientArgs?: ClientArgs): Promise<UiControlClient>;
38
40
  /**
@@ -473,4 +475,52 @@ export declare class UiControlClient extends ApiCommands {
473
475
  * @returns {ExpectAllExistResult.elements} - ExpectExistenceElement[].
474
476
  */
475
477
  expectAllExist(query: ElementExistsQuery[]): Promise<ExpectAllExistResult>;
478
+ /**
479
+ * Instructs the agent to achieve a specified goal through autonomous actions.
480
+ *
481
+ * The agent will analyze the screen, determine necessary steps, and perform actions
482
+ * to accomplish the goal. This may include clicking, typing, scrolling, and other
483
+ * interface interactions.
484
+ *
485
+ * The `options` parameter allows the caller to maintain contextual continuity across
486
+ * multiple `act` calls, either from the same or different agent interfaces.
487
+ *
488
+ * **Examples:**
489
+ *
490
+ * ```ts
491
+ * // Use chatId to maintain context across consecutive steps
492
+ * await aui.act("Search online for the current gold price", {
493
+ * chatId: "session-gold-price"
494
+ * });
495
+ * await aui.act("Create a new text file and type the gold price result into it", {
496
+ * chatId: "session-gold-price"
497
+ * });
498
+ *
499
+ * // Share history explicitly between separate agents (e.g., desktop and Android)
500
+ * // By default, the agent operates as a computer agent.
501
+ * // To control an Android device, you must configure it explicitly:
502
+ * await auiAndroid.agent.configureAsAndroidAgent();
503
+ * const history = await auiDesktop.act("Copy username from desktop app");
504
+ * await auiAndroid.act("Paste username into the mobile login screen", {
505
+ * agentHistory: history
506
+ * });
507
+ * ```
508
+ *
509
+ * @param {string} goal - A description of what the agent should achieve.
510
+ * @param {Object} [options] - Optional parameters to maintain or share context.
511
+ * @param {string} [options.chatId] - A session identifier used to persist memory between
512
+ * consecutive `act` calls. When multiple actions share the
513
+ * same `chatId`, the agent retains knowledge of prior steps,
514
+ * such as extracted data or navigation history.
515
+ * @param {AgentHistory} [options.agentHistory] - A shared interaction history object that can be
516
+ * passed between different agent clients (e.g., between
517
+ * `auiDesktop` and `auiAndroid`) to ensure continuity
518
+ * of understanding and task flow.
519
+ * @returns {Promise<AgentHistory>} - Updated action history after executing the goal.
520
+ * @throws {Error} If the agent is not connected when the method is called.
521
+ */
522
+ act(goal: string, options?: {
523
+ chatId?: string;
524
+ agentHistory?: AgentHistory;
525
+ }): Promise<AgentHistory>;
476
526
  }
@@ -18,13 +18,15 @@ const logger_1 = require("../lib/logger");
18
18
  const ui_control_client_dependency_builder_1 = require("./ui-control-client-dependency-builder");
19
19
  const ai_element_collection_1 = require("../core/ai-element/ai-element-collection");
20
20
  const retry_strategies_1 = require("./retry-strategies");
21
+ const anthropic_1 = require("../core/models/anthropic");
21
22
  class UiControlClient extends dsl_1.ApiCommands {
22
- constructor(workspaceId, executionRuntime, stepReporter, aiElementArgs) {
23
+ constructor(workspaceId, executionRuntime, stepReporter, aiElementArgs, agent) {
23
24
  super();
24
25
  this.workspaceId = workspaceId;
25
26
  this.executionRuntime = executionRuntime;
26
27
  this.stepReporter = stepReporter;
27
28
  this.aiElementArgs = aiElementArgs;
29
+ this.agent = agent;
28
30
  this.secretText = undefined;
29
31
  }
30
32
  static build() {
@@ -32,7 +34,8 @@ class UiControlClient extends dsl_1.ApiCommands {
32
34
  const builder = ui_control_client_dependency_builder_1.UiControlClientDependencyBuilder;
33
35
  const clientArgsWithDefaults = yield builder.getClientArgsWithDefaults(clientArgs);
34
36
  const { workspaceId, executionRuntime, stepReporter, } = yield builder.build(clientArgsWithDefaults);
35
- return new UiControlClient(workspaceId, executionRuntime, stepReporter, clientArgsWithDefaults.aiElementArgs);
37
+ const agent = new anthropic_1.AskUIAgent(executionRuntime);
38
+ return new UiControlClient(workspaceId, executionRuntime, stepReporter, clientArgsWithDefaults.aiElementArgs, agent);
36
39
  });
37
40
  }
38
41
  /**
@@ -40,7 +43,10 @@ class UiControlClient extends dsl_1.ApiCommands {
40
43
  */
41
44
  connect() {
42
45
  return __awaiter(this, void 0, void 0, function* () {
43
- return this.executionRuntime.connect();
46
+ const connectionState = yield this.executionRuntime.connect();
47
+ yield this.agent.initializeOsAgentHandler();
48
+ yield this.agent.configureAsDesktopAgent();
49
+ return connectionState;
44
50
  });
45
51
  }
46
52
  /**
@@ -732,5 +738,57 @@ class UiControlClient extends dsl_1.ApiCommands {
732
738
  };
733
739
  });
734
740
  }
741
+ /**
742
+ * Instructs the agent to achieve a specified goal through autonomous actions.
743
+ *
744
+ * The agent will analyze the screen, determine necessary steps, and perform actions
745
+ * to accomplish the goal. This may include clicking, typing, scrolling, and other
746
+ * interface interactions.
747
+ *
748
+ * The `options` parameter allows the caller to maintain contextual continuity across
749
+ * multiple `act` calls, either from the same or different agent interfaces.
750
+ *
751
+ * **Examples:**
752
+ *
753
+ * ```ts
754
+ * // Use chatId to maintain context across consecutive steps
755
+ * await aui.act("Search online for the current gold price", {
756
+ * chatId: "session-gold-price"
757
+ * });
758
+ * await aui.act("Create a new text file and type the gold price result into it", {
759
+ * chatId: "session-gold-price"
760
+ * });
761
+ *
762
+ * // Share history explicitly between separate agents (e.g., desktop and Android)
763
+ * // By default, the agent operates as a computer agent.
764
+ * // To control an Android device, you must configure it explicitly:
765
+ * await auiAndroid.agent.configureAsAndroidAgent();
766
+ * const history = await auiDesktop.act("Copy username from desktop app");
767
+ * await auiAndroid.act("Paste username into the mobile login screen", {
768
+ * agentHistory: history
769
+ * });
770
+ * ```
771
+ *
772
+ * @param {string} goal - A description of what the agent should achieve.
773
+ * @param {Object} [options] - Optional parameters to maintain or share context.
774
+ * @param {string} [options.chatId] - A session identifier used to persist memory between
775
+ * consecutive `act` calls. When multiple actions share the
776
+ * same `chatId`, the agent retains knowledge of prior steps,
777
+ * such as extracted data or navigation history.
778
+ * @param {AgentHistory} [options.agentHistory] - A shared interaction history object that can be
779
+ * passed between different agent clients (e.g., between
780
+ * `auiDesktop` and `auiAndroid`) to ensure continuity
781
+ * of understanding and task flow.
782
+ * @returns {Promise<AgentHistory>} - Updated action history after executing the goal.
783
+ * @throws {Error} If the agent is not connected when the method is called.
784
+ */
785
+ act(goal, options) {
786
+ return __awaiter(this, void 0, void 0, function* () {
787
+ if (!this.agent.isConnected()) {
788
+ throw new Error('Agent is not connected, Please call connect() first');
789
+ }
790
+ return this.agent.act(goal, options);
791
+ });
792
+ }
735
793
  }
736
794
  exports.UiControlClient = UiControlClient;
@@ -20,6 +20,7 @@ export declare class CreateExampleProject {
20
20
  private static installTestFrameworkPackages;
21
21
  private copyESLintConfigFiles;
22
22
  private copyGitignore;
23
+ private addVSCodeSettings;
23
24
  private copyTsConfigFile;
24
25
  createExampleProject(): Promise<void>;
25
26
  }
@@ -176,6 +176,10 @@ class CreateExampleProject {
176
176
  title: 'Add eslint run command',
177
177
  task: () => __awaiter(this, void 0, void 0, function* () { return this.addESLintRunCommand(); }),
178
178
  },
179
+ {
180
+ title: 'Add vscode settings',
181
+ task: () => __awaiter(this, void 0, void 0, function* () { return this.addVSCodeSettings(); }),
182
+ },
179
183
  ]);
180
184
  }),
181
185
  }];
@@ -235,23 +239,36 @@ class CreateExampleProject {
235
239
  ];
236
240
  });
237
241
  }
242
+ addVSCodeSettings() {
243
+ return __awaiter(this, void 0, void 0, function* () {
244
+ const vscodeSettingsFilePath = path_1.default.join('example_projects_templates', 'configs', 'vscode-settings.json');
245
+ const vscodeSettingsTargetDirPath = path_1.default.join(this.projectRootDirectoryPath, '.vscode');
246
+ const vscodeSettingsTargetFilePath = path_1.default.join(vscodeSettingsTargetDirPath, 'settings.json');
247
+ return [{
248
+ enabled: () => !fs_extra_1.default.existsSync(vscodeSettingsTargetFilePath),
249
+ task: () => __awaiter(this, void 0, void 0, function* () {
250
+ yield fs_extra_1.default.mkdir(vscodeSettingsTargetDirPath, { recursive: true });
251
+ yield fs_extra_1.default.copyFile(path_1.default.join((0, path_2.getPathToNodeModulesRoot)(), vscodeSettingsFilePath), vscodeSettingsTargetFilePath);
252
+ }),
253
+ title: 'Copy VSCode settings',
254
+ }];
255
+ });
256
+ }
238
257
  copyTsConfigFile() {
239
258
  return __awaiter(this, void 0, void 0, function* () {
240
259
  const tsConfigFilePath = path_1.default.join('example_projects_templates', 'typescript', 'tsconfig.json');
241
260
  const tsConfigTargetFilePath = path_1.default.join(this.projectRootDirectoryPath, 'tsconfig.json');
242
- /* eslint-disable sort-keys */
243
261
  return [
244
262
  {
245
- title: 'Copy ts config file',
246
263
  enabled: () => this.cliOptions.typescriptConfig || !fs_extra_1.default.existsSync(tsConfigTargetFilePath),
247
264
  task: () => __awaiter(this, void 0, void 0, function* () {
248
265
  return fs_extra_1.default.copyFile(path_1.default.join((0, path_2.getPathToNodeModulesRoot)(), tsConfigFilePath), tsConfigTargetFilePath);
249
266
  }),
267
+ title: 'Copy ts config file',
250
268
  },
251
269
  ];
252
270
  });
253
271
  }
254
- /* eslint-enable */
255
272
  createExampleProject() {
256
273
  return __awaiter(this, void 0, void 0, function* () {
257
274
  const tasks = new listr_1.default();
@@ -4,3 +4,4 @@ export { Instruction, Reporter, ReporterConfig, Snapshot, SnapshotDetailLevel, S
4
4
  export { Annotation } from './core/annotation/annotation';
5
5
  export { DetectedElement } from './core/model/annotation-result/detected-element';
6
6
  export { LogLevels } from './shared';
7
+ export { ToolFailure, ToolError, BaseAgentTool } from './core/models/anthropic';
package/dist/cjs/main.js CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.LogLevels = exports.DetectedElement = exports.Annotation = exports.UiController = void 0;
17
+ exports.BaseAgentTool = exports.ToolError = exports.ToolFailure = exports.LogLevels = exports.DetectedElement = exports.Annotation = exports.UiController = void 0;
18
18
  var lib_1 = require("./lib");
19
19
  Object.defineProperty(exports, "UiController", { enumerable: true, get: function () { return lib_1.UiController; } });
20
20
  __exportStar(require("./execution"), exports);
@@ -24,3 +24,7 @@ var detected_element_1 = require("./core/model/annotation-result/detected-elemen
24
24
  Object.defineProperty(exports, "DetectedElement", { enumerable: true, get: function () { return detected_element_1.DetectedElement; } });
25
25
  var shared_1 = require("./shared");
26
26
  Object.defineProperty(exports, "LogLevels", { enumerable: true, get: function () { return shared_1.LogLevels; } });
27
+ var anthropic_1 = require("./core/models/anthropic");
28
+ Object.defineProperty(exports, "ToolFailure", { enumerable: true, get: function () { return anthropic_1.ToolFailure; } });
29
+ Object.defineProperty(exports, "ToolError", { enumerable: true, get: function () { return anthropic_1.ToolError; } });
30
+ Object.defineProperty(exports, "BaseAgentTool", { enumerable: true, get: function () { return anthropic_1.BaseAgentTool; } });
@@ -11,5 +11,6 @@ export declare class Base64Image {
11
11
  private getSharp;
12
12
  getInfo(): Promise<sharp.OutputInfo>;
13
13
  resizeToFitInto(dimension: number): Promise<Base64Image>;
14
- toString(): string;
14
+ resizeWithSameAspectRatio(width: number, height: number): Promise<Base64Image>;
15
+ toString(withPrefix?: boolean): string;
15
16
  }
@@ -77,8 +77,23 @@ class Base64Image {
77
77
  return Base64Image.fromBuffer(buffer);
78
78
  });
79
79
  }
80
- toString() {
81
- return `${Base64Image.strPrefix}${this.buffer.toString('base64')}`;
80
+ resizeWithSameAspectRatio(width, height) {
81
+ return __awaiter(this, void 0, void 0, function* () {
82
+ const buffer = yield (yield this.getSharp())
83
+ .resize({
84
+ fit: 'contain',
85
+ height,
86
+ width,
87
+ })
88
+ .toBuffer();
89
+ return Base64Image.fromBuffer(buffer);
90
+ });
91
+ }
92
+ toString(withPrefix = true) {
93
+ if (withPrefix) {
94
+ return `${Base64Image.strPrefix}${this.buffer.toString('base64')}`;
95
+ }
96
+ return this.buffer.toString('base64');
82
97
  }
83
98
  }
84
99
  exports.Base64Image = Base64Image;
@@ -0,0 +1,13 @@
1
+ import { ClaudeAgent } from './claude-agent';
2
+ import { ExecutionRuntime } from '../../../execution/execution-runtime';
3
+ export declare class AskUIAgent extends ClaudeAgent {
4
+ private osAgentHandler;
5
+ private executionRuntime;
6
+ constructor(executionRuntime: ExecutionRuntime);
7
+ isConnected(): boolean;
8
+ initializeOsAgentHandler(): Promise<void>;
9
+ configureAsDesktopAgent(): Promise<void>;
10
+ configureAsAndroidAgent(): Promise<void>;
11
+ private static DesktopSystemPrompt;
12
+ private static AndroidSystemPrompt;
13
+ }
@@ -0,0 +1,191 @@
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { DesktopKeyPressSequenceTool, DesktopSingleKeyPressTool, MouseClickTool, MouseMoveTool, MouseScrollTool, OsAgentHandler, ScreenShotTool, TypeTool, AgentErrorTool, AndroidSequenceKeyPressTool, AndroidSingleKeyPressTool, ExecuteShellCommandTool, } from './tools/os-agent-tools';
11
+ import { ClaudeAgent } from './claude-agent';
12
+ export class AskUIAgent extends ClaudeAgent {
13
+ constructor(executionRuntime) {
14
+ super((params) => executionRuntime.predictActResponse(params));
15
+ this.osAgentHandler = undefined;
16
+ this.executionRuntime = executionRuntime;
17
+ }
18
+ isConnected() {
19
+ return this.osAgentHandler !== undefined;
20
+ }
21
+ initializeOsAgentHandler() {
22
+ return __awaiter(this, void 0, void 0, function* () {
23
+ this.osAgentHandler = yield OsAgentHandler.createInstance(this.executionRuntime);
24
+ });
25
+ }
26
+ configureAsDesktopAgent() {
27
+ return __awaiter(this, void 0, void 0, function* () {
28
+ if (!this.osAgentHandler) {
29
+ throw new Error('Agent OS client is not connected');
30
+ }
31
+ const tools = [
32
+ new AgentErrorTool(),
33
+ new ScreenShotTool(this.osAgentHandler),
34
+ new MouseMoveTool(this.osAgentHandler),
35
+ new MouseClickTool(this.osAgentHandler),
36
+ new MouseScrollTool(this.osAgentHandler),
37
+ new DesktopKeyPressSequenceTool(this.osAgentHandler),
38
+ new DesktopSingleKeyPressTool(this.osAgentHandler),
39
+ new TypeTool(this.osAgentHandler),
40
+ ];
41
+ this.setTools(tools);
42
+ this.setSystemPrompt(AskUIAgent.DesktopSystemPrompt);
43
+ });
44
+ }
45
+ configureAsAndroidAgent() {
46
+ return __awaiter(this, void 0, void 0, function* () {
47
+ if (!this.osAgentHandler) {
48
+ throw new Error('Agent OS client is not connected');
49
+ }
50
+ const tools = [
51
+ new AgentErrorTool(),
52
+ new ScreenShotTool(this.osAgentHandler),
53
+ new MouseMoveTool(this.osAgentHandler),
54
+ new MouseClickTool(this.osAgentHandler),
55
+ new MouseScrollTool(this.osAgentHandler),
56
+ new AndroidSingleKeyPressTool(this.osAgentHandler),
57
+ new AndroidSequenceKeyPressTool(this.osAgentHandler),
58
+ new TypeTool(this.osAgentHandler),
59
+ new ExecuteShellCommandTool(this.osAgentHandler),
60
+ ];
61
+ this.setTools(tools);
62
+ this.setSystemPrompt(AskUIAgent.AndroidSystemPrompt);
63
+ });
64
+ }
65
+ }
66
+ AskUIAgent.DesktopSystemPrompt = `
67
+ <SYSTEM_CAPABILITY>
68
+ You are an autonomous AI assistant operating on a ${process.platform} machine with ${process.arch} architecture. You have full access to the system and internet connectivity.
69
+ Your main goal is to mimic a human user interacting with a desktop computer. So you should try to use the tools in a way that a human would use a mouse and keyboard to interact with a computer.
70
+
71
+ Key Capabilities:
72
+ * Full system control through mouse and keyboard interactions
73
+ * Screen capture and analysis
74
+ * Web browser automation and navigation
75
+ * File system access and manipulation
76
+ * PDF document handling and text extraction
77
+ * Error handling and recovery mechanisms
78
+
79
+ Available Tools:
80
+ * Mouse control (move, click, scroll)
81
+ * Keyboard input (single keys, key combinations, typing)
82
+ * Screen capture and analysis
83
+ * Error reporting and recovery
84
+
85
+ Current Date: ${new Date().toUTCString()} UTC
86
+ </SYSTEM_CAPABILITY>
87
+
88
+ <OPERATIONAL_GUIDELINES>
89
+ 1. Autonomous Operation:
90
+ * Work independently to achieve user goals
91
+ * Make informed decisions based on available information
92
+ * Chain multiple actions efficiently when possible
93
+ * Verify results after each significant action
94
+
95
+ 2. Web Interaction:
96
+ * Launch appropriate browser if not already open
97
+ * Ensure full page visibility through zoom or scrolling
98
+ * Handle browser-specific behaviors (e.g., Firefox startup wizard)
99
+ * Extract and process PDF content when encountered
100
+
101
+ 3. Error Handling:
102
+ * Detect and analyze failure points
103
+ * Implement appropriate recovery strategies
104
+ * Report issues with clear diagnostic information
105
+ * Use the error tool when stuck or unable to proceed
106
+
107
+ 4. Performance Optimization:
108
+ * Batch related actions when possible
109
+ * Minimize unnecessary screen captures
110
+ * Use efficient navigation patterns
111
+ * Maintain context between actions
112
+
113
+ 5. Safety and Validation:
114
+ * Verify coordinates are within screen bounds
115
+ * Validate input parameters before execution
116
+ * Ensure proper cleanup after operations
117
+ * Maintain system stability
118
+ </OPERATIONAL_GUIDELINES>
119
+
120
+ <IMPORTANT_NOTES>
121
+ * When you are stuck or unable to proceed, use the error tool to raise an error.
122
+ * Always verify tool availability before use
123
+ * Use screenshots strategically for state analysis
124
+ * Report issues promptly with clear diagnostic information
125
+ * Maintain awareness of screen boundaries and coordinate validity
126
+ * Adapt to unexpected situations with appropriate fallback strategies
127
+ </IMPORTANT_NOTES>
128
+ `;
129
+ AskUIAgent.AndroidSystemPrompt = `
130
+ <SYSTEM_CAPABILITY>
131
+ You are an autonomous AI assistant operating on an Android device via ADB. The host machine is ${process.platform} with ${process.arch} architecture and internet connectivity.
132
+ Your main goal is to mimic a human user interacting with an Android device. So you should try to use the tools in a way that a human would use a touch screen to interact with an Android device.
133
+
134
+ Key Capabilities:
135
+ * Full Android device control through ADB
136
+ * Screen capture and analysis
137
+ * Touch input simulation
138
+ * Android-specific key events
139
+ * Error handling and recovery mechanisms
140
+
141
+ Available Tools:
142
+ * Touch control (click, swipe, scroll)
143
+ * Android key events (single and sequence)
144
+ * Screen capture and analysis
145
+ * Error reporting and recovery
146
+
147
+ Current Date: ${new Date().toUTCString()} UTC
148
+ </SYSTEM_CAPABILITY>
149
+
150
+ <OPERATIONAL_GUIDELINES>
151
+ 1. Autonomous Operation:
152
+ * Work independently to achieve user goals
153
+ * Make informed decisions based on available information
154
+ * Chain multiple actions efficiently when possible
155
+ * Verify results after each significant action
156
+
157
+ 2. Screen Interaction:
158
+ * Analyze screen state before interactions
159
+ * Use appropriate input methods (touch, keys)
160
+ * Handle dynamic UI elements effectively
161
+ * Implement efficient navigation patterns
162
+
163
+ 3. Error Handling:
164
+ * Detect and analyze failure points
165
+ * Implement appropriate recovery strategies
166
+ * Report issues with clear diagnostic information
167
+ * Use the error tool when stuck or unable to proceed
168
+
169
+ 4. Performance Optimization:
170
+ * Batch related actions when possible
171
+ * Minimize unnecessary screen captures
172
+ * Use efficient navigation patterns
173
+ * Maintain context between actions
174
+
175
+ 5. Safety and Validation:
176
+ * Verify coordinates are within screen bounds
177
+ * Validate input parameters before execution
178
+ * Ensure proper cleanup after operations
179
+ * Maintain device stability
180
+ </OPERATIONAL_GUIDELINES>
181
+
182
+ <IMPORTANT_NOTES>
183
+ * When you are stuck or unable to proceed, use the error tool to raise an error.
184
+ * Always verify tool availability before use
185
+ * Use screenshots strategically for state analysis
186
+ * Ensure all coordinates are integers and within screen bounds
187
+ * Handle permission issues and device state appropriately
188
+ * Report issues promptly with clear diagnostic information
189
+ * Adapt to unexpected situations with appropriate fallback strategies
190
+ </IMPORTANT_NOTES>
191
+ `;
@@ -0,0 +1,40 @@
1
+ import { Beta } from '@anthropic-ai/sdk/resources';
2
+ import { BaseAgentTool } from './tools/base';
3
+ type PredictActResponseFunction = (params: {
4
+ max_tokens: number;
5
+ messages: Beta.BetaMessageParam[];
6
+ model: string;
7
+ system?: string;
8
+ tools?: any[];
9
+ betas?: string[];
10
+ }) => Promise<Beta.BetaMessage>;
11
+ export declare class ClaudeAgent {
12
+ private predictActResponseFunction;
13
+ private maxTokens;
14
+ private onlyNMostRecentImages;
15
+ private imageTruncationThreshold;
16
+ private systemPrompt;
17
+ private model;
18
+ private betas;
19
+ private _toolCollection;
20
+ private tools;
21
+ private history;
22
+ constructor(predictActResponseFunction: PredictActResponseFunction);
23
+ setTools(tools: BaseAgentTool[]): void;
24
+ addTool(tool: BaseAgentTool): void;
25
+ listToolNames(): string[];
26
+ removeToolByName(toolName: string): void;
27
+ setSystemPrompt(systemPrompt: string): void;
28
+ private IsConfigured;
29
+ private get toolCollection();
30
+ private setHistory;
31
+ private getHistory;
32
+ act(goal: string, options?: {
33
+ chatId?: string;
34
+ agentHistory?: Beta.BetaMessageParam[];
35
+ }): Promise<Beta.BetaMessageParam[]>;
36
+ private makeApiToolResult;
37
+ private maybePrependSystemToolResult;
38
+ private static filterNMostRecentImages;
39
+ }
40
+ export {};