askui 0.26.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/dist/cjs/core/ai-element/ai-element-collection.d.ts +1 -0
  2. package/dist/cjs/core/ai-element/ai-element-collection.js +3 -0
  3. package/dist/cjs/core/models/anthropic/askui-agent.d.ts +2 -0
  4. package/dist/cjs/core/models/anthropic/askui-agent.js +7 -0
  5. package/dist/cjs/core/models/anthropic/claude-agent.js +1 -1
  6. package/dist/cjs/core/models/anthropic/index.d.ts +1 -1
  7. package/dist/cjs/core/models/anthropic/tools/askui-api-tools.d.ts +19 -0
  8. package/dist/cjs/core/models/anthropic/tools/askui-api-tools.js +81 -0
  9. package/dist/cjs/core/models/anthropic/tools/base.d.ts +2 -0
  10. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +11 -1
  11. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +54 -10
  12. package/dist/cjs/core/runner-protocol/request/get-starting-arguments-request.d.ts +5 -0
  13. package/dist/cjs/core/runner-protocol/request/get-starting-arguments-request.js +10 -0
  14. package/dist/cjs/core/runner-protocol/request/index.d.ts +1 -0
  15. package/dist/cjs/core/runner-protocol/request/index.js +3 -1
  16. package/dist/cjs/core/runner-protocol/response/index.d.ts +7 -0
  17. package/dist/cjs/execution/execution-runtime.d.ts +1 -0
  18. package/dist/cjs/execution/execution-runtime.js +6 -0
  19. package/dist/cjs/execution/ui-control-client.d.ts +76 -0
  20. package/dist/cjs/execution/ui-control-client.js +161 -6
  21. package/dist/cjs/execution/ui-controller-client.d.ts +2 -1
  22. package/dist/cjs/execution/ui-controller-client.js +3 -0
  23. package/dist/cjs/lib/interactive_cli/create-example-project.js +1 -1
  24. package/dist/cjs/main.d.ts +1 -1
  25. package/dist/esm/core/ai-element/ai-element-collection.d.ts +1 -0
  26. package/dist/esm/core/ai-element/ai-element-collection.js +3 -0
  27. package/dist/esm/core/models/anthropic/askui-agent.d.ts +2 -0
  28. package/dist/esm/core/models/anthropic/askui-agent.js +8 -1
  29. package/dist/esm/core/models/anthropic/claude-agent.js +1 -1
  30. package/dist/esm/core/models/anthropic/index.d.ts +1 -1
  31. package/dist/esm/core/models/anthropic/index.js +1 -1
  32. package/dist/esm/core/models/anthropic/tools/askui-api-tools.d.ts +19 -0
  33. package/dist/esm/core/models/anthropic/tools/askui-api-tools.js +76 -0
  34. package/dist/esm/core/models/anthropic/tools/base.d.ts +2 -0
  35. package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +11 -1
  36. package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +52 -9
  37. package/dist/esm/core/runner-protocol/request/get-starting-arguments-request.d.ts +5 -0
  38. package/dist/esm/core/runner-protocol/request/get-starting-arguments-request.js +6 -0
  39. package/dist/esm/core/runner-protocol/request/index.d.ts +1 -0
  40. package/dist/esm/core/runner-protocol/request/index.js +1 -0
  41. package/dist/esm/core/runner-protocol/response/index.d.ts +7 -0
  42. package/dist/esm/execution/execution-runtime.d.ts +1 -0
  43. package/dist/esm/execution/execution-runtime.js +6 -0
  44. package/dist/esm/execution/ui-control-client.d.ts +76 -0
  45. package/dist/esm/execution/ui-control-client.js +161 -6
  46. package/dist/esm/execution/ui-controller-client.d.ts +2 -1
  47. package/dist/esm/execution/ui-controller-client.js +4 -1
  48. package/dist/esm/lib/interactive_cli/create-example-project.js +1 -1
  49. package/dist/esm/main.d.ts +1 -1
  50. package/dist/esm/main.js +1 -1
  51. package/package.json +1 -1
@@ -19,6 +19,7 @@ const ui_control_client_dependency_builder_1 = require("./ui-control-client-depe
19
19
  const ai_element_collection_1 = require("../core/ai-element/ai-element-collection");
20
20
  const retry_strategies_1 = require("./retry-strategies");
21
21
  const anthropic_1 = require("../core/models/anthropic");
22
+ const askui_api_tools_1 = require("../core/models/anthropic/tools/askui-api-tools");
22
23
  class UiControlClient extends dsl_1.ApiCommands {
23
24
  constructor(workspaceId, executionRuntime, stepReporter, aiElementArgs, agent) {
24
25
  super();
@@ -82,6 +83,22 @@ class UiControlClient extends dsl_1.ApiCommands {
82
83
  return (this.stepReporter.config.withDetectedElements === 'onFailure' && error !== undefined)
83
84
  || (this.stepReporter.config.withDetectedElements === 'always');
84
85
  }
86
+ beforeNoneInferenceCallCommandExecution(instruction) {
87
+ return __awaiter(this, void 0, void 0, function* () {
88
+ this.stepReporter.resetStep(instruction);
89
+ let annotation;
90
+ if (this.stepReporter.config.withDetectedElements === 'begin'
91
+ || this.stepReporter.config.withDetectedElements === 'always') {
92
+ annotation = yield this.executionRuntime.annotateImage();
93
+ }
94
+ const createdAt = new Date();
95
+ yield this.stepReporter.onStepBegin({
96
+ createdAt,
97
+ detectedElements: annotation === null || annotation === void 0 ? void 0 : annotation.detected_elements,
98
+ screenshot: annotation === null || annotation === void 0 ? void 0 : annotation.image,
99
+ });
100
+ });
101
+ }
85
102
  afterCommandExecution(instruction, error) {
86
103
  return __awaiter(this, void 0, void 0, function* () {
87
104
  var _a;
@@ -151,7 +168,7 @@ class UiControlClient extends dsl_1.ApiCommands {
151
168
  ]);
152
169
  logger_1.logger.debug(instruction);
153
170
  try {
154
- yield this.stepReporter.resetStep(instruction);
171
+ this.stepReporter.resetStep(instruction);
155
172
  yield this.executionRuntime.executeInstruction(instruction, modelComposition);
156
173
  yield this.afterCommandExecution(instruction);
157
174
  return yield Promise.resolve();
@@ -340,10 +357,14 @@ class UiControlClient extends dsl_1.ApiCommands {
340
357
  // eslint-disable-next-line class-methods-use-this
341
358
  waitFor(delayInMs) {
342
359
  return {
343
- exec() {
344
- logger_1.logger.debug(`Wait for ${delayInMs} ms`);
345
- return new Promise((resolve) => { setTimeout(() => resolve(), delayInMs); });
346
- },
360
+ exec: () => __awaiter(this, void 0, void 0, function* () {
361
+ const stepTitle = `Wait for ${delayInMs} ms`;
362
+ const instruction = yield this.buildInstruction(stepTitle, []);
363
+ yield this.beforeNoneInferenceCallCommandExecution(instruction);
364
+ yield new Promise((resolve) => { setTimeout(resolve, delayInMs); });
365
+ yield this.afterCommandExecution(instruction);
366
+ return Promise.resolve();
367
+ }),
347
368
  };
348
369
  }
349
370
  /**
@@ -738,12 +759,146 @@ class UiControlClient extends dsl_1.ApiCommands {
738
759
  };
739
760
  });
740
761
  }
762
+ /**
763
+ * Holds down a key on the keyboard.
764
+ *
765
+ * **Examples:**
766
+ * ```typescript
767
+ * await aui.keyDown('a').exec();
768
+ * ```
769
+ *
770
+ * @param {PC_AND_MODIFIER_KEY} key - The key to hold down.
771
+ */
772
+ keyDown(key) {
773
+ return {
774
+ exec: () => __awaiter(this, void 0, void 0, function* () {
775
+ const stepTitle = `Hold down key ${key}`;
776
+ const instruction = yield this.buildInstruction(stepTitle, []);
777
+ try {
778
+ yield this.beforeNoneInferenceCallCommandExecution(instruction);
779
+ yield this.agent.getOsAgentHandler().desktopKeyHoldDown(key, []);
780
+ yield this.afterCommandExecution(instruction);
781
+ }
782
+ catch (error) {
783
+ yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
784
+ return Promise.reject(error);
785
+ }
786
+ return Promise.resolve();
787
+ }),
788
+ };
789
+ }
790
+ /**
791
+ * Releases a key up that was previously held down.
792
+ *
793
+ * **Examples:**
794
+ * ```typescript
795
+ * await aui.keyUp('a').exec();
796
+ * ```
797
+ *
798
+ * @param {PC_AND_MODIFIER_KEY} key - The key to release up.
799
+ */
800
+ keyUp(key) {
801
+ return {
802
+ exec: () => __awaiter(this, void 0, void 0, function* () {
803
+ const stepTitle = `Release key ${key}`;
804
+ const instruction = yield this.buildInstruction(stepTitle, []);
805
+ try {
806
+ yield this.beforeNoneInferenceCallCommandExecution(instruction);
807
+ yield this.agent.getOsAgentHandler().desktopKeyRelease(key, []);
808
+ yield this.afterCommandExecution(instruction);
809
+ }
810
+ catch (error) {
811
+ yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
812
+ return Promise.reject(error);
813
+ }
814
+ return Promise.resolve();
815
+ }),
816
+ };
817
+ }
741
818
  act(goal, imageOrOptions, options) {
742
819
  return __awaiter(this, void 0, void 0, function* () {
743
820
  if (typeof imageOrOptions === 'string') {
744
821
  return this.agent.act(goal, imageOrOptions, options);
745
822
  }
746
- return this.agent.act(goal, undefined, imageOrOptions);
823
+ const fullTitle = `Act: ${goal}`;
824
+ const stepTitle = fullTitle.length > 50 ? `${fullTitle.substring(0, 47)}...` : fullTitle;
825
+ const instruction = yield this.buildInstruction(stepTitle, []);
826
+ try {
827
+ yield this.beforeNoneInferenceCallCommandExecution(instruction);
828
+ const result = yield this.agent.act(goal, undefined, imageOrOptions);
829
+ yield this.afterCommandExecution(instruction);
830
+ return result;
831
+ }
832
+ catch (error) {
833
+ yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
834
+ return Promise.reject(error);
835
+ }
836
+ });
837
+ }
838
+ /**
839
+ * Adds tools to the agent that allow it to interact with AI elements.
840
+ *
841
+ * @returns {Promise<void>} - A promise that resolves when the tools are added to the agent.
842
+ */
843
+ addAIElementsToolsToAgent() {
844
+ return __awaiter(this, void 0, void 0, function* () {
845
+ const aiElementLocator = (aiElementName) => this.get().aiElement(aiElementName).exec();
846
+ const askUIGetAskUIElementTool = new askui_api_tools_1.AskUIGetAskUIElementTool(this.agent.getOsAgentHandler(), aiElementLocator, 'aiElement');
847
+ this.agent.addTool(askUIGetAskUIElementTool);
848
+ const listAIElementNamesFunction = () => (ai_element_collection_1.AIElementCollection.collectAIElements(this.workspaceId, this.aiElementArgs)).then((aiElementCollection) => aiElementCollection.getNames());
849
+ const askUIListAIElementTool = new askui_api_tools_1.AskUIListAIElementTool(listAIElementNamesFunction);
850
+ this.agent.addTool(askUIListAIElementTool);
851
+ });
852
+ }
853
+ /**
854
+ * Retrieves the starting arguments used when the controller server was initialized.
855
+ *
856
+ * Useful for debugging, logging, or verifying the current server configuration.
857
+ *
858
+ * @property {string} displayNum - Display number controlled by the controller
859
+ * @property {boolean} minimize - Whether controller starts minimized
860
+ * @property {string} runtime - Runtime type ("desktop" or "android")
861
+ * @property {number} port - Communication port
862
+ * @property {number} actionWaitTime - Action wait time
863
+ * @property {string} host - Host address
864
+ * @property {string} logFile - Log file path
865
+ * @property {boolean} hideOverlay - Whether overlay is hidden
866
+ * @property {boolean} debugDraw - Whether debug drawing is enabled
867
+ * @property {string} deviceId - Android device ID
868
+ * @property {string} configFile - Configuration file path
869
+ * @property {string} logLevel - Logging level
870
+ *
871
+ * @example
872
+ * ```typescript
873
+ * const startingArguments = await aui.getControllerStartingArguments();
874
+ * console.log(startingArguments);
875
+ * // Output example:
876
+ * // {
877
+ * // displayNum: 0,
878
+ * // minimize: true,
879
+ * // runtime: 'desktop',
880
+ * // port: 5000,
881
+ * // actionWaitTime: 1000,
882
+ * // host: '127.0.0.1',
883
+ * // logFile: '/tmp/askui/askui-server.log',
884
+ * // hideOverlay: false,
885
+ * // debugDraw: false,
886
+ * // deviceId: 'emulator-5554',
887
+ * // configFile: '/tmp/askui/askui-config.json',
888
+ * // logLevel: 'info',
889
+ * // }
890
+ * ```
891
+ *
892
+ * @example Retrieving Android device ID:
893
+ * ```typescript
894
+ * const startingArguments = await aui.getControllerStartingArguments();
895
+ * console.log(startingArguments.deviceId);
896
+ * // Output example: "emulator-5554"
897
+ * ```
898
+ */
899
+ getControllerStartingArguments() {
900
+ return __awaiter(this, void 0, void 0, function* () {
901
+ return this.executionRuntime.getStartingArguments();
747
902
  });
748
903
  }
749
904
  }
@@ -1,6 +1,6 @@
1
1
  import WebSocket from 'ws';
2
2
  import { DetectedElement } from '../core/model/annotation-result/detected-element';
3
- import { CaptureScreenshotResponse, ControlResponse, StartRecordingResponse, StopRecordingResponse, ReadRecordingPartResponse, InteractiveAnnotationResponse, GetProcessPidResponse } from '../core/runner-protocol';
3
+ import { CaptureScreenshotResponse, ControlResponse, StartRecordingResponse, StopRecordingResponse, ReadRecordingPartResponse, InteractiveAnnotationResponse, GetProcessPidResponse, GetStartingArgumentsResponse } from '../core/runner-protocol';
4
4
  import { ControlCommand } from '../core/ui-control-commands';
5
5
  import { UiControllerClientConnectionState } from './ui-controller-client-connection-state';
6
6
  export declare class UiControllerClient {
@@ -22,6 +22,7 @@ export declare class UiControllerClient {
22
22
  private sendAndReceive;
23
23
  private send;
24
24
  requestScreenshot(): Promise<CaptureScreenshotResponse>;
25
+ getStartingArguments(): Promise<GetStartingArgumentsResponse>;
25
26
  getServerPid(): Promise<GetProcessPidResponse>;
26
27
  startVideoRecording(): Promise<StartRecordingResponse>;
27
28
  stopVideoRecording(): Promise<StopRecordingResponse>;
@@ -101,6 +101,9 @@ class UiControllerClient {
101
101
  requestScreenshot() {
102
102
  return this.sendAndReceive(new runner_protocol_1.CaptureScreenshotRequest());
103
103
  }
104
+ getStartingArguments() {
105
+ return this.sendAndReceive(new runner_protocol_1.GetStartingArgumentsRequest());
106
+ }
104
107
  getServerPid() {
105
108
  return this.sendAndReceive(new runner_protocol_1.GetProcessPidRequest());
106
109
  }
@@ -185,7 +185,7 @@ class CreateExampleProject {
185
185
  return __awaiter(this, void 0, void 0, function* () {
186
186
  const runCommand = (0, util_1.promisify)(child_process_1.exec);
187
187
  const frameworkDependencies = {
188
- jest: 'npm i -D @askui/askui-reporters typescript ts-node @types/jest ts-jest jest @askui/jest-allure-circus eslint @typescript-eslint/parser @typescript-eslint/eslint-plugin eslint-plugin-import @askui/eslint-plugin-askui hpagent',
188
+ jest: 'npm i -D @askui/askui-reporters typescript ts-node @types/jest@30.0.0 ts-jest@29.4.0 jest@29.7.0 @askui/jest-allure-circus eslint @typescript-eslint/parser @typescript-eslint/eslint-plugin eslint-plugin-import @askui/eslint-plugin-askui hpagent',
189
189
  };
190
190
  yield runCommand(frameworkDependencies.jest);
191
191
  });
@@ -4,4 +4,4 @@ export { Instruction, Reporter, ReporterConfig, Snapshot, SnapshotDetailLevel, S
4
4
  export { Annotation } from './core/annotation/annotation';
5
5
  export { DetectedElement } from './core/model/annotation-result/detected-element';
6
6
  export { LogLevels } from './shared';
7
- export { ToolFailure, ToolError, BaseAgentTool } from './core/models/anthropic';
7
+ export { ToolFailure, ToolError, BaseAgentTool, BetaTool, ToolResult, } from './core/models/anthropic';
@@ -8,5 +8,6 @@ export declare class AIElementCollection {
8
8
  static collectAIElements(workspaceId: string | undefined, aiElementArgs: AIElementArgs): Promise<AIElementCollection>;
9
9
  getByName(name: string): CustomElementJson[];
10
10
  getByNames(names: string[]): CustomElementJson[];
11
+ getNames(): string[];
11
12
  private static CollectAiElementsFromLocation;
12
13
  }
@@ -67,6 +67,9 @@ export class AIElementCollection {
67
67
  }
68
68
  return names.flatMap((name) => this.getByName(name));
69
69
  }
70
+ getNames() {
71
+ return [...new Set(this.elements.map((element) => element.name))];
72
+ }
70
73
  static CollectAiElementsFromLocation(aiElementLocation) {
71
74
  const files = fs.readdirSync(aiElementLocation);
72
75
  if (files.length === 0) {
@@ -1,3 +1,4 @@
1
+ import { OsAgentHandler } from './tools/os-agent-tools';
1
2
  import { ClaudeAgent } from './claude-agent';
2
3
  import { ExecutionRuntime } from '../../../execution/execution-runtime';
3
4
  export declare class AskUIAgent extends ClaudeAgent {
@@ -6,6 +7,7 @@ export declare class AskUIAgent extends ClaudeAgent {
6
7
  constructor(executionRuntime: ExecutionRuntime);
7
8
  isConnected(): boolean;
8
9
  initializeOsAgentHandler(): Promise<void>;
10
+ getOsAgentHandler(): OsAgentHandler;
9
11
  configureAsDesktopAgent(): Promise<void>;
10
12
  configureAsAndroidAgent(): Promise<void>;
11
13
  private static DesktopSystemPrompt;
@@ -7,7 +7,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
7
7
  step((generator = generator.apply(thisArg, _arguments || [])).next());
8
8
  });
9
9
  };
10
- import { DesktopPressAndReleaseKeysTool, MouseClickTool, MouseMoveTool, MouseScrollTool, OsAgentHandler, ScreenShotTool, TypeTool, AgentErrorTool, AndroidSequenceKeyPressTool, AndroidSingleKeyPressTool, ExecuteShellCommandTool, DesktopKeyHoldDownTool, DesktopKeyReleaseTool, MouseReleaseLeftButtonTool, MouseHoldLeftButtonDownTool, MouseDragAndDropTool, WaitTool, } from './tools/os-agent-tools';
10
+ import { DesktopPressAndReleaseKeysTool, MouseClickTool, MouseMoveTool, MouseScrollTool, OsAgentHandler, ScreenShotTool, TypeTool, AgentErrorTool, AndroidSequenceKeyPressTool, AndroidSingleKeyPressTool, ExecuteShellCommandTool, DesktopKeyHoldDownTool, DesktopKeyReleaseTool, MouseReleaseLeftButtonTool, MouseHoldLeftButtonDownTool, MouseDragAndDropTool, WaitTool, PrintTool, } from './tools/os-agent-tools';
11
11
  import { ClaudeAgent } from './claude-agent';
12
12
  export class AskUIAgent extends ClaudeAgent {
13
13
  constructor(executionRuntime) {
@@ -23,6 +23,12 @@ export class AskUIAgent extends ClaudeAgent {
23
23
  this.osAgentHandler = yield OsAgentHandler.createInstance(this.executionRuntime);
24
24
  });
25
25
  }
26
+ getOsAgentHandler() {
27
+ if (!this.osAgentHandler) {
28
+ throw new Error('Agent OS client is not connected');
29
+ }
30
+ return this.osAgentHandler;
31
+ }
26
32
  configureAsDesktopAgent() {
27
33
  return __awaiter(this, void 0, void 0, function* () {
28
34
  if (!this.osAgentHandler) {
@@ -30,6 +36,7 @@ export class AskUIAgent extends ClaudeAgent {
30
36
  }
31
37
  const tools = [
32
38
  new AgentErrorTool(),
39
+ new PrintTool(),
33
40
  new ScreenShotTool(this.osAgentHandler),
34
41
  new MouseMoveTool(this.osAgentHandler),
35
42
  new MouseClickTool(this.osAgentHandler),
@@ -23,7 +23,7 @@ export class ClaudeAgent {
23
23
  this.tools = [];
24
24
  this.history = {};
25
25
  this.toolChoice = {
26
- type: 'any',
26
+ type: 'auto',
27
27
  };
28
28
  }
29
29
  setToolChoice(toolChoice) {
@@ -1,3 +1,3 @@
1
1
  export { AskUIAgent } from './askui-agent';
2
- export { ToolFailure, ToolError, BaseAgentTool } from './tools/base';
2
+ export { ToolFailure, ToolError, BaseAgentTool, BetaTool, ToolResult, } from './tools/base';
3
3
  export { AgentHistory, ActOptions } from './claude-agent';
@@ -1,2 +1,2 @@
1
1
  export { AskUIAgent } from './askui-agent';
2
- export { ToolFailure, ToolError, BaseAgentTool } from './tools/base';
2
+ export { ToolFailure, ToolError, BaseAgentTool, } from './tools/base';
@@ -0,0 +1,19 @@
1
+ import { DetectedElement } from '../../../model/annotation-result/detected-element';
2
+ import { BaseAgentTool, ToolResult, BetaTool } from './base';
3
+ import { OsAgentHandler } from './os-agent-tools';
4
+ export declare class AskUIGetAskUIElementTool extends BaseAgentTool {
5
+ private osAgentHandler;
6
+ private locatorFunction;
7
+ private elementType;
8
+ constructor(osAgentHandler: OsAgentHandler, locatorFunction: (aiElementName: string) => Promise<DetectedElement[]>, elementType: string);
9
+ execute(params: {
10
+ elementName: string;
11
+ }): Promise<ToolResult>;
12
+ toParams(): BetaTool;
13
+ }
14
+ export declare class AskUIListAIElementTool extends BaseAgentTool {
15
+ private listFunction;
16
+ constructor(listFunction: () => Promise<string[]>);
17
+ execute(): Promise<ToolResult>;
18
+ toParams(): BetaTool;
19
+ }
@@ -0,0 +1,76 @@
1
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
2
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
3
+ return new (P || (P = Promise))(function (resolve, reject) {
4
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
5
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
6
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
7
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
8
+ });
9
+ };
10
+ import { BaseAgentTool } from './base';
11
+ export class AskUIGetAskUIElementTool extends BaseAgentTool {
12
+ constructor(osAgentHandler, locatorFunction, elementType) {
13
+ super();
14
+ this.osAgentHandler = osAgentHandler;
15
+ this.locatorFunction = locatorFunction;
16
+ this.elementType = elementType;
17
+ }
18
+ execute(params) {
19
+ return __awaiter(this, void 0, void 0, function* () {
20
+ const { elementName } = params;
21
+ const detectedElements = yield this.locatorFunction(elementName);
22
+ const scaledElementsBoundingBoxes = detectedElements.map((element) => {
23
+ const xMid = (element.bndbox.xmin + element.bndbox.xmax) / 2;
24
+ const yMid = (element.bndbox.ymin + element.bndbox.ymax) / 2;
25
+ const [x, y] = this.osAgentHandler.scaleCoordinates('computer', xMid, yMid);
26
+ return {
27
+ x, y,
28
+ };
29
+ });
30
+ return {
31
+ output: `Found ${scaledElementsBoundingBoxes.length} elements of type ${this.elementType}. center coordinates: ${JSON.stringify(scaledElementsBoundingBoxes)}`,
32
+ };
33
+ });
34
+ }
35
+ toParams() {
36
+ return {
37
+ description: `Locates and retrieves the bounding box coordinates of AskUI ${this.elementType} elements on the screen. ` +
38
+ `This tool is essential for UI automation as it provides the exact pixel coordinates needed to interact with UI elements. ` +
39
+ `The coordinates returned can be used for clicking, hovering, or other mouse interactions. ` +
40
+ `Use this tool when you need to find and interact with specific ${this.elementType} UI elements by their semantic names.`,
41
+ input_schema: {
42
+ properties: {
43
+ elementName: {
44
+ type: 'string',
45
+ description: `The semantic name or identifier of the ${this.elementType} element to locate on the screen. `
46
+ },
47
+ },
48
+ required: ['elementName'],
49
+ type: 'object',
50
+ },
51
+ name: `get_askui_${this.elementType}_element_tool`,
52
+ };
53
+ }
54
+ }
55
+ export class AskUIListAIElementTool extends BaseAgentTool {
56
+ constructor(listFunction) {
57
+ super();
58
+ this.listFunction = listFunction;
59
+ }
60
+ execute() {
61
+ return __awaiter(this, void 0, void 0, function* () {
62
+ const elementNames = yield this.listFunction();
63
+ return {
64
+ output: `Found ${elementNames.length} element names that can be used to retrieve bounding boxes. Names: ${JSON.stringify(elementNames)}`,
65
+ };
66
+ });
67
+ }
68
+ toParams() {
69
+ return {
70
+ description: 'Retrieves a comprehensive list of all valid AskUI AI element names that can be used for element location and interaction. ' +
71
+ 'The returned names can be used as input for the get_askui_aiElement_element_tool to locate specific ai elements. ',
72
+ input_schema: { type: 'object', properties: {}, required: [] },
73
+ name: 'list_ai_element_names_tool',
74
+ };
75
+ }
76
+ }
@@ -1,3 +1,5 @@
1
+ import { BetaTool as AnthropicBetaTool } from '@anthropic-ai/sdk/resources/beta/messages/messages';
2
+ export type BetaTool = AnthropicBetaTool;
1
3
  export interface ToolResult {
2
4
  output?: string;
3
5
  error?: string;
@@ -24,7 +24,7 @@ export declare class OsAgentHandler {
24
24
  };
25
25
  setTargetResolution(width: number, height: number): void;
26
26
  takeScreenshot(): Promise<string>;
27
- private scaleCoordinates;
27
+ scaleCoordinates(source: 'api' | 'computer', x: number, y: number): [number, number];
28
28
  requestControl(controlCommand: ControlCommand): Promise<void>;
29
29
  mouseMove(x: number, y: number): Promise<void>;
30
30
  mouseClick(button: "left" | "right" | "middle", doubleClick: boolean): Promise<void>;
@@ -35,6 +35,9 @@ export declare class OsAgentHandler {
35
35
  desktopKeyHoldDown(key: PC_AND_MODIFIER_KEY, modifiers?: MODIFIER_KEY[]): Promise<void>;
36
36
  desktopKeyRelease(key: PC_AND_MODIFIER_KEY, modifiers?: MODIFIER_KEY[]): Promise<void>;
37
37
  typeText(text: string): Promise<void>;
38
+ androidKeyPress(key: ANDROID_KEY): Promise<void>;
39
+ androidKeySequencePress(keys: ANDROID_KEY[]): Promise<void>;
40
+ executeShellCommand(command: string): Promise<void>;
38
41
  }
39
42
  export declare class ScreenShotTool extends BaseAgentTool {
40
43
  private osAgentHandler;
@@ -165,3 +168,10 @@ export declare class WaitTool extends BaseAgentTool {
165
168
  }): Promise<ToolResult>;
166
169
  toParams(): BetaTool;
167
170
  }
171
+ export declare class PrintTool extends BaseAgentTool {
172
+ constructor();
173
+ execute(command: {
174
+ text: string;
175
+ }): Promise<ToolResult>;
176
+ toParams(): BetaTool;
177
+ }
@@ -202,6 +202,24 @@ export class OsAgentHandler {
202
202
  yield this.requestControl(controlCommand);
203
203
  });
204
204
  }
205
+ androidKeyPress(key) {
206
+ return __awaiter(this, void 0, void 0, function* () {
207
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.PRESS_ANDROID_SINGLE_KEY, { x: 0, y: 0 }, key, {})]);
208
+ yield this.requestControl(controlCommand);
209
+ });
210
+ }
211
+ androidKeySequencePress(keys) {
212
+ return __awaiter(this, void 0, void 0, function* () {
213
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.PRESS_ANDROID_KEY_SEQUENCE, { x: 0, y: 0 }, keys.join(' '), {})]);
214
+ yield this.requestControl(controlCommand);
215
+ });
216
+ }
217
+ executeShellCommand(command) {
218
+ return __awaiter(this, void 0, void 0, function* () {
219
+ const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.EXECUTE_COMMAND, { x: 0, y: 0 }, command, {})]);
220
+ yield this.requestControl(controlCommand);
221
+ });
222
+ }
205
223
  }
206
224
  export class ScreenShotTool extends BaseAgentTool {
207
225
  constructor(osAgentHandler) {
@@ -542,8 +560,7 @@ export class TypeTool extends BaseAgentTool {
542
560
  }
543
561
  execute(command) {
544
562
  return __awaiter(this, void 0, void 0, function* () {
545
- const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.TYPE, { x: 0, y: 0 }, command.text, {})]);
546
- yield this.osAgentHandler.requestControl(controlCommand);
563
+ yield this.osAgentHandler.typeText(command.text);
547
564
  return {
548
565
  output: `Typed text: ${command.text}`,
549
566
  };
@@ -573,8 +590,7 @@ export class AndroidSingleKeyPressTool extends BaseAgentTool {
573
590
  }
574
591
  execute(command) {
575
592
  return __awaiter(this, void 0, void 0, function* () {
576
- const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.key, {})]);
577
- yield this.osAgentHandler.requestControl(controlCommand);
593
+ yield this.osAgentHandler.androidKeyPress(command.key);
578
594
  return {
579
595
  output: `Pressed Android key ${command.key}`,
580
596
  };
@@ -605,8 +621,7 @@ export class AndroidSequenceKeyPressTool extends BaseAgentTool {
605
621
  }
606
622
  execute(command) {
607
623
  return __awaiter(this, void 0, void 0, function* () {
608
- const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.PRESS_KEY_SEQUENCE, { x: 0, y: 0 }, command.keys.join(' '), {})]);
609
- yield this.osAgentHandler.requestControl(controlCommand);
624
+ yield this.osAgentHandler.androidKeySequencePress(command.keys);
610
625
  return {
611
626
  output: `Pressed Android keys: ${command.keys.join(', ')}`,
612
627
  };
@@ -645,7 +660,7 @@ export class AgentErrorTool extends BaseAgentTool {
645
660
  toParams() {
646
661
  return {
647
662
  name: 'agent_error_tool',
648
- description: 'Raises an error in the agent',
663
+ description: 'Intentionally raises an error to signal that the agent cannot proceed with the current task. Use this when the agent encounters an unsolvable problem, gets stuck in a loop, or needs to communicate a critical failure that prevents further automation.',
649
664
  input_schema: {
650
665
  type: 'object',
651
666
  properties: {
@@ -666,8 +681,7 @@ export class ExecuteShellCommandTool extends BaseAgentTool {
666
681
  }
667
682
  execute(command) {
668
683
  return __awaiter(this, void 0, void 0, function* () {
669
- const controlCommand = new ControlCommand(ControlCommandCode.OK, [new Action(InputEvent.EXECUTE_COMMAND, { x: 0, y: 0 }, command.command, {})]);
670
- yield this.osAgentHandler.requestControl(controlCommand);
684
+ yield this.osAgentHandler.executeShellCommand(command.command);
671
685
  return {
672
686
  output: `Executed shell command: ${command.command}`,
673
687
  };
@@ -719,3 +733,32 @@ export class WaitTool extends BaseAgentTool {
719
733
  };
720
734
  }
721
735
  }
736
+ export class PrintTool extends BaseAgentTool {
737
+ constructor() {
738
+ super();
739
+ }
740
+ execute(command) {
741
+ return __awaiter(this, void 0, void 0, function* () {
742
+ console.log(command.text);
743
+ return {
744
+ output: `Printed text: ${command.text}`,
745
+ };
746
+ });
747
+ }
748
+ toParams() {
749
+ return {
750
+ name: 'print_tool',
751
+ description: 'Outputs text to the console for debugging, status updates, or user communication. Useful for providing feedback about automation progress, errors, or important information during test execution.',
752
+ input_schema: {
753
+ type: 'object',
754
+ properties: {
755
+ text: {
756
+ type: 'string',
757
+ description: 'The text to output to the console.',
758
+ },
759
+ },
760
+ required: ['text'],
761
+ },
762
+ };
763
+ }
764
+ }
@@ -0,0 +1,5 @@
1
+ import { RunnerProtocolRequest } from './runner-protocol-request';
2
+ export declare class GetStartingArgumentsRequest implements RunnerProtocolRequest {
3
+ static msgName: string;
4
+ msgName: string;
5
+ }
@@ -0,0 +1,6 @@
1
+ export class GetStartingArgumentsRequest {
2
+ constructor() {
3
+ this.msgName = GetStartingArgumentsRequest.msgName;
4
+ }
5
+ }
6
+ GetStartingArgumentsRequest.msgName = 'GET_STARTING_ARGUMENTS_REQUEST';
@@ -6,3 +6,4 @@ export { RunnerProtocolRequest } from './runner-protocol-request';
6
6
  export { StartRecordingRequest } from './start-recording-request';
7
7
  export { StopRecordingRequest } from './stop-recording-request';
8
8
  export { GetProcessPidRequest } from './get-server-process-pid';
9
+ export { GetStartingArgumentsRequest } from './get-starting-arguments-request';
@@ -5,3 +5,4 @@ export { ReadRecordingRequest } from './read-recording-request';
5
5
  export { StartRecordingRequest } from './start-recording-request';
6
6
  export { StopRecordingRequest } from './stop-recording-request';
7
7
  export { GetProcessPidRequest } from './get-server-process-pid';
8
+ export { GetStartingArgumentsRequest } from './get-starting-arguments-request';
@@ -100,3 +100,10 @@ export interface GetProcessPidResponse {
100
100
  };
101
101
  msgName: 'GET_PROCESS_PID_RESPONSE';
102
102
  }
103
+ export interface GetStartingArgumentsResponse {
104
+ data: {
105
+ error?: string;
106
+ arguments: Record<string, string>;
107
+ };
108
+ msgName: 'GET_STARTING_ARGUMENTS_RESPONSE';
109
+ }
@@ -37,6 +37,7 @@ export declare class ExecutionRuntime {
37
37
  private predictCommand;
38
38
  annotateInteractively(): Promise<void>;
39
39
  takeScreenshotIfImageisNotProvided(imagePath?: string): Promise<string>;
40
+ getStartingArguments(): Promise<Record<string, string | number | boolean>>;
40
41
  getDetectedElements(instruction: string, customElementJson?: CustomElementJson[]): Promise<DetectedElement[]>;
41
42
  annotateImage(imagePath?: string, customElementJson?: CustomElementJson[], elements?: DetectedElement[]): Promise<Annotation>;
42
43
  predictVQA(prompt: string, config?: object): Promise<any>;
@@ -180,6 +180,12 @@ export class ExecutionRuntime {
180
180
  return base64Image;
181
181
  });
182
182
  }
183
+ getStartingArguments() {
184
+ return __awaiter(this, void 0, void 0, function* () {
185
+ const startingArgumentsResponse = yield this.uiControllerClient.getStartingArguments();
186
+ return startingArgumentsResponse.data.arguments;
187
+ });
188
+ }
183
189
  getDetectedElements(instruction, customElementJson) {
184
190
  return __awaiter(this, void 0, void 0, function* () {
185
191
  let customElements = [];