askui 0.26.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/cjs/core/ai-element/ai-element-collection.d.ts +1 -0
  2. package/dist/cjs/core/ai-element/ai-element-collection.js +3 -0
  3. package/dist/cjs/core/models/anthropic/askui-agent.d.ts +2 -0
  4. package/dist/cjs/core/models/anthropic/askui-agent.js +7 -0
  5. package/dist/cjs/core/models/anthropic/claude-agent.js +1 -1
  6. package/dist/cjs/core/models/anthropic/index.d.ts +1 -1
  7. package/dist/cjs/core/models/anthropic/tools/askui-api-tools.d.ts +19 -0
  8. package/dist/cjs/core/models/anthropic/tools/askui-api-tools.js +81 -0
  9. package/dist/cjs/core/models/anthropic/tools/base.d.ts +2 -0
  10. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +11 -1
  11. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +54 -10
  12. package/dist/cjs/core/runner-protocol/request/get-starting-arguments-request.d.ts +5 -0
  13. package/dist/cjs/core/runner-protocol/request/get-starting-arguments-request.js +10 -0
  14. package/dist/cjs/core/runner-protocol/request/index.d.ts +1 -0
  15. package/dist/cjs/core/runner-protocol/request/index.js +3 -1
  16. package/dist/cjs/core/runner-protocol/response/index.d.ts +7 -0
  17. package/dist/cjs/execution/execution-runtime.d.ts +1 -0
  18. package/dist/cjs/execution/execution-runtime.js +6 -0
  19. package/dist/cjs/execution/ui-control-client.d.ts +76 -0
  20. package/dist/cjs/execution/ui-control-client.js +161 -6
  21. package/dist/cjs/execution/ui-controller-client.d.ts +2 -1
  22. package/dist/cjs/execution/ui-controller-client.js +3 -0
  23. package/dist/cjs/lib/interactive_cli/create-example-project.js +1 -1
  24. package/dist/cjs/main.d.ts +1 -1
  25. package/dist/esm/core/ai-element/ai-element-collection.d.ts +1 -0
  26. package/dist/esm/core/ai-element/ai-element-collection.js +3 -0
  27. package/dist/esm/core/models/anthropic/askui-agent.d.ts +2 -0
  28. package/dist/esm/core/models/anthropic/askui-agent.js +8 -1
  29. package/dist/esm/core/models/anthropic/claude-agent.js +1 -1
  30. package/dist/esm/core/models/anthropic/index.d.ts +1 -1
  31. package/dist/esm/core/models/anthropic/index.js +1 -1
  32. package/dist/esm/core/models/anthropic/tools/askui-api-tools.d.ts +19 -0
  33. package/dist/esm/core/models/anthropic/tools/askui-api-tools.js +76 -0
  34. package/dist/esm/core/models/anthropic/tools/base.d.ts +2 -0
  35. package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +11 -1
  36. package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +52 -9
  37. package/dist/esm/core/runner-protocol/request/get-starting-arguments-request.d.ts +5 -0
  38. package/dist/esm/core/runner-protocol/request/get-starting-arguments-request.js +6 -0
  39. package/dist/esm/core/runner-protocol/request/index.d.ts +1 -0
  40. package/dist/esm/core/runner-protocol/request/index.js +1 -0
  41. package/dist/esm/core/runner-protocol/response/index.d.ts +7 -0
  42. package/dist/esm/execution/execution-runtime.d.ts +1 -0
  43. package/dist/esm/execution/execution-runtime.js +6 -0
  44. package/dist/esm/execution/ui-control-client.d.ts +76 -0
  45. package/dist/esm/execution/ui-control-client.js +161 -6
  46. package/dist/esm/execution/ui-controller-client.d.ts +2 -1
  47. package/dist/esm/execution/ui-controller-client.js +4 -1
  48. package/dist/esm/lib/interactive_cli/create-example-project.js +1 -1
  49. package/dist/esm/main.d.ts +1 -1
  50. package/dist/esm/main.js +1 -1
  51. package/package.json +1 -1
package/dist/esm/execution/ui-control-client.d.ts CHANGED
@@ -55,6 +55,7 @@ export declare class UiControlClient extends ApiCommands {
55
55
  stopVideoRecording(): Promise<void>;
56
56
  readVideoRecording(): Promise<string>;
57
57
  private shouldAnnotateAfterCommandExecution;
58
+ private beforeNoneInferenceCallCommandExecution;
58
59
  private afterCommandExecution;
59
60
  annotate(annotationRequest?: AnnotationRequest): Promise<Annotation>;
60
61
  annotateInteractively(): Promise<void>;
@@ -475,6 +476,28 @@ export declare class UiControlClient extends ApiCommands {
475
476
  * @returns {ExpectAllExistResult.elements} - ExpectExistenceElement[].
476
477
  */
477
478
  expectAllExist(query: ElementExistsQuery[]): Promise<ExpectAllExistResult>;
479
+ /**
480
+ * Holds down a key on the keyboard.
481
+ *
482
+ * **Examples:**
483
+ * ```typescript
484
+ * await aui.keyDown('a').exec();
485
+ * ```
486
+ *
487
+ * @param {PC_AND_MODIFIER_KEY} key - The key to hold down.
488
+ */
489
+ keyDown(key: PC_AND_MODIFIER_KEY): Executable;
490
+ /**
491
+ * Releases a key up that was previously held down.
492
+ *
493
+ * **Examples:**
494
+ * ```typescript
495
+ * await aui.keyUp('a').exec();
496
+ * ```
497
+ *
498
+ * @param {PC_AND_MODIFIER_KEY} key - The key to release up.
499
+ */
500
+ keyUp(key: PC_AND_MODIFIER_KEY): Executable;
478
501
  /**
479
502
  * Instructs the agent to autonomously achieve a specified goal through UI interactions.
480
503
  *
@@ -573,4 +596,57 @@ export declare class UiControlClient extends ApiCommands {
573
596
  */
574
597
  act(goal: string, options?: ActOptions): Promise<AgentHistory>;
575
598
  act(goal: string, imagePathOrBase64String: string, options?: ActOptions): Promise<AgentHistory>;
599
+ /**
600
+ * Adds tools to the agent that allow it to interact with AI elements.
601
+ *
602
+ * @returns {Promise<void>} - A promise that resolves when the tools are added to the agent.
603
+ */
604
+ addAIElementsToolsToAgent(): Promise<void>;
605
+ /**
606
+ * Retrieves the starting arguments used when the controller server was initialized.
607
+ *
608
+ * Useful for debugging, logging, or verifying the current server configuration.
609
+ *
610
+ * @property {string} displayNum - Display number controlled by the controller
611
+ * @property {boolean} minimize - Whether controller starts minimized
612
+ * @property {string} runtime - Runtime type ("desktop" or "android")
613
+ * @property {number} port - Communication port
614
+ * @property {number} actionWaitTime - Action wait time
615
+ * @property {string} host - Host address
616
+ * @property {string} logFile - Log file path
617
+ * @property {boolean} hideOverlay - Whether overlay is hidden
618
+ * @property {boolean} debugDraw - Whether debug drawing is enabled
619
+ * @property {string} deviceId - Android device ID
620
+ * @property {string} configFile - Configuration file path
621
+ * @property {string} logLevel - Logging level
622
+ *
623
+ * @example
624
+ * ```typescript
625
+ * const startingArguments = await aui.getControllerStartingArguments();
626
+ * console.log(startingArguments);
627
+ * // Output example:
628
+ * // {
629
+ * // displayNum: 0,
630
+ * // minimize: true,
631
+ * // runtime: 'desktop',
632
+ * // port: 5000,
633
+ * // actionWaitTime: 1000,
634
+ * // host: '127.0.0.1',
635
+ * // logFile: '/tmp/askui/askui-server.log',
636
+ * // hideOverlay: false,
637
+ * // debugDraw: false,
638
+ * // deviceId: 'emulator-5554',
639
+ * // configFile: '/tmp/askui/askui-config.json',
640
+ * // logLevel: 'info',
641
+ * // }
642
+ * ```
643
+ *
644
+ * @example Retrieving Android device ID:
645
+ * ```typescript
646
+ * const startingArguments = await aui.getControllerStartingArguments();
647
+ * console.log(startingArguments.deviceId);
648
+ * // Output example: "emulator-5554"
649
+ * ```
650
+ */
651
+ getControllerStartingArguments(): Promise<Record<'displayNum' | 'minimize' | 'runtime' | 'port' | 'actionWaitTime' | 'host' | 'logFile' | 'hideOverlay' | 'debugDraw' | 'deviceId' | 'configFile' | 'logLevel', string | number | boolean>>;
576
652
  }
package/dist/esm/execution/ui-control-client.js CHANGED
@@ -16,6 +16,7 @@ import { UiControlClientDependencyBuilder } from './ui-control-client-dependency
16
16
  import { AIElementCollection } from '../core/ai-element/ai-element-collection';
17
17
  import { NoRetryStrategy } from './retry-strategies';
18
18
  import { AskUIAgent } from '../core/models/anthropic';
19
+ import { AskUIGetAskUIElementTool, AskUIListAIElementTool } from '../core/models/anthropic/tools/askui-api-tools';
19
20
  export class UiControlClient extends ApiCommands {
20
21
  constructor(workspaceId, executionRuntime, stepReporter, aiElementArgs, agent) {
21
22
  super();
@@ -79,6 +80,22 @@ export class UiControlClient extends ApiCommands {
79
80
  return (this.stepReporter.config.withDetectedElements === 'onFailure' && error !== undefined)
80
81
  || (this.stepReporter.config.withDetectedElements === 'always');
81
82
  }
83
+ beforeNoneInferenceCallCommandExecution(instruction) {
84
+ return __awaiter(this, void 0, void 0, function* () {
85
+ this.stepReporter.resetStep(instruction);
86
+ let annotation;
87
+ if (this.stepReporter.config.withDetectedElements === 'begin'
88
+ || this.stepReporter.config.withDetectedElements === 'always') {
89
+ annotation = yield this.executionRuntime.annotateImage();
90
+ }
91
+ const createdAt = new Date();
92
+ yield this.stepReporter.onStepBegin({
93
+ createdAt,
94
+ detectedElements: annotation === null || annotation === void 0 ? void 0 : annotation.detected_elements,
95
+ screenshot: annotation === null || annotation === void 0 ? void 0 : annotation.image,
96
+ });
97
+ });
98
+ }
82
99
  afterCommandExecution(instruction, error) {
83
100
  return __awaiter(this, void 0, void 0, function* () {
84
101
  var _a;
@@ -148,7 +165,7 @@ export class UiControlClient extends ApiCommands {
148
165
  ]);
149
166
  logger.debug(instruction);
150
167
  try {
151
- yield this.stepReporter.resetStep(instruction);
168
+ this.stepReporter.resetStep(instruction);
152
169
  yield this.executionRuntime.executeInstruction(instruction, modelComposition);
153
170
  yield this.afterCommandExecution(instruction);
154
171
  return yield Promise.resolve();
@@ -337,10 +354,14 @@ export class UiControlClient extends ApiCommands {
337
354
  // eslint-disable-next-line class-methods-use-this
338
355
  waitFor(delayInMs) {
339
356
  return {
340
- exec() {
341
- logger.debug(`Wait for ${delayInMs} ms`);
342
- return new Promise((resolve) => { setTimeout(() => resolve(), delayInMs); });
343
- },
357
+ exec: () => __awaiter(this, void 0, void 0, function* () {
358
+ const stepTitle = `Wait for ${delayInMs} ms`;
359
+ const instruction = yield this.buildInstruction(stepTitle, []);
360
+ yield this.beforeNoneInferenceCallCommandExecution(instruction);
361
+ yield new Promise((resolve) => { setTimeout(resolve, delayInMs); });
362
+ yield this.afterCommandExecution(instruction);
363
+ return Promise.resolve();
364
+ }),
344
365
  };
345
366
  }
346
367
  /**
@@ -735,12 +756,146 @@ export class UiControlClient extends ApiCommands {
735
756
  };
736
757
  });
737
758
  }
759
+ /**
760
+ * Holds down a key on the keyboard.
761
+ *
762
+ * **Examples:**
763
+ * ```typescript
764
+ * await aui.keyDown('a').exec();
765
+ * ```
766
+ *
767
+ * @param {PC_AND_MODIFIER_KEY} key - The key to hold down.
768
+ */
769
+ keyDown(key) {
770
+ return {
771
+ exec: () => __awaiter(this, void 0, void 0, function* () {
772
+ const stepTitle = `Hold down key ${key}`;
773
+ const instruction = yield this.buildInstruction(stepTitle, []);
774
+ try {
775
+ yield this.beforeNoneInferenceCallCommandExecution(instruction);
776
+ yield this.agent.getOsAgentHandler().desktopKeyHoldDown(key, []);
777
+ yield this.afterCommandExecution(instruction);
778
+ }
779
+ catch (error) {
780
+ yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
781
+ return Promise.reject(error);
782
+ }
783
+ return Promise.resolve();
784
+ }),
785
+ };
786
+ }
787
+ /**
788
+ * Releases a key up that was previously held down.
789
+ *
790
+ * **Examples:**
791
+ * ```typescript
792
+ * await aui.keyUp('a').exec();
793
+ * ```
794
+ *
795
+ * @param {PC_AND_MODIFIER_KEY} key - The key to release up.
796
+ */
797
+ keyUp(key) {
798
+ return {
799
+ exec: () => __awaiter(this, void 0, void 0, function* () {
800
+ const stepTitle = `Release key ${key}`;
801
+ const instruction = yield this.buildInstruction(stepTitle, []);
802
+ try {
803
+ yield this.beforeNoneInferenceCallCommandExecution(instruction);
804
+ yield this.agent.getOsAgentHandler().desktopKeyRelease(key, []);
805
+ yield this.afterCommandExecution(instruction);
806
+ }
807
+ catch (error) {
808
+ yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
809
+ return Promise.reject(error);
810
+ }
811
+ return Promise.resolve();
812
+ }),
813
+ };
814
+ }
738
815
  act(goal, imageOrOptions, options) {
739
816
  return __awaiter(this, void 0, void 0, function* () {
740
817
  if (typeof imageOrOptions === 'string') {
741
818
  return this.agent.act(goal, imageOrOptions, options);
742
819
  }
743
- return this.agent.act(goal, undefined, imageOrOptions);
820
+ const fullTitle = `Act: ${goal}`;
821
+ const stepTitle = fullTitle.length > 50 ? `${fullTitle.substring(0, 47)}...` : fullTitle;
822
+ const instruction = yield this.buildInstruction(stepTitle, []);
823
+ try {
824
+ yield this.beforeNoneInferenceCallCommandExecution(instruction);
825
+ const result = yield this.agent.act(goal, undefined, imageOrOptions);
826
+ yield this.afterCommandExecution(instruction);
827
+ return result;
828
+ }
829
+ catch (error) {
830
+ yield this.afterCommandExecution(instruction, error instanceof Error ? error : new Error(String(error)));
831
+ return Promise.reject(error);
832
+ }
833
+ });
834
+ }
835
+ /**
836
+ * Adds tools to the agent that allow it to interact with AI elements.
837
+ *
838
+ * @returns {Promise<void>} - A promise that resolves when the tools are added to the agent.
839
+ */
840
+ addAIElementsToolsToAgent() {
841
+ return __awaiter(this, void 0, void 0, function* () {
842
+ const aiElementLocator = (aiElementName) => this.get().aiElement(aiElementName).exec();
843
+ const askUIGetAskUIElementTool = new AskUIGetAskUIElementTool(this.agent.getOsAgentHandler(), aiElementLocator, 'aiElement');
844
+ this.agent.addTool(askUIGetAskUIElementTool);
845
+ const listAIElementNamesFunction = () => (AIElementCollection.collectAIElements(this.workspaceId, this.aiElementArgs)).then((aiElementCollection) => aiElementCollection.getNames());
846
+ const askUIListAIElementTool = new AskUIListAIElementTool(listAIElementNamesFunction);
847
+ this.agent.addTool(askUIListAIElementTool);
848
+ });
849
+ }
850
+ /**
851
+ * Retrieves the starting arguments used when the controller server was initialized.
852
+ *
853
+ * Useful for debugging, logging, or verifying the current server configuration.
854
+ *
855
+ * @property {string} displayNum - Display number controlled by the controller
856
+ * @property {boolean} minimize - Whether controller starts minimized
857
+ * @property {string} runtime - Runtime type ("desktop" or "android")
858
+ * @property {number} port - Communication port
859
+ * @property {number} actionWaitTime - Action wait time
860
+ * @property {string} host - Host address
861
+ * @property {string} logFile - Log file path
862
+ * @property {boolean} hideOverlay - Whether overlay is hidden
863
+ * @property {boolean} debugDraw - Whether debug drawing is enabled
864
+ * @property {string} deviceId - Android device ID
865
+ * @property {string} configFile - Configuration file path
866
+ * @property {string} logLevel - Logging level
867
+ *
868
+ * @example
869
+ * ```typescript
870
+ * const startingArguments = await aui.getControllerStartingArguments();
871
+ * console.log(startingArguments);
872
+ * // Output example:
873
+ * // {
874
+ * // displayNum: 0,
875
+ * // minimize: true,
876
+ * // runtime: 'desktop',
877
+ * // port: 5000,
878
+ * // actionWaitTime: 1000,
879
+ * // host: '127.0.0.1',
880
+ * // logFile: '/tmp/askui/askui-server.log',
881
+ * // hideOverlay: false,
882
+ * // debugDraw: false,
883
+ * // deviceId: 'emulator-5554',
884
+ * // configFile: '/tmp/askui/askui-config.json',
885
+ * // logLevel: 'info',
886
+ * // }
887
+ * ```
888
+ *
889
+ * @example Retrieving Android device ID:
890
+ * ```typescript
891
+ * const startingArguments = await aui.getControllerStartingArguments();
892
+ * console.log(startingArguments.deviceId);
893
+ * // Output example: "emulator-5554"
894
+ * ```
895
+ */
896
+ getControllerStartingArguments() {
897
+ return __awaiter(this, void 0, void 0, function* () {
898
+ return this.executionRuntime.getStartingArguments();
744
899
  });
745
900
  }
746
901
  }
package/dist/esm/execution/ui-controller-client.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import WebSocket from 'ws';
2
2
  import { DetectedElement } from '../core/model/annotation-result/detected-element';
3
- import { CaptureScreenshotResponse, ControlResponse, StartRecordingResponse, StopRecordingResponse, ReadRecordingPartResponse, InteractiveAnnotationResponse, GetProcessPidResponse } from '../core/runner-protocol';
3
+ import { CaptureScreenshotResponse, ControlResponse, StartRecordingResponse, StopRecordingResponse, ReadRecordingPartResponse, InteractiveAnnotationResponse, GetProcessPidResponse, GetStartingArgumentsResponse } from '../core/runner-protocol';
4
4
  import { ControlCommand } from '../core/ui-control-commands';
5
5
  import { UiControllerClientConnectionState } from './ui-controller-client-connection-state';
6
6
  export declare class UiControllerClient {
@@ -22,6 +22,7 @@ export declare class UiControllerClient {
22
22
  private sendAndReceive;
23
23
  private send;
24
24
  requestScreenshot(): Promise<CaptureScreenshotResponse>;
25
+ getStartingArguments(): Promise<GetStartingArgumentsResponse>;
25
26
  getServerPid(): Promise<GetProcessPidResponse>;
26
27
  startVideoRecording(): Promise<StartRecordingResponse>;
27
28
  stopVideoRecording(): Promise<StopRecordingResponse>;
package/dist/esm/execution/ui-controller-client.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import WebSocket from 'ws';
2
- import { CaptureScreenshotRequest, ControlRequest, StartRecordingRequest, StopRecordingRequest, ReadRecordingRequest, InteractiveAnnotationRequest, GetProcessPidRequest, } from '../core/runner-protocol';
2
+ import { CaptureScreenshotRequest, ControlRequest, StartRecordingRequest, StopRecordingRequest, ReadRecordingRequest, InteractiveAnnotationRequest, GetProcessPidRequest, GetStartingArgumentsRequest, } from '../core/runner-protocol';
3
3
  import { logger } from '../lib/logger';
4
4
  import { UiControllerClientConnectionState } from './ui-controller-client-connection-state';
5
5
  import { ReadRecordingResponseStreamHandler } from './read-recording-response-stream-handler';
@@ -95,6 +95,9 @@ export class UiControllerClient {
95
95
  requestScreenshot() {
96
96
  return this.sendAndReceive(new CaptureScreenshotRequest());
97
97
  }
98
+ getStartingArguments() {
99
+ return this.sendAndReceive(new GetStartingArgumentsRequest());
100
+ }
98
101
  getServerPid() {
99
102
  return this.sendAndReceive(new GetProcessPidRequest());
100
103
  }
package/dist/esm/lib/interactive_cli/create-example-project.js CHANGED
@@ -179,7 +179,7 @@ export class CreateExampleProject {
179
179
  return __awaiter(this, void 0, void 0, function* () {
180
180
  const runCommand = promisify(exec);
181
181
  const frameworkDependencies = {
182
- jest: 'npm i -D @askui/askui-reporters typescript ts-node @types/jest ts-jest jest @askui/jest-allure-circus eslint @typescript-eslint/parser @typescript-eslint/eslint-plugin eslint-plugin-import @askui/eslint-plugin-askui hpagent',
182
+ jest: 'npm i -D @askui/askui-reporters typescript ts-node @types/jest@30.0.0 ts-jest@29.4.0 jest@29.7.0 @askui/jest-allure-circus eslint @typescript-eslint/parser @typescript-eslint/eslint-plugin eslint-plugin-import @askui/eslint-plugin-askui hpagent',
183
183
  };
184
184
  yield runCommand(frameworkDependencies.jest);
185
185
  });
package/dist/esm/main.d.ts CHANGED
@@ -4,4 +4,4 @@ export { Instruction, Reporter, ReporterConfig, Snapshot, SnapshotDetailLevel, S
4
4
  export { Annotation } from './core/annotation/annotation';
5
5
  export { DetectedElement } from './core/model/annotation-result/detected-element';
6
6
  export { LogLevels } from './shared';
7
- export { ToolFailure, ToolError, BaseAgentTool } from './core/models/anthropic';
7
+ export { ToolFailure, ToolError, BaseAgentTool, BetaTool, ToolResult, } from './core/models/anthropic';
package/dist/esm/main.js CHANGED
@@ -3,4 +3,4 @@ export * from './execution';
3
3
  export { Annotation } from './core/annotation/annotation';
4
4
  export { DetectedElement } from './core/model/annotation-result/detected-element';
5
5
  export { LogLevels } from './shared';
6
- export { ToolFailure, ToolError, BaseAgentTool } from './core/models/anthropic';
6
+ export { ToolFailure, ToolError, BaseAgentTool, } from './core/models/anthropic';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "askui",
3
- "version": "0.26.0",
3
+ "version": "0.28.0",
4
4
  "license": "MIT",
5
5
  "author": "askui GmbH <info@askui.com> (http://www.askui.com/)",
6
6
  "description": "Reliable, automated end-to-end-testing that depends on what is shown on your screen instead of the technology you are running on",