askui 0.25.1 → 0.27.0

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/dist/cjs/core/ai-element/ai-element-collection.d.ts +1 -0
  2. package/dist/cjs/core/ai-element/ai-element-collection.js +3 -0
  3. package/dist/cjs/core/models/anthropic/askui-agent.d.ts +2 -0
  4. package/dist/cjs/core/models/anthropic/askui-agent.js +27 -32
  5. package/dist/cjs/core/models/anthropic/claude-agent.d.ts +16 -4
  6. package/dist/cjs/core/models/anthropic/claude-agent.js +43 -5
  7. package/dist/cjs/core/models/anthropic/index.d.ts +2 -3
  8. package/dist/cjs/core/models/anthropic/tools/askui-api-tools.d.ts +19 -0
  9. package/dist/cjs/core/models/anthropic/tools/askui-api-tools.js +81 -0
  10. package/dist/cjs/core/models/anthropic/tools/base.d.ts +2 -0
  11. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.d.ts +70 -6
  12. package/dist/cjs/core/models/anthropic/tools/os-agent-tools.js +393 -84
  13. package/dist/cjs/core/ui-control-commands/input-event.d.ts +3 -1
  14. package/dist/cjs/core/ui-control-commands/input-event.js +2 -0
  15. package/dist/cjs/execution/execution-runtime.d.ts +4 -0
  16. package/dist/cjs/execution/inference-client.d.ts +4 -0
  17. package/dist/cjs/execution/ui-control-client.d.ts +112 -33
  18. package/dist/cjs/execution/ui-control-client.js +105 -45
  19. package/dist/cjs/lib/interactive_cli/create-example-project.js +1 -1
  20. package/dist/cjs/main.d.ts +1 -1
  21. package/dist/esm/core/ai-element/ai-element-collection.d.ts +1 -0
  22. package/dist/esm/core/ai-element/ai-element-collection.js +3 -0
  23. package/dist/esm/core/models/anthropic/askui-agent.d.ts +2 -0
  24. package/dist/esm/core/models/anthropic/askui-agent.js +28 -33
  25. package/dist/esm/core/models/anthropic/claude-agent.d.ts +16 -4
  26. package/dist/esm/core/models/anthropic/claude-agent.js +43 -5
  27. package/dist/esm/core/models/anthropic/index.d.ts +2 -3
  28. package/dist/esm/core/models/anthropic/index.js +1 -1
  29. package/dist/esm/core/models/anthropic/tools/askui-api-tools.d.ts +19 -0
  30. package/dist/esm/core/models/anthropic/tools/askui-api-tools.js +76 -0
  31. package/dist/esm/core/models/anthropic/tools/base.d.ts +2 -0
  32. package/dist/esm/core/models/anthropic/tools/os-agent-tools.d.ts +70 -6
  33. package/dist/esm/core/models/anthropic/tools/os-agent-tools.js +384 -81
  34. package/dist/esm/core/ui-control-commands/input-event.d.ts +3 -1
  35. package/dist/esm/core/ui-control-commands/input-event.js +2 -0
  36. package/dist/esm/execution/execution-runtime.d.ts +4 -0
  37. package/dist/esm/execution/inference-client.d.ts +4 -0
  38. package/dist/esm/execution/ui-control-client.d.ts +112 -33
  39. package/dist/esm/execution/ui-control-client.js +105 -45
  40. package/dist/esm/lib/interactive_cli/create-example-project.js +1 -1
  41. package/dist/esm/main.d.ts +1 -1
  42. package/dist/esm/main.js +1 -1
  43. package/package.json +2 -2
@@ -8,5 +8,6 @@ export declare class AIElementCollection {
8
8
  static collectAIElements(workspaceId: string | undefined, aiElementArgs: AIElementArgs): Promise<AIElementCollection>;
9
9
  getByName(name: string): CustomElementJson[];
10
10
  getByNames(names: string[]): CustomElementJson[];
11
+ getNames(): string[];
11
12
  private static CollectAiElementsFromLocation;
12
13
  }
@@ -73,6 +73,9 @@ class AIElementCollection {
73
73
  }
74
74
  return names.flatMap((name) => this.getByName(name));
75
75
  }
76
+ getNames() {
77
+ return [...new Set(this.elements.map((element) => element.name))];
78
+ }
76
79
  static CollectAiElementsFromLocation(aiElementLocation) {
77
80
  const files = fs_extra_1.default.readdirSync(aiElementLocation);
78
81
  if (files.length === 0) {
@@ -1,3 +1,4 @@
1
+ import { OsAgentHandler } from './tools/os-agent-tools';
1
2
  import { ClaudeAgent } from './claude-agent';
2
3
  import { ExecutionRuntime } from '../../../execution/execution-runtime';
3
4
  export declare class AskUIAgent extends ClaudeAgent {
@@ -6,6 +7,7 @@ export declare class AskUIAgent extends ClaudeAgent {
6
7
  constructor(executionRuntime: ExecutionRuntime);
7
8
  isConnected(): boolean;
8
9
  initializeOsAgentHandler(): Promise<void>;
10
+ getOsAgentHandler(): OsAgentHandler;
9
11
  configureAsDesktopAgent(): Promise<void>;
10
12
  configureAsAndroidAgent(): Promise<void>;
11
13
  private static DesktopSystemPrompt;
@@ -26,6 +26,12 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
26
26
  this.osAgentHandler = yield os_agent_tools_1.OsAgentHandler.createInstance(this.executionRuntime);
27
27
  });
28
28
  }
29
+ getOsAgentHandler() {
30
+ if (!this.osAgentHandler) {
31
+ throw new Error('Agent OS client is not connected');
32
+ }
33
+ return this.osAgentHandler;
34
+ }
29
35
  configureAsDesktopAgent() {
30
36
  return __awaiter(this, void 0, void 0, function* () {
31
37
  if (!this.osAgentHandler) {
@@ -33,13 +39,19 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
33
39
  }
34
40
  const tools = [
35
41
  new os_agent_tools_1.AgentErrorTool(),
42
+ new os_agent_tools_1.PrintTool(),
36
43
  new os_agent_tools_1.ScreenShotTool(this.osAgentHandler),
37
44
  new os_agent_tools_1.MouseMoveTool(this.osAgentHandler),
38
45
  new os_agent_tools_1.MouseClickTool(this.osAgentHandler),
39
46
  new os_agent_tools_1.MouseScrollTool(this.osAgentHandler),
40
- new os_agent_tools_1.DesktopKeyPressSequenceTool(this.osAgentHandler),
41
- new os_agent_tools_1.DesktopSingleKeyPressTool(this.osAgentHandler),
42
47
  new os_agent_tools_1.TypeTool(this.osAgentHandler),
48
+ new os_agent_tools_1.DesktopPressAndReleaseKeysTool(this.osAgentHandler),
49
+ new os_agent_tools_1.DesktopKeyHoldDownTool(this.osAgentHandler),
50
+ new os_agent_tools_1.DesktopKeyReleaseTool(this.osAgentHandler),
51
+ new os_agent_tools_1.MouseHoldLeftButtonDownTool(this.osAgentHandler),
52
+ new os_agent_tools_1.MouseReleaseLeftButtonTool(this.osAgentHandler),
53
+ new os_agent_tools_1.MouseDragAndDropTool(this.osAgentHandler),
54
+ new os_agent_tools_1.WaitTool(),
43
55
  ];
44
56
  this.setTools(tools);
45
57
  this.setSystemPrompt(AskUIAgent.DesktopSystemPrompt);
@@ -60,6 +72,7 @@ class AskUIAgent extends claude_agent_1.ClaudeAgent {
60
72
  new os_agent_tools_1.AndroidSequenceKeyPressTool(this.osAgentHandler),
61
73
  new os_agent_tools_1.TypeTool(this.osAgentHandler),
62
74
  new os_agent_tools_1.ExecuteShellCommandTool(this.osAgentHandler),
75
+ new os_agent_tools_1.WaitTool(),
63
76
  ];
64
77
  this.setTools(tools);
65
78
  this.setSystemPrompt(AskUIAgent.AndroidSystemPrompt);
@@ -70,22 +83,13 @@ exports.AskUIAgent = AskUIAgent;
70
83
  AskUIAgent.DesktopSystemPrompt = `
71
84
  <SYSTEM_CAPABILITY>
72
85
  You are an autonomous AI assistant operating on a ${process.platform} machine with ${process.arch} architecture. You have full access to the system and internet connectivity.
73
- Your main goal is to mimic a human user interacting with a desktop computer. So you should try to use the tools in a way that a human would use a mouse and keyboard to interact with a computer.
74
-
75
- Key Capabilities:
76
- * Full system control through mouse and keyboard interactions
77
- * Screen capture and analysis
78
- * Web browser automation and navigation
79
- * File system access and manipulation
80
- * PDF document handling and text extraction
81
- * Error handling and recovery mechanisms
82
-
83
- Available Tools:
84
- * Mouse control (move, click, scroll)
85
- * Keyboard input (single keys, key combinations, typing)
86
- * Screen capture and analysis
87
- * Error reporting and recovery
88
-
86
+ Your main goal is to mimic a human user interacting with a desktop computer.
87
+ Use a mouse and keyboard to interact with a computer, and take screenshots.
88
+ * This is an interface to a desktop GUI. You do not have access to a terminal or applications menu. You must click on desktop icons to start applications.
89
+ * Some applications may take time to start or process actions, so you may need to wait and take successive screenshots to see the results of your actions. E.g. if you click on Firefox and a window doesn't open, try taking another screenshot.
90
+ * Whenever you intend to move the cursor to click on an element like an icon, you should consult a screenshot to determine the coordinates of the element before moving the cursor.
91
+ * If you tried clicking on a program or link but it failed to load, even after waiting, try adjusting your cursor position so that the tip of the cursor visually falls on the element that you want to click.
92
+ * Make sure to click any buttons, links, icons, etc with the cursor tip in the center of the element. Don't click boxes on their edges unless asked.
89
93
  Current Date: ${new Date().toUTCString()} UTC
90
94
  </SYSTEM_CAPABILITY>
91
95
 
@@ -134,20 +138,11 @@ AskUIAgent.AndroidSystemPrompt = `
134
138
  <SYSTEM_CAPABILITY>
135
139
  You are an autonomous AI assistant operating on an Android device via ADB. The host machine is ${process.platform} with ${process.arch} architecture and internet connectivity.
136
140
  Your main goal is to mimic a human user interacting with an Android device. So you should try to use the tools in a way that a human would use a touch screen to interact with an Android device.
137
-
138
- Key Capabilities:
139
- * Full Android device control through ADB
140
- * Screen capture and analysis
141
- * Touch input simulation
142
- * Android-specific key events
143
- * Error handling and recovery mechanisms
144
-
145
- Available Tools:
146
- * Touch control (click, swipe, scroll)
147
- * Android key events (single and sequence)
148
- * Screen capture and analysis
149
- * Error reporting and recovery
150
-
141
+ Use a gestures and adb commands to interact with the android device, and take screenshots.
142
+ * Some applications may take time to start or process actions, so you may need to wait and take successive screenshots to see the results of your actions. E.g. if you click on Firefox and a window doesn't open, try taking another screenshot.
143
+ * Whenever you intend to move the cursor to click on an element like an icon, you should consult a screenshot to determine the coordinates of the element before moving the cursor.
144
+ * If you tried clicking on a program or link but it failed to load, even after waiting, try adjusting your cursor position so that the tip of the cursor visually falls on the element that you want to click.
145
+ * Make sure to click any buttons, links, icons, etc with the cursor tip in the center of the element. Don't click boxes on their edges unless asked.
151
146
  Current Date: ${new Date().toUTCString()} UTC
152
147
  </SYSTEM_CAPABILITY>
153
148
 
@@ -1,5 +1,6 @@
1
1
  import { Beta } from '@anthropic-ai/sdk/resources';
2
2
  import { BaseAgentTool } from './tools/base';
3
+ import { BetaMessageParam } from '@anthropic-ai/sdk/resources/beta/messages';
3
4
  type PredictActResponseFunction = (params: {
4
5
  max_tokens: number;
5
6
  messages: Beta.BetaMessageParam[];
@@ -7,7 +8,16 @@ type PredictActResponseFunction = (params: {
7
8
  system?: string;
8
9
  tools?: any[];
9
10
  betas?: string[];
11
+ tool_choice?: {
12
+ type: 'tool' | 'any' | 'auto';
13
+ name?: string;
14
+ };
10
15
  }) => Promise<Beta.BetaMessage>;
16
+ export type AgentHistory = BetaMessageParam[];
17
+ export interface ActOptions {
18
+ chatId?: string;
19
+ agentHistory?: Beta.BetaMessageParam[];
20
+ }
11
21
  export declare class ClaudeAgent {
12
22
  private predictActResponseFunction;
13
23
  private maxTokens;
@@ -19,7 +29,12 @@ export declare class ClaudeAgent {
19
29
  private _toolCollection;
20
30
  private tools;
21
31
  private history;
32
+ private toolChoice;
22
33
  constructor(predictActResponseFunction: PredictActResponseFunction);
34
+ setToolChoice(toolChoice: {
35
+ type: 'tool' | 'any' | 'auto';
36
+ name?: string;
37
+ }): void;
23
38
  setTools(tools: BaseAgentTool[]): void;
24
39
  addTool(tool: BaseAgentTool): void;
25
40
  listToolNames(): string[];
@@ -29,10 +44,7 @@ export declare class ClaudeAgent {
29
44
  private get toolCollection();
30
45
  private setHistory;
31
46
  private getHistory;
32
- act(goal: string, options?: {
33
- chatId?: string;
34
- agentHistory?: Beta.BetaMessageParam[];
35
- }): Promise<Beta.BetaMessageParam[]>;
47
+ act(goal: string, imagePathOrBase64String?: string, options?: ActOptions): Promise<Beta.BetaMessageParam[]>;
36
48
  private makeApiToolResult;
37
49
  private maybePrependSystemToolResult;
38
50
  private static filterNMostRecentImages;
@@ -12,6 +12,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
12
12
  exports.ClaudeAgent = void 0;
13
13
  const base_1 = require("./tools/base");
14
14
  const logger_1 = require("../../../lib/logger");
15
+ const base_64_image_1 = require("../../../utils/base_64_image/base-64-image");
15
16
  class ClaudeAgent {
16
17
  constructor(predictActResponseFunction) {
17
18
  this.predictActResponseFunction = predictActResponseFunction;
@@ -19,11 +20,17 @@ class ClaudeAgent {
19
20
  this.onlyNMostRecentImages = 3;
20
21
  this.imageTruncationThreshold = 10;
21
22
  this.systemPrompt = '';
22
- this.model = 'claude-3-5-sonnet-20241022';
23
- this.betas = ['computer-use-2024-10-22'];
23
+ this.model = 'claude-sonnet-4-20250514';
24
+ this.betas = ['computer-use-2025-01-24'];
24
25
  this._toolCollection = undefined;
25
26
  this.tools = [];
26
27
  this.history = {};
28
+ this.toolChoice = {
29
+ type: 'auto',
30
+ };
31
+ }
32
+ setToolChoice(toolChoice) {
33
+ this.toolChoice = toolChoice;
27
34
  }
28
35
  setTools(tools) {
29
36
  this._toolCollection = undefined;
@@ -40,7 +47,14 @@ class ClaudeAgent {
40
47
  this.tools = this.tools.filter((tool) => tool.ToolName !== toolName);
41
48
  }
42
49
  setSystemPrompt(systemPrompt) {
43
- this.systemPrompt = systemPrompt;
50
+ const enhancedPrompt = `${systemPrompt}
51
+ If you cannot complete a request due to safety concerns, please:
52
+ 1. Explain what specific aspect is problematic
53
+ 2. Suggest alternative approaches that would be acceptable
54
+ 3. Provide partial assistance where possible within guidelines.
55
+ Raise an exception After you have provided the above information. include the error message in the exception.
56
+ `;
57
+ this.systemPrompt = enhancedPrompt;
44
58
  }
45
59
  IsConfigured() {
46
60
  return this.tools.length > 0 && this.systemPrompt !== '';
@@ -57,7 +71,7 @@ class ClaudeAgent {
57
71
  getHistory(key) {
58
72
  return this.history[key] || [];
59
73
  }
60
- act(goal, options) {
74
+ act(goal, imagePathOrBase64String, options) {
61
75
  return __awaiter(this, void 0, void 0, function* () {
62
76
  if (!goal.trim()) {
63
77
  throw new Error('Goal cannot be empty');
@@ -73,8 +87,24 @@ class ClaudeAgent {
73
87
  messages.push(...this.getHistory(options.chatId));
74
88
  }
75
89
  // Add the new goal as a user message
90
+ const userContent = [{
91
+ type: 'text',
92
+ text: goal,
93
+ }];
94
+ if (imagePathOrBase64String !== undefined) {
95
+ const image = yield base_64_image_1.Base64Image.fromPathOrString(imagePathOrBase64String);
96
+ const imageString = image.toString(false);
97
+ userContent.push({
98
+ type: 'image',
99
+ source: {
100
+ type: 'base64',
101
+ media_type: 'image/png',
102
+ data: imageString,
103
+ },
104
+ });
105
+ }
76
106
  messages.push({
77
- content: goal,
107
+ content: userContent,
78
108
  role: 'user',
79
109
  });
80
110
  if (this.onlyNMostRecentImages) {
@@ -88,7 +118,15 @@ class ClaudeAgent {
88
118
  system: this.systemPrompt,
89
119
  tools: (new base_1.ToolCollection(this.tools).toParams()),
90
120
  betas: this.betas,
121
+ tool_choice: this.toolChoice,
91
122
  });
123
+ if (response.stop_reason === 'refusal') {
124
+ const refusalMessage = response.content
125
+ .filter(block => block.type === 'text')
126
+ .map(block => block.text)
127
+ .join(' ');
128
+ throw new Error(`Agent refused to answer: ${refusalMessage || 'The request violates agent\'s usage policies'}`);
129
+ }
92
130
  messages.push({
93
131
  content: response.content,
94
132
  role: 'assistant',
@@ -1,4 +1,3 @@
1
- import { BetaMessageParam } from '@anthropic-ai/sdk/resources/beta/messages';
2
- export type AgentHistory = BetaMessageParam[];
3
1
  export { AskUIAgent } from './askui-agent';
4
- export { ToolFailure, ToolError, BaseAgentTool } from './tools/base';
2
+ export { ToolFailure, ToolError, BaseAgentTool, BetaTool, ToolResult, } from './tools/base';
3
+ export { AgentHistory, ActOptions } from './claude-agent';
@@ -0,0 +1,19 @@
1
+ import { DetectedElement } from '../../../model/annotation-result/detected-element';
2
+ import { BaseAgentTool, ToolResult, BetaTool } from './base';
3
+ import { OsAgentHandler } from './os-agent-tools';
4
+ export declare class AskUIGetAskUIElementTool extends BaseAgentTool {
5
+ private osAgentHandler;
6
+ private locatorFunction;
7
+ private elementType;
8
+ constructor(osAgentHandler: OsAgentHandler, locatorFunction: (aiElementName: string) => Promise<DetectedElement[]>, elementType: string);
9
+ execute(params: {
10
+ elementName: string;
11
+ }): Promise<ToolResult>;
12
+ toParams(): BetaTool;
13
+ }
14
+ export declare class AskUIListAIElementTool extends BaseAgentTool {
15
+ private listFunction;
16
+ constructor(listFunction: () => Promise<string[]>);
17
+ execute(): Promise<ToolResult>;
18
+ toParams(): BetaTool;
19
+ }
@@ -0,0 +1,81 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.AskUIListAIElementTool = exports.AskUIGetAskUIElementTool = void 0;
13
+ const base_1 = require("./base");
14
+ class AskUIGetAskUIElementTool extends base_1.BaseAgentTool {
15
+ constructor(osAgentHandler, locatorFunction, elementType) {
16
+ super();
17
+ this.osAgentHandler = osAgentHandler;
18
+ this.locatorFunction = locatorFunction;
19
+ this.elementType = elementType;
20
+ }
21
+ execute(params) {
22
+ return __awaiter(this, void 0, void 0, function* () {
23
+ const { elementName } = params;
24
+ const detectedElements = yield this.locatorFunction(elementName);
25
+ const scaledElementsBoundingBoxes = detectedElements.map((element) => {
26
+ const xMid = (element.bndbox.xmin + element.bndbox.xmax) / 2;
27
+ const yMid = (element.bndbox.ymin + element.bndbox.ymax) / 2;
28
+ const [x, y] = this.osAgentHandler.scaleCoordinates('computer', xMid, yMid);
29
+ return {
30
+ x, y,
31
+ };
32
+ });
33
+ return {
34
+ output: `Found ${scaledElementsBoundingBoxes.length} elements of type ${this.elementType}. center coordinates: ${JSON.stringify(scaledElementsBoundingBoxes)}`,
35
+ };
36
+ });
37
+ }
38
+ toParams() {
39
+ return {
40
+ description: `Locates and retrieves the bounding box coordinates of AskUI ${this.elementType} elements on the screen. ` +
41
+ `This tool is essential for UI automation as it provides the exact pixel coordinates needed to interact with UI elements. ` +
42
+ `The coordinates returned can be used for clicking, hovering, or other mouse interactions. ` +
43
+ `Use this tool when you need to find and interact with specific ${this.elementType} UI elements by their semantic names.`,
44
+ input_schema: {
45
+ properties: {
46
+ elementName: {
47
+ type: 'string',
48
+ description: `The semantic name or identifier of the ${this.elementType} element to locate on the screen. `
49
+ },
50
+ },
51
+ required: ['elementName'],
52
+ type: 'object',
53
+ },
54
+ name: `get_askui_${this.elementType}_element_tool`,
55
+ };
56
+ }
57
+ }
58
+ exports.AskUIGetAskUIElementTool = AskUIGetAskUIElementTool;
59
+ class AskUIListAIElementTool extends base_1.BaseAgentTool {
60
+ constructor(listFunction) {
61
+ super();
62
+ this.listFunction = listFunction;
63
+ }
64
+ execute() {
65
+ return __awaiter(this, void 0, void 0, function* () {
66
+ const elementNames = yield this.listFunction();
67
+ return {
68
+ output: `Found ${elementNames.length} element names that can be used to retrieve bounding boxes. Names: ${JSON.stringify(elementNames)}`,
69
+ };
70
+ });
71
+ }
72
+ toParams() {
73
+ return {
74
+ description: 'Retrieves a comprehensive list of all valid AskUI AI element names that can be used for element location and interaction. ' +
75
+ 'The returned names can be used as input for the get_askui_aiElement_element_tool to locate specific ai elements. ',
76
+ input_schema: { type: 'object', properties: {}, required: [] },
77
+ name: 'list_ai_element_names_tool',
78
+ };
79
+ }
80
+ }
81
+ exports.AskUIListAIElementTool = AskUIListAIElementTool;
@@ -1,3 +1,5 @@
1
+ import { BetaTool as AnthropicBetaTool } from '@anthropic-ai/sdk/resources/beta/messages/messages';
2
+ export type BetaTool = AnthropicBetaTool;
1
3
  export interface ToolResult {
2
4
  output?: string;
3
5
  error?: string;
@@ -5,21 +5,39 @@ import { ExecutionRuntime } from '../../../../execution/execution-runtime';
5
5
  import { ControlCommand } from '../../../ui-control-commands';
6
6
  export declare class OsAgentHandler {
7
7
  private AgentOsClient;
8
- private TargetResolution;
8
+ private targetResolution;
9
9
  private screenDimensions;
10
+ private paddingInfo;
10
11
  constructor(AgentOsClient: ExecutionRuntime, screenDimensions: {
11
12
  width: number;
12
13
  height: number;
13
14
  });
15
+ private updatePaddingInfo;
14
16
  static createInstance(AgentOsClient: ExecutionRuntime): Promise<OsAgentHandler>;
15
17
  getTargetResolution(): {
16
18
  width: number;
17
19
  height: number;
18
20
  };
21
+ getScreenDimensions(): {
22
+ width: number;
23
+ height: number;
24
+ };
19
25
  setTargetResolution(width: number, height: number): void;
20
26
  takeScreenshot(): Promise<string>;
21
- private scaleCoordinates;
27
+ scaleCoordinates(source: 'api' | 'computer', x: number, y: number): [number, number];
22
28
  requestControl(controlCommand: ControlCommand): Promise<void>;
29
+ mouseMove(x: number, y: number): Promise<void>;
30
+ mouseClick(button: "left" | "right" | "middle", doubleClick: boolean): Promise<void>;
31
+ mouseScroll(dx: number, dy: number): Promise<void>;
32
+ mouseHoldLeftButtonDown(): Promise<void>;
33
+ mouseReleaseLeftButton(): Promise<void>;
34
+ desktopKeyPressAndRelease(key: PC_AND_MODIFIER_KEY, modifiers?: MODIFIER_KEY[]): Promise<void>;
35
+ desktopKeyHoldDown(key: PC_AND_MODIFIER_KEY, modifiers?: MODIFIER_KEY[]): Promise<void>;
36
+ desktopKeyRelease(key: PC_AND_MODIFIER_KEY, modifiers?: MODIFIER_KEY[]): Promise<void>;
37
+ typeText(text: string): Promise<void>;
38
+ androidKeyPress(key: ANDROID_KEY): Promise<void>;
39
+ androidKeySequencePress(keys: ANDROID_KEY[]): Promise<void>;
40
+ executeShellCommand(command: string): Promise<void>;
23
41
  }
24
42
  export declare class ScreenShotTool extends BaseAgentTool {
25
43
  private osAgentHandler;
@@ -54,21 +72,53 @@ export declare class MouseScrollTool extends BaseAgentTool {
54
72
  }): Promise<ToolResult>;
55
73
  toParams(): BetaTool;
56
74
  }
57
- export declare class DesktopKeyPressSequenceTool extends BaseAgentTool {
75
+ export declare class MouseDragAndDropTool extends BaseAgentTool {
76
+ private osAgentHandler;
77
+ constructor(osAgentHandler: OsAgentHandler);
78
+ execute(command: {
79
+ startX: number;
80
+ startY: number;
81
+ endX: number;
82
+ endY: number;
83
+ }): Promise<ToolResult>;
84
+ toParams(): BetaTool;
85
+ }
86
+ export declare class MouseHoldLeftButtonDownTool extends BaseAgentTool {
87
+ private osAgentHandler;
88
+ constructor(osAgentHandler: OsAgentHandler);
89
+ execute(): Promise<ToolResult>;
90
+ toParams(): BetaTool;
91
+ }
92
+ export declare class MouseReleaseLeftButtonTool extends BaseAgentTool {
93
+ private osAgentHandler;
94
+ constructor(osAgentHandler: OsAgentHandler);
95
+ execute(): Promise<ToolResult>;
96
+ toParams(): BetaTool;
97
+ }
98
+ export declare class DesktopPressAndReleaseKeysTool extends BaseAgentTool {
58
99
  private osAgentHandler;
59
100
  constructor(osAgentHandler: OsAgentHandler);
60
101
  execute(command: {
61
102
  key: PC_KEY;
62
- firstModifier?: MODIFIER_KEY;
63
- secondModifier?: MODIFIER_KEY;
103
+ modifiers?: MODIFIER_KEY[];
64
104
  }): Promise<ToolResult>;
65
105
  toParams(): BetaTool;
66
106
  }
67
- export declare class DesktopSingleKeyPressTool extends BaseAgentTool {
107
+ export declare class DesktopKeyHoldDownTool extends BaseAgentTool {
68
108
  private osAgentHandler;
69
109
  constructor(osAgentHandler: OsAgentHandler);
70
110
  execute(command: {
71
111
  key: PC_AND_MODIFIER_KEY;
112
+ modifiers?: MODIFIER_KEY[];
113
+ }): Promise<ToolResult>;
114
+ toParams(): BetaTool;
115
+ }
116
+ export declare class DesktopKeyReleaseTool extends BaseAgentTool {
117
+ private osAgentHandler;
118
+ constructor(osAgentHandler: OsAgentHandler);
119
+ execute(command: {
120
+ key: PC_AND_MODIFIER_KEY;
121
+ modifiers?: MODIFIER_KEY[];
72
122
  }): Promise<ToolResult>;
73
123
  toParams(): BetaTool;
74
124
  }
@@ -111,3 +161,17 @@ export declare class ExecuteShellCommandTool extends BaseAgentTool {
111
161
  }): Promise<ToolResult>;
112
162
  toParams(): BetaTool;
113
163
  }
164
+ export declare class WaitTool extends BaseAgentTool {
165
+ constructor();
166
+ execute(command: {
167
+ milliseconds: number;
168
+ }): Promise<ToolResult>;
169
+ toParams(): BetaTool;
170
+ }
171
+ export declare class PrintTool extends BaseAgentTool {
172
+ constructor();
173
+ execute(command: {
174
+ text: string;
175
+ }): Promise<ToolResult>;
176
+ toParams(): BetaTool;
177
+ }