hud-python 0.3.5__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. See the package's registry page for more details.

Files changed (192)
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +15 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +370 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +379 -0
  45. hud/clients/fastmcp.py +222 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -420
  87. hud/tools/computer/hud.py +376 -334
  88. hud/tools/computer/openai.py +295 -292
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.1.dist-info/METADATA +476 -0
  126. hud_python-0.4.1.dist-info/RECORD +132 -0
  127. hud_python-0.4.1.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.5.dist-info/METADATA +0 -284
  190. hud_python-0.3.5.dist-info/RECORD +0 -120
  191. /hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.5.dist-info → hud_python-0.4.1.dist-info}/WHEEL +0 -0
@@ -1,292 +1,295 @@
1
- # flake8: noqa: B008
2
- from __future__ import annotations
3
-
4
- import logging
5
- from typing import Any, Literal, cast
6
-
7
- from mcp import ErrorData, McpError
8
- from mcp.types import INTERNAL_ERROR, INVALID_PARAMS, ImageContent, TextContent
9
- from pydantic import Field
10
-
11
- from hud.tools.base import ToolResult, tool_result_to_content_blocks
12
-
13
- from .hud import HudComputerTool
14
-
15
- logger = logging.getLogger(__name__)
16
-
17
- # Map OpenAI key names to CLA standard keys
18
- OPENAI_TO_CLA_KEYS = {
19
- # Common variations
20
- "return": "enter",
21
- "escape": "escape",
22
- "arrowup": "up",
23
- "arrowdown": "down",
24
- "arrowleft": "left",
25
- "arrowright": "right",
26
- "backspace": "backspace",
27
- "delete": "delete",
28
- "tab": "tab",
29
- "space": "space",
30
- "control": "ctrl",
31
- "alt": "alt",
32
- "shift": "shift",
33
- "meta": "win",
34
- "cmd": "cmd",
35
- "command": "cmd",
36
- "super": "win",
37
- "pageup": "pageup",
38
- "pagedown": "pagedown",
39
- "home": "home",
40
- "end": "end",
41
- "insert": "insert",
42
- }
43
-
44
-
45
- class OpenAIComputerTool(HudComputerTool):
46
- """
47
- OpenAI Computer Use tool for interacting with the computer.
48
- """
49
-
50
- def __init__(
51
- self,
52
- width: int = 1024,
53
- height: int = 768,
54
- environment_width: int = 1920,
55
- environment_height: int = 1080,
56
- display_num: int | None = None,
57
- platform_type: Literal["auto", "xdo", "pyautogui"] = "auto",
58
- rescale_images: bool = False,
59
- **kwargs: Any,
60
- ) -> None:
61
- """
62
- Initialize with OpenAI's default dimensions.
63
-
64
- Args:
65
- width: Target width for rescaling (default: 1024 for OpenAI)
66
- height: Target height for rescaling (default: 768 for OpenAI)
67
- environment_width: Environment screen width (default: 1920)
68
- environment_height: Environment screen height (default: 1080)
69
- display_num: X display number
70
- platform_type: Which executor to use:
71
- - "auto": Automatically detect based on platform
72
- - "xdo": Use XDOExecutor (Linux/X11 only)
73
- - "pyautogui": Use PyAutoGUIExecutor (cross-platform)
74
- rescale_images: If True, rescale screenshots. If False, only rescale action coordinates
75
- **kwargs: Additional arguments passed to HudComputerTool (e.g., executor)
76
- """
77
- super().__init__(
78
- width=width,
79
- height=height,
80
- environment_width=environment_width,
81
- environment_height=environment_height,
82
- display_num=display_num,
83
- platform_type=platform_type,
84
- rescale_images=rescale_images,
85
- **kwargs,
86
- )
87
-
88
- def _map_openai_key_to_cla(self, key: str) -> str:
89
- """Map OpenAI key name to CLA standard key."""
90
- # OpenAI uses lowercase key names
91
- return OPENAI_TO_CLA_KEYS.get(key.lower(), key.lower())
92
-
93
- async def __call__(
94
- self,
95
- *,
96
- type: str = Field(..., description="The action type to perform"),
97
- # Coordinate parameters
98
- x: int | None = Field(None, description="X coordinate for click/move/scroll actions"),
99
- y: int | None = Field(None, description="Y coordinate for click/move/scroll actions"),
100
- # Button parameter
101
- button: str | None = Field(
102
- None, description="Mouse button for click actions (left, right, middle, wheel)"
103
- ),
104
- # Text parameter
105
- text: str | None = Field(None, description="Text to type or response text"),
106
- # Scroll parameters
107
- scroll_x: int | None = Field(None, description="Horizontal scroll amount"),
108
- scroll_y: int | None = Field(None, description="Vertical scroll amount"),
109
- # Wait parameter
110
- ms: int | None = Field(None, description="Time to wait in milliseconds"),
111
- # Key press parameter
112
- keys: list[str] | None = Field(None, description="Keys to press"),
113
- # Drag parameter
114
- path: list[dict[str, int]] | None = Field(
115
- None, description="Path for drag actions as list of {x, y} dicts"
116
- ),
117
- # Custom action parameter
118
- action: str | None = Field(None, description="Custom action name"),
119
- ) -> list[ImageContent | TextContent]:
120
- """
121
- Handle OpenAI Computer Use API calls.
122
-
123
- This converts OpenAI's action format (based on OperatorAdapter) to HudComputerTool's format.
124
-
125
- Returns:
126
- List of MCP content blocks
127
- """
128
- logger.info("OpenAIComputerTool received type: %s", type)
129
-
130
- # Map button names
131
- button_map = {"wheel": "middle"}
132
- if button:
133
- button = button_map.get(button, button)
134
-
135
- # Process based on action type
136
- if type == "screenshot":
137
- screenshot_base64 = await self.executor.screenshot()
138
- if screenshot_base64:
139
- # Rescale screenshot if requested
140
- screenshot_base64 = await self._rescale_screenshot(screenshot_base64)
141
- result = ToolResult(base64_image=screenshot_base64)
142
- else:
143
- result = ToolResult(error="Failed to take screenshot")
144
-
145
- elif type == "click":
146
- if x is not None and y is not None:
147
- # Cast button to proper literal type
148
- button_literal = cast(
149
- "Literal['left', 'right', 'middle', 'back', 'forward']", button or "left"
150
- )
151
- scaled_x, scaled_y = self._scale_coordinates(x, y)
152
- logger.info("Scaled coordinates: %s, %s", scaled_x, scaled_y)
153
- result = await self.executor.click(x=scaled_x, y=scaled_y, button=button_literal)
154
- else:
155
- raise McpError(
156
- ErrorData(code=INVALID_PARAMS, message="x and y coordinates required for click")
157
- )
158
-
159
- elif type == "double_click":
160
- if x is not None and y is not None:
161
- # Use pattern for double-click
162
- scaled_x, scaled_y = self._scale_coordinates(x, y)
163
- result = await self.executor.click(
164
- x=scaled_x, y=scaled_y, button="left", pattern=[100]
165
- )
166
- else:
167
- raise McpError(
168
- ErrorData(
169
- code=INVALID_PARAMS, message="x and y coordinates required for double_click"
170
- )
171
- )
172
-
173
- elif type == "scroll":
174
- if x is None or y is None:
175
- raise McpError(
176
- ErrorData(
177
- code=INVALID_PARAMS, message="x and y coordinates required for scroll"
178
- )
179
- )
180
-
181
- # scroll_x and scroll_y default to 0 if not provided
182
- scaled_x, scaled_y = self._scale_coordinates(x, y)
183
- result = await self.executor.scroll(
184
- x=scaled_x, y=scaled_y, scroll_x=scroll_x or 0, scroll_y=scroll_y or 0
185
- )
186
-
187
- elif type == "type":
188
- if text is None:
189
- raise McpError(ErrorData(code=INVALID_PARAMS, message="text is required for type"))
190
- result = await self.executor.type(text=text, enter_after=False)
191
-
192
- elif type == "wait":
193
- wait_time = ms or 1000 # Default to 1 second
194
- result = await self.executor.wait(time=wait_time)
195
-
196
- elif type == "move":
197
- if x is not None and y is not None:
198
- scaled_x, scaled_y = self._scale_coordinates(x, y)
199
- result = await self.executor.move(x=scaled_x, y=scaled_y)
200
- else:
201
- raise McpError(
202
- ErrorData(code=INVALID_PARAMS, message="x and y coordinates required for move")
203
- )
204
-
205
- elif type == "keypress":
206
- if keys is None or len(keys) == 0:
207
- raise McpError(
208
- ErrorData(code=INVALID_PARAMS, message="keys is required for keypress")
209
- )
210
-
211
- # Map OpenAI keys to CLA standard
212
- cla_keys = []
213
- for key in keys:
214
- cla_key = self._map_openai_key_to_cla(key)
215
- cla_keys.append(cla_key)
216
-
217
- result = await self.executor.press(keys=cla_keys)
218
-
219
- elif type == "drag":
220
- if path is None or len(path) < 2:
221
- raise McpError(
222
- ErrorData(
223
- code=INVALID_PARAMS, message="path with at least 2 points required for drag"
224
- )
225
- )
226
-
227
- # Convert path from list of dicts to list of tuples
228
- drag_path = []
229
- for point in path:
230
- if "x" in point and "y" in point:
231
- drag_path.append((point["x"], point["y"]))
232
- else:
233
- raise McpError(
234
- ErrorData(
235
- code=INVALID_PARAMS, message="Each point in path must have x and y"
236
- )
237
- )
238
-
239
- scaled_path = self._scale_path(drag_path)
240
- result = await self.executor.drag(path=scaled_path)
241
-
242
- elif type == "response":
243
- if text is None:
244
- raise McpError(
245
- ErrorData(code=INVALID_PARAMS, message="text is required for response")
246
- )
247
- # Response returns content blocks directly
248
- return [TextContent(text=text, type="text")]
249
-
250
- elif type == "custom":
251
- # For custom actions, we just return an error since HudComputerTool doesn't support them
252
- raise McpError(
253
- ErrorData(code=INVALID_PARAMS, message=f"Custom action not supported: {action}")
254
- )
255
-
256
- else:
257
- raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Invalid action type: {type}"))
258
-
259
- # Rescale screenshot in result if present
260
- if isinstance(result, ToolResult) and result.base64_image and self.rescale_images:
261
- rescaled_image = await self._rescale_screenshot(result.base64_image)
262
- result = result.replace(base64_image=rescaled_image)
263
-
264
- # Handle screenshot for actions that need it
265
- screenshot_actions = {
266
- "screenshot",
267
- "click",
268
- "double_click",
269
- "scroll",
270
- "type",
271
- "move",
272
- "keypress",
273
- "drag",
274
- "wait",
275
- }
276
-
277
- if (
278
- type in screenshot_actions
279
- and type != "screenshot"
280
- and isinstance(result, ToolResult)
281
- and not result.base64_image
282
- ):
283
- screenshot_base64 = await self.executor.screenshot()
284
- if screenshot_base64:
285
- # Rescale screenshot if requested
286
- screenshot_base64 = await self._rescale_screenshot(screenshot_base64)
287
- result = ToolResult(
288
- output=result.output, error=result.error, base64_image=screenshot_base64
289
- )
290
-
291
- # Convert to content blocks
292
- return tool_result_to_content_blocks(result)
1
+ # flake8: noqa: B008
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ from typing import TYPE_CHECKING, Any, Literal, cast
6
+
7
+ from mcp import ErrorData, McpError
8
+ from mcp.types import INTERNAL_ERROR, INVALID_PARAMS, ContentBlock, TextContent
9
+ from pydantic import Field
10
+
11
+ from hud.tools.computer.settings import computer_settings
12
+ from hud.tools.types import ContentResult
13
+
14
+ from .hud import HudComputerTool
15
+
16
+ if TYPE_CHECKING:
17
+ from hud.tools.executors.base import BaseExecutor
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Map OpenAI key names to CLA standard keys
22
+ OPENAI_TO_CLA_KEYS = {
23
+ # Common variations
24
+ "return": "enter",
25
+ "escape": "escape",
26
+ "arrowup": "up",
27
+ "arrowdown": "down",
28
+ "arrowleft": "left",
29
+ "arrowright": "right",
30
+ "backspace": "backspace",
31
+ "delete": "delete",
32
+ "tab": "tab",
33
+ "space": "space",
34
+ "control": "ctrl",
35
+ "alt": "alt",
36
+ "shift": "shift",
37
+ "meta": "win",
38
+ "cmd": "cmd",
39
+ "command": "cmd",
40
+ "super": "win",
41
+ "pageup": "pageup",
42
+ "pagedown": "pagedown",
43
+ "home": "home",
44
+ "end": "end",
45
+ "insert": "insert",
46
+ }
47
+
48
+
49
+ class OpenAIComputerTool(HudComputerTool):
50
+ """
51
+ OpenAI Computer Use tool for interacting with the computer.
52
+ """
53
+
54
+ def __init__(
55
+ self,
56
+ # Define within environment based on platform
57
+ executor: BaseExecutor | None = None,
58
+ platform_type: Literal["auto", "xdo", "pyautogui"] = "auto",
59
+ display_num: int | None = None,
60
+ # Overrides for what dimensions the agent thinks it operates in
61
+ width: int = computer_settings.OPENAI_COMPUTER_WIDTH,
62
+ height: int = computer_settings.OPENAI_COMPUTER_HEIGHT,
63
+ rescale_images: bool = computer_settings.OPENAI_RESCALE_IMAGES,
64
+ name: str | None = None,
65
+ title: str | None = None,
66
+ description: str | None = None,
67
+ **kwargs: Any,
68
+ ) -> None:
69
+ """
70
+ Initialize with OpenAI's default dimensions.
71
+
72
+ Args:
73
+ width: Target width for rescaling (default: 1024 for OpenAI)
74
+ height: Target height for rescaling (default: 768 for OpenAI)
75
+ rescale_images: If True, rescale screenshots. If False, only rescale action coordinates
76
+ name: Tool name for MCP registration (auto-generated from class name if not provided)
77
+ title: Human-readable display name for the tool (auto-generated from class name)
78
+ description: Tool description (auto-generated from docstring if not provided)
79
+ """
80
+ super().__init__(
81
+ executor=executor,
82
+ platform_type=platform_type,
83
+ display_num=display_num,
84
+ width=width,
85
+ height=height,
86
+ rescale_images=rescale_images,
87
+ name=name or "openai_computer",
88
+ title=title or "OpenAI Computer Tool",
89
+ description=description or "Control computer with mouse, keyboard, and screenshots",
90
+ **kwargs,
91
+ )
92
+
93
+ def _map_openai_key_to_cla(self, key: str) -> str:
94
+ """Map OpenAI key name to CLA standard key."""
95
+ # OpenAI uses lowercase key names
96
+ return OPENAI_TO_CLA_KEYS.get(key.lower(), key.lower())
97
+
98
+ async def __call__(
99
+ self,
100
+ type: str = Field(..., description="The action type to perform"),
101
+ # Coordinate parameters
102
+ x: int | None = Field(None, description="X coordinate for click/move/scroll actions"),
103
+ y: int | None = Field(None, description="Y coordinate for click/move/scroll actions"),
104
+ # Button parameter
105
+ button: str | None = Field(
106
+ None, description="Mouse button for click actions (left, right, middle, wheel)"
107
+ ),
108
+ # Text parameter
109
+ text: str | None = Field(None, description="Text to type or response text"),
110
+ # Scroll parameters
111
+ scroll_x: int | None = Field(None, description="Horizontal scroll amount"),
112
+ scroll_y: int | None = Field(None, description="Vertical scroll amount"),
113
+ # Wait parameter
114
+ ms: int | None = Field(None, description="Time to wait in milliseconds"),
115
+ # Key press parameter
116
+ keys: list[str] | None = Field(None, description="Keys to press"),
117
+ # Drag parameter
118
+ path: list[dict[str, int]] | None = Field(
119
+ None, description="Path for drag actions as list of {x, y} dicts"
120
+ ),
121
+ # Custom action parameter
122
+ action: str | None = Field(None, description="Custom action name"),
123
+ ) -> list[ContentBlock]:
124
+ """
125
+ Handle OpenAI Computer Use API calls.
126
+
127
+ This converts OpenAI's action format (based on OperatorAdapter) to HudComputerTool's format.
128
+
129
+ Returns:
130
+ List of MCP content blocks
131
+ """
132
+ logger.info("OpenAIComputerTool received type: %s", type)
133
+
134
+ # Map button names
135
+ button_map = {"wheel": "middle"}
136
+ if button:
137
+ button = button_map.get(button, button)
138
+
139
+ # Process based on action type
140
+ if type == "screenshot":
141
+ screenshot_base64 = await self.executor.screenshot()
142
+ if screenshot_base64:
143
+ # Rescale screenshot if requested
144
+ result = ContentResult(base64_image=screenshot_base64)
145
+ else:
146
+ result = ContentResult(error="Failed to take screenshot")
147
+
148
+ elif type == "click":
149
+ if x is not None and y is not None:
150
+ # Cast button to proper literal type
151
+ button_literal = cast(
152
+ "Literal['left', 'right', 'middle', 'back', 'forward']", button or "left"
153
+ )
154
+ scaled_x, scaled_y = self._scale_coordinates(x, y)
155
+ logger.info("Scaled coordinates: %s, %s", scaled_x, scaled_y)
156
+ result = await self.executor.click(x=scaled_x, y=scaled_y, button=button_literal)
157
+ else:
158
+ raise McpError(
159
+ ErrorData(code=INVALID_PARAMS, message="x and y coordinates required for click")
160
+ )
161
+
162
+ elif type == "double_click":
163
+ if x is not None and y is not None:
164
+ # Use pattern for double-click
165
+ scaled_x, scaled_y = self._scale_coordinates(x, y)
166
+ result = await self.executor.click(
167
+ x=scaled_x, y=scaled_y, button="left", pattern=[100]
168
+ )
169
+ else:
170
+ raise McpError(
171
+ ErrorData(
172
+ code=INVALID_PARAMS, message="x and y coordinates required for double_click"
173
+ )
174
+ )
175
+
176
+ elif type == "scroll":
177
+ if x is None or y is None:
178
+ raise McpError(
179
+ ErrorData(
180
+ code=INVALID_PARAMS, message="x and y coordinates required for scroll"
181
+ )
182
+ )
183
+
184
+ # scroll_x and scroll_y default to 0 if not provided
185
+ scaled_x, scaled_y = self._scale_coordinates(x, y)
186
+ result = await self.executor.scroll(
187
+ x=scaled_x, y=scaled_y, scroll_x=scroll_x or 0, scroll_y=scroll_y or 0
188
+ )
189
+
190
+ elif type == "type":
191
+ if text is None:
192
+ raise McpError(ErrorData(code=INVALID_PARAMS, message="text is required for type"))
193
+ result = await self.executor.write(text=text, enter_after=False)
194
+
195
+ elif type == "wait":
196
+ wait_time = ms or 1000 # Default to 1 second
197
+ result = await self.executor.wait(time=wait_time)
198
+
199
+ elif type == "move":
200
+ if x is not None and y is not None:
201
+ scaled_x, scaled_y = self._scale_coordinates(x, y)
202
+ result = await self.executor.move(x=scaled_x, y=scaled_y)
203
+ else:
204
+ raise McpError(
205
+ ErrorData(code=INVALID_PARAMS, message="x and y coordinates required for move")
206
+ )
207
+
208
+ elif type == "keypress":
209
+ if keys is None or len(keys) == 0:
210
+ raise McpError(
211
+ ErrorData(code=INVALID_PARAMS, message="keys is required for keypress")
212
+ )
213
+
214
+ # Map OpenAI keys to CLA standard
215
+ cla_keys = []
216
+ for key in keys:
217
+ cla_key = self._map_openai_key_to_cla(key)
218
+ cla_keys.append(cla_key)
219
+
220
+ result = await self.executor.press(keys=cla_keys)
221
+
222
+ elif type == "drag":
223
+ if path is None or len(path) < 2:
224
+ raise McpError(
225
+ ErrorData(
226
+ code=INVALID_PARAMS, message="path with at least 2 points required for drag"
227
+ )
228
+ )
229
+
230
+ # Convert path from list of dicts to list of tuples
231
+ drag_path = []
232
+ for point in path:
233
+ if "x" in point and "y" in point:
234
+ drag_path.append((point["x"], point["y"]))
235
+ else:
236
+ raise McpError(
237
+ ErrorData(
238
+ code=INVALID_PARAMS, message="Each point in path must have x and y"
239
+ )
240
+ )
241
+
242
+ scaled_path = self._scale_path(drag_path)
243
+ result = await self.executor.drag(path=scaled_path)
244
+
245
+ elif type == "response":
246
+ if text is None:
247
+ raise McpError(
248
+ ErrorData(code=INVALID_PARAMS, message="text is required for response")
249
+ )
250
+ # Response returns content blocks directly
251
+ return [TextContent(text=text, type="text")]
252
+
253
+ elif type == "custom":
254
+ # For custom actions, we just return an error since HudComputerTool doesn't support them
255
+ raise McpError(
256
+ ErrorData(code=INVALID_PARAMS, message=f"Custom action not supported: {action}")
257
+ )
258
+
259
+ else:
260
+ raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Invalid action type: {type}"))
261
+
262
+ # Rescale screenshot in result if present
263
+ if isinstance(result, ContentResult) and result.base64_image and self.rescale_images:
264
+ rescaled_image = await self._rescale_screenshot(result.base64_image)
265
+ result.base64_image = rescaled_image
266
+
267
+ # Handle screenshot for actions that need it
268
+ screenshot_actions = {
269
+ "screenshot",
270
+ "click",
271
+ "double_click",
272
+ "scroll",
273
+ "type",
274
+ "move",
275
+ "keypress",
276
+ "drag",
277
+ "wait",
278
+ }
279
+
280
+ if (
281
+ type in screenshot_actions
282
+ and type != "screenshot"
283
+ and isinstance(result, ContentResult)
284
+ and not result.base64_image
285
+ ):
286
+ screenshot_base64 = await self.executor.screenshot()
287
+ if screenshot_base64:
288
+ # Rescale screenshot if requested
289
+ screenshot_base64 = await self._rescale_screenshot(screenshot_base64)
290
+ result = ContentResult(
291
+ output=result.output, error=result.error, base64_image=screenshot_base64
292
+ )
293
+
294
+ # Convert to content blocks
295
+ return result.to_content_blocks()