hud-python 0.3.4__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (192) hide show
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -414
  87. hud/tools/computer/hud.py +376 -328
  88. hud/tools/computer/openai.py +295 -286
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.4.dist-info/METADATA +0 -284
  190. hud_python-0.3.4.dist-info/RECORD +0 -120
  191. /hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.4.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
@@ -1,414 +1,437 @@
1
- # flake8: noqa: B008
2
- from __future__ import annotations
3
-
4
- import logging
5
- from typing import TYPE_CHECKING, Any, Literal, cast
6
-
7
- from mcp import ErrorData, McpError
8
- from mcp.types import INTERNAL_ERROR, INVALID_PARAMS, ImageContent, TextContent
9
- from pydantic import Field
10
-
11
- from hud.tools.base import ToolResult, tool_result_to_content_blocks
12
-
13
- from .hud import HudComputerTool
14
-
15
- if TYPE_CHECKING:
16
- from anthropic.types.beta import BetaToolComputerUse20250124Param
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
- # Map Anthropic key names to CLA standard keys
21
- ANTHROPIC_TO_CLA_KEYS = {
22
- # Common variations
23
- "Return": "enter",
24
- "Escape": "escape",
25
- "ArrowUp": "up",
26
- "ArrowDown": "down",
27
- "ArrowLeft": "left",
28
- "ArrowRight": "right",
29
- "Backspace": "backspace",
30
- "Delete": "delete",
31
- "Tab": "tab",
32
- "Space": "space",
33
- "Control": "ctrl",
34
- "Alt": "alt",
35
- "Shift": "shift",
36
- "Meta": "win", # Windows key
37
- "Command": "cmd", # macOS
38
- "Super": "win", # Linux
39
- "PageUp": "pageup",
40
- "PageDown": "pagedown",
41
- "Home": "home",
42
- "End": "end",
43
- "Insert": "insert",
44
- "F1": "f1",
45
- "F2": "f2",
46
- "F3": "f3",
47
- "F4": "f4",
48
- "F5": "f5",
49
- "F6": "f6",
50
- "F7": "f7",
51
- "F8": "f8",
52
- "F9": "f9",
53
- "F10": "f10",
54
- "F11": "f11",
55
- "F12": "f12",
56
- }
57
-
58
-
59
- class AnthropicComputerTool(HudComputerTool):
60
- """
61
- Anthropic Computer Use tool for interacting with the computer.
62
- """
63
-
64
- name: str = "computer"
65
- api_type: str = "computer_20250124"
66
-
67
- def __init__(
68
- self,
69
- width: int = 1400,
70
- height: int = 850,
71
- display_num: int | None = None,
72
- platform_type: Literal["auto", "xdo", "pyautogui"] = "auto",
73
- rescale_images: bool = False,
74
- **kwargs: Any,
75
- ) -> None:
76
- """
77
- Initialize with Anthropic's default dimensions.
78
-
79
- Args:
80
- width: Target width for rescaling (default: 1400 for Anthropic)
81
- height: Target height for rescaling (default: 850 for Anthropic)
82
- display_num: X display number
83
- platform_type: Which executor to use:
84
- - "auto": Automatically detect based on platform
85
- - "xdo": Use XDOExecutor (Linux/X11 only)
86
- - "pyautogui": Use PyAutoGUIExecutor (cross-platform)
87
- rescale_images: If True, rescale screenshots. If False, only rescale action coordinates
88
- **kwargs: Additional arguments passed to HudComputerTool (e.g., executor)
89
- """
90
- super().__init__(
91
- width=width,
92
- height=height,
93
- display_num=display_num,
94
- platform_type=platform_type,
95
- rescale_images=rescale_images,
96
- **kwargs,
97
- )
98
-
99
- def to_params(self) -> BetaToolComputerUse20250124Param:
100
- """Convert to Anthropic tool parameters."""
101
- return cast(
102
- "BetaToolComputerUse20250124Param",
103
- {
104
- "type": self.api_type,
105
- "name": self.name,
106
- "display_width_px": self.width,
107
- "display_height_px": self.height,
108
- },
109
- )
110
-
111
- def _map_anthropic_key_to_cla(self, key: str) -> str:
112
- """Map Anthropic key name to CLA standard key."""
113
- # Handle key combinations like "ctrl+a"
114
- if "+" in key:
115
- parts = key.split("+")
116
- mapped_parts = []
117
- for part in parts:
118
- # Try exact match first, then case-insensitive
119
- mapped = ANTHROPIC_TO_CLA_KEYS.get(
120
- part, ANTHROPIC_TO_CLA_KEYS.get(part.capitalize(), part.lower())
121
- )
122
- mapped_parts.append(mapped)
123
- return "+".join(mapped_parts)
124
- else:
125
- # Single key - try exact match first, then case-insensitive
126
- return ANTHROPIC_TO_CLA_KEYS.get(
127
- key, ANTHROPIC_TO_CLA_KEYS.get(key.capitalize(), key.lower())
128
- )
129
-
130
- async def __call__(
131
- self,
132
- action: str = Field(..., description="The action to perform on the computer"),
133
- coordinate: list[int] | tuple[int, int] | None = Field(
134
- None, description="The coordinate to interact with on the computer [x, y]"
135
- ),
136
- text: str | None = Field(
137
- None, description="The text to type on the computer or key to press"
138
- ),
139
- start_coordinate: list[int] | tuple[int, int] | None = Field(
140
- None, description="The starting coordinate for drag actions [x, y]"
141
- ),
142
- scroll_direction: str | None = Field(
143
- None, description="The direction to scroll (up, down, left, right)"
144
- ),
145
- scroll_amount: int | None = Field(None, description="The amount to scroll"),
146
- duration: float | None = Field(None, description="The duration of the action in seconds"),
147
- take_screenshot_on_click: bool = Field(
148
- True, description="Whether to take a screenshot after clicking"
149
- ),
150
- ) -> list[ImageContent | TextContent]:
151
- """
152
- Handle Anthropic Computer Use API calls.
153
-
154
- This converts Anthropic's action format to HudComputerTool's format.
155
-
156
- Returns:
157
- List of MCP content blocks
158
- """
159
- logger.info("AnthropicComputerTool received action: %s", action)
160
-
161
- # Convert lists to tuples if needed
162
- coord_tuple = None
163
- if coordinate:
164
- coord_tuple = tuple(coordinate) if isinstance(coordinate, list) else coordinate
165
-
166
- start_coord_tuple = None
167
- if start_coordinate:
168
- start_coord_tuple = (
169
- tuple(start_coordinate) if isinstance(start_coordinate, list) else start_coordinate
170
- )
171
-
172
- # Map Anthropic actions to HudComputerTool actions
173
- if action == "screenshot":
174
- screenshot_base64 = await self.executor.screenshot()
175
- if screenshot_base64:
176
- # Rescale screenshot if requested
177
- screenshot_base64 = await self._rescale_screenshot(screenshot_base64)
178
- result = ToolResult(base64_image=screenshot_base64)
179
- else:
180
- result = ToolResult(error="Failed to take screenshot")
181
-
182
- elif action == "left_click" or action == "click":
183
- if coord_tuple and len(coord_tuple) >= 2:
184
- scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
185
- logger.info("Scaled coordinates: %s, %s", scaled_x, scaled_y)
186
- result = await self.executor.click(x=scaled_x, y=scaled_y)
187
- else:
188
- result = await self.executor.click()
189
-
190
- elif action == "double_click":
191
- if coord_tuple and len(coord_tuple) >= 2:
192
- # Use pattern for double-click
193
- scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
194
- result = await self.executor.click(x=scaled_x, y=scaled_y, pattern=[100])
195
- else:
196
- result = await self.executor.click(pattern=[100])
197
-
198
- elif action == "triple_click":
199
- if coord_tuple and len(coord_tuple) >= 2:
200
- # Use pattern for triple-click
201
- scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
202
- result = await self.executor.click(x=scaled_x, y=scaled_y, pattern=[100, 100])
203
- else:
204
- result = await self.executor.click(pattern=[100, 100])
205
-
206
- elif action == "right_click":
207
- if coord_tuple and len(coord_tuple) >= 2:
208
- scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
209
- result = await self.executor.click(x=scaled_x, y=scaled_y, button="right")
210
- else:
211
- result = await self.executor.click(button="right")
212
-
213
- elif action == "middle_click":
214
- if coord_tuple and len(coord_tuple) >= 2:
215
- scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
216
- result = await self.executor.click(x=scaled_x, y=scaled_y, button="middle")
217
- else:
218
- result = await self.executor.click(button="middle")
219
-
220
- elif action == "mouse_move" or action == "move":
221
- if coord_tuple and len(coord_tuple) >= 2:
222
- scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
223
- result = await self.executor.move(x=scaled_x, y=scaled_y)
224
- else:
225
- raise McpError(
226
- ErrorData(code=INVALID_PARAMS, message="coordinate is required for mouse_move")
227
- )
228
-
229
- elif action == "type":
230
- if text:
231
- result = await self.executor.type(text=text)
232
- else:
233
- raise McpError(ErrorData(code=INVALID_PARAMS, message="text is required for type"))
234
-
235
- elif action == "key":
236
- if text:
237
- # Anthropic sends single key or combo like "ctrl+a"
238
- # Map to CLA standard key format
239
- mapped_key = self._map_anthropic_key_to_cla(text)
240
- result = await self.executor.press(keys=[mapped_key])
241
- else:
242
- raise McpError(ErrorData(code=INVALID_PARAMS, message="text is required for key"))
243
-
244
- elif action == "scroll":
245
- # Original implementation validates scroll_direction and scroll_amount
246
- if scroll_direction not in ["up", "down", "left", "right"]:
247
- raise McpError(
248
- ErrorData(
249
- code=INVALID_PARAMS,
250
- message="scroll_direction must be 'up', 'down', 'left', or 'right'",
251
- )
252
- )
253
-
254
- if scroll_amount is None or scroll_amount < 0:
255
- raise McpError(
256
- ErrorData(
257
- code=INVALID_PARAMS, message="scroll_amount must be a non-negative int"
258
- )
259
- )
260
-
261
- # Convert direction to scroll amounts
262
- scroll_x = None
263
- scroll_y = None
264
- if scroll_direction == "down":
265
- scroll_y = scroll_amount
266
- elif scroll_direction == "up":
267
- scroll_y = -scroll_amount
268
- elif scroll_direction == "right":
269
- scroll_x = scroll_amount
270
- elif scroll_direction == "left":
271
- scroll_x = -scroll_amount
272
-
273
- if coord_tuple and len(coord_tuple) >= 2:
274
- scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
275
- result = await self.executor.scroll(
276
- x=scaled_x, y=scaled_y, scroll_x=scroll_x, scroll_y=scroll_y
277
- )
278
- else:
279
- result = await self.executor.scroll(scroll_x=scroll_x, scroll_y=scroll_y)
280
-
281
- elif action == "left_click_drag" or action == "drag":
282
- # Anthropic sends drag with start and end coordinates
283
- if coord_tuple and len(coord_tuple) >= 2:
284
- if start_coord_tuple and len(start_coord_tuple) >= 2:
285
- # Full drag path
286
- path = [
287
- (start_coord_tuple[0], start_coord_tuple[1]),
288
- (coord_tuple[0], coord_tuple[1]),
289
- ]
290
- scaled_path = self._scale_path(path)
291
- result = await self.executor.drag(path=scaled_path)
292
- else:
293
- # Just end coordinate, drag from current position
294
- # Original spec allows this
295
- current_pos = [(0, 0), (coord_tuple[0], coord_tuple[1])] # Simplified
296
- scaled_path = self._scale_path(current_pos)
297
- result = await self.executor.drag(path=scaled_path)
298
- else:
299
- raise McpError(
300
- ErrorData(
301
- code=INVALID_PARAMS, message="coordinate is required for left_click_drag"
302
- )
303
- )
304
-
305
- elif action == "wait":
306
- # Original spec expects duration in seconds
307
- if duration is None:
308
- raise McpError(
309
- ErrorData(code=INVALID_PARAMS, message="duration is required for wait")
310
- )
311
- if duration < 0:
312
- raise McpError(
313
- ErrorData(code=INVALID_PARAMS, message="duration must be non-negative")
314
- )
315
- if duration > 100:
316
- raise McpError(ErrorData(code=INVALID_PARAMS, message="duration is too long"))
317
-
318
- # Convert seconds to milliseconds for HudComputerTool
319
- result = await self.executor.wait(time=int(duration * 1000))
320
-
321
- elif action == "hold_key":
322
- # Original spec has hold_key action
323
- if text is None:
324
- raise McpError(
325
- ErrorData(code=INVALID_PARAMS, message="text is required for hold_key")
326
- )
327
- if duration is None:
328
- raise McpError(
329
- ErrorData(code=INVALID_PARAMS, message="duration is required for hold_key")
330
- )
331
- if duration < 0:
332
- raise McpError(
333
- ErrorData(code=INVALID_PARAMS, message="duration must be non-negative")
334
- )
335
- if duration > 100:
336
- raise McpError(ErrorData(code=INVALID_PARAMS, message="duration is too long"))
337
-
338
- # Hold key action
339
- result = await self.executor.hold_key(key=text, duration=duration)
340
-
341
- elif action == "left_mouse_down":
342
- # These don't accept coordinates in original spec
343
- if coord_tuple is not None:
344
- raise McpError(
345
- ErrorData(
346
- code=INVALID_PARAMS,
347
- message="coordinate is not accepted for left_mouse_down",
348
- )
349
- )
350
- # Use generic mouse_down method
351
- result = await self.executor.mouse_down(button="left")
352
-
353
- elif action == "left_mouse_up":
354
- # These don't accept coordinates in original spec
355
- if coord_tuple is not None:
356
- raise McpError(
357
- ErrorData(
358
- code=INVALID_PARAMS, message="coordinate is not accepted for left_mouse_up"
359
- )
360
- )
361
- # Use generic mouse_up method
362
- result = await self.executor.mouse_up(button="left")
363
-
364
- elif action == "cursor_position":
365
- result = await self.executor.position()
366
-
367
- else:
368
- # Unknown action
369
- raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Invalid action: {action}"))
370
-
371
- # Rescale screenshot in result if present
372
- if isinstance(result, ToolResult) and result.base64_image and self.rescale_images:
373
- rescaled_image = await self._rescale_screenshot(result.base64_image)
374
- result = result.replace(base64_image=rescaled_image)
375
-
376
- # Handle screenshot for actions that need it
377
- screenshot_actions = {
378
- "screenshot",
379
- "left_click",
380
- "click",
381
- "double_click",
382
- "triple_click",
383
- "right_click",
384
- "middle_click",
385
- "mouse_move",
386
- "move",
387
- "type",
388
- "key",
389
- "scroll",
390
- "left_click_drag",
391
- "drag",
392
- "wait",
393
- "hold_key",
394
- "left_mouse_down",
395
- "left_mouse_up",
396
- }
397
-
398
- if (
399
- action in screenshot_actions
400
- and action != "screenshot"
401
- and take_screenshot_on_click
402
- and isinstance(result, ToolResult)
403
- and not result.base64_image
404
- ):
405
- screenshot_base64 = await self.executor.screenshot()
406
- if screenshot_base64:
407
- # Rescale screenshot if requested
408
- screenshot_base64 = await self._rescale_screenshot(screenshot_base64)
409
- result = ToolResult(
410
- output=result.output, error=result.error, base64_image=screenshot_base64
411
- )
412
-
413
- # Convert to content blocks
414
- return tool_result_to_content_blocks(result)
1
+ # flake8: noqa: B008
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ from typing import TYPE_CHECKING, Any, Literal, cast
6
+
7
+ from mcp import ErrorData, McpError
8
+ from mcp.types import INTERNAL_ERROR, INVALID_PARAMS, ContentBlock
9
+ from pydantic import Field
10
+
11
+ from hud.tools.types import ContentResult
12
+
13
+ from .hud import HudComputerTool
14
+ from .settings import computer_settings
15
+
16
+ if TYPE_CHECKING:
17
+ from anthropic.types.beta import BetaToolComputerUse20250124Param
18
+
19
+ from hud.tools.executors.base import BaseExecutor
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Map Anthropic key names to CLA standard keys
24
+ ANTHROPIC_TO_CLA_KEYS = {
25
+ # Common variations
26
+ "Return": "enter",
27
+ "Escape": "escape",
28
+ "ArrowUp": "up",
29
+ "ArrowDown": "down",
30
+ "ArrowLeft": "left",
31
+ "ArrowRight": "right",
32
+ "Backspace": "backspace",
33
+ "Delete": "delete",
34
+ "Tab": "tab",
35
+ "Space": "space",
36
+ "Control": "ctrl",
37
+ "Alt": "alt",
38
+ "Shift": "shift",
39
+ "Meta": "win", # Windows key
40
+ "Command": "cmd", # macOS
41
+ "Super": "win", # Linux
42
+ "PageUp": "pageup",
43
+ "PageDown": "pagedown",
44
+ "Home": "home",
45
+ "End": "end",
46
+ "Insert": "insert",
47
+ "F1": "f1",
48
+ "F2": "f2",
49
+ "F3": "f3",
50
+ "F4": "f4",
51
+ "F5": "f5",
52
+ "F6": "f6",
53
+ "F7": "f7",
54
+ "F8": "f8",
55
+ "F9": "f9",
56
+ "F10": "f10",
57
+ "F11": "f11",
58
+ "F12": "f12",
59
+ }
60
+
61
+
62
+ class AnthropicComputerTool(HudComputerTool):
63
+ """
64
+ Anthropic Computer Use tool for interacting with the computer.
65
+ """
66
+
67
+ name: str = "computer"
68
+ api_type: str = "computer_20250124"
69
+
70
+ def __init__(
71
+ self,
72
+ # Define within environment based on platform
73
+ executor: BaseExecutor | None = None,
74
+ platform_type: Literal["auto", "xdo", "pyautogui"] = "auto",
75
+ display_num: int | None = None,
76
+ # Overrides for what dimensions the agent thinks it operates in
77
+ width: int = computer_settings.ANTHROPIC_COMPUTER_WIDTH,
78
+ height: int = computer_settings.ANTHROPIC_COMPUTER_HEIGHT,
79
+ rescale_images: bool = computer_settings.ANTHROPIC_RESCALE_IMAGES,
80
+ # What the agent sees as the tool's name, title, and description
81
+ name: str | None = None,
82
+ title: str | None = None,
83
+ description: str | None = None,
84
+ **kwargs: Any,
85
+ ) -> None:
86
+ """
87
+ Initialize with Anthropic's default dimensions.
88
+
89
+ Args:
90
+ width: Target width for rescaling (None = use environment width)
91
+ height: Target height for rescaling (None = use environment height)
92
+ rescale_images: If True, rescale screenshots. If False, only rescale action coordinates
93
+ name: Tool name for MCP registration (auto-generated from class name if not provided)
94
+ title: Human-readable display name for the tool (auto-generated from class name)
95
+ description: Tool description (auto-generated from docstring if not provided)
96
+ """
97
+ super().__init__(
98
+ executor=executor,
99
+ platform_type=platform_type,
100
+ display_num=display_num,
101
+ width=width,
102
+ height=height,
103
+ rescale_images=rescale_images,
104
+ name=name or "anthropic_computer",
105
+ title=title or "Anthropic Computer Tool",
106
+ description=description or "Control computer with mouse, keyboard, and screenshot",
107
+ **kwargs,
108
+ )
109
+
110
+ def to_params(self) -> BetaToolComputerUse20250124Param:
111
+ """Convert to Anthropic tool parameters."""
112
+ return cast(
113
+ "BetaToolComputerUse20250124Param",
114
+ {
115
+ "type": self.api_type,
116
+ "name": self.name,
117
+ "display_width_px": self.width,
118
+ "display_height_px": self.height,
119
+ },
120
+ )
121
+
122
+ def _map_anthropic_key_to_cla(self, key: str) -> str:
123
+ """Map Anthropic key name to CLA standard key."""
124
+ # Handle key combinations like "ctrl+a"
125
+ if "+" in key:
126
+ parts = key.split("+")
127
+ mapped_parts = []
128
+ for part in parts:
129
+ # Try exact match first, then case-insensitive
130
+ mapped = ANTHROPIC_TO_CLA_KEYS.get(
131
+ part, ANTHROPIC_TO_CLA_KEYS.get(part.capitalize(), part.lower())
132
+ )
133
+ mapped_parts.append(mapped)
134
+ return "+".join(mapped_parts)
135
+ else:
136
+ # Single key - try exact match first, then case-insensitive
137
+ return ANTHROPIC_TO_CLA_KEYS.get(
138
+ key, ANTHROPIC_TO_CLA_KEYS.get(key.capitalize(), key.lower())
139
+ )
140
+
141
+ async def __call__(
142
+ self,
143
+ action: str = Field(..., description="The action to perform on the computer"),
144
+ coordinate: list[int] | tuple[int, int] | None = Field(
145
+ None, description="The coordinate to interact with on the computer [x, y]"
146
+ ),
147
+ text: str | None = Field(
148
+ None, description="The text to type on the computer or key to press"
149
+ ),
150
+ start_coordinate: list[int] | tuple[int, int] | None = Field(
151
+ None, description="The starting coordinate for drag actions [x, y]"
152
+ ),
153
+ scroll_direction: str | None = Field(
154
+ None, description="The direction to scroll (up, down, left, right)"
155
+ ),
156
+ scroll_amount: int | None = Field(None, description="The amount to scroll"),
157
+ duration: float | None = Field(None, description="The duration of the action in seconds"),
158
+ take_screenshot_on_click: bool = Field(
159
+ True, description="Whether to take a screenshot after clicking"
160
+ ),
161
+ ) -> list[ContentBlock]:
162
+ """
163
+ Handle Anthropic Computer Use API calls.
164
+
165
+ This converts Anthropic's action format to HudComputerTool's format.
166
+
167
+ Returns:
168
+ List of MCP content blocks
169
+ """
170
+ logger.info("AnthropicComputerTool received action: %s", action)
171
+
172
+ # Convert lists to tuples if needed
173
+ coord_tuple = None
174
+ if coordinate:
175
+ coord_tuple = tuple(coordinate) if isinstance(coordinate, list) else coordinate
176
+
177
+ start_coord_tuple = None
178
+ if start_coordinate:
179
+ start_coord_tuple = (
180
+ tuple(start_coordinate) if isinstance(start_coordinate, list) else start_coordinate
181
+ )
182
+
183
+ # Map Anthropic actions to HudComputerTool actions
184
+ if action == "screenshot":
185
+ screenshot_base64 = await self.executor.screenshot()
186
+ if screenshot_base64:
187
+ # Rescale screenshot if requested
188
+ result = ContentResult(base64_image=screenshot_base64)
189
+ else:
190
+ result = ContentResult(error="Failed to take screenshot")
191
+
192
+ elif action == "left_click" or action == "click":
193
+ if coord_tuple and len(coord_tuple) >= 2:
194
+ scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
195
+ logger.info("Scaled coordinates: %s, %s", scaled_x, scaled_y)
196
+ result = await self.executor.click(x=scaled_x, y=scaled_y)
197
+ else:
198
+ result = await self.executor.click()
199
+
200
+ elif action == "double_click":
201
+ if coord_tuple and len(coord_tuple) >= 2:
202
+ # Use pattern for double-click
203
+ scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
204
+ result = await self.executor.click(x=scaled_x, y=scaled_y, pattern=[100])
205
+ else:
206
+ result = await self.executor.click(pattern=[100])
207
+
208
+ elif action == "triple_click":
209
+ if coord_tuple and len(coord_tuple) >= 2:
210
+ # Use pattern for triple-click
211
+ scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
212
+ result = await self.executor.click(x=scaled_x, y=scaled_y, pattern=[100, 100])
213
+ else:
214
+ result = await self.executor.click(pattern=[100, 100])
215
+
216
+ elif action == "right_click":
217
+ if coord_tuple and len(coord_tuple) >= 2:
218
+ scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
219
+ result = await self.executor.click(x=scaled_x, y=scaled_y, button="right")
220
+ else:
221
+ result = await self.executor.click(button="right")
222
+
223
+ elif action == "middle_click":
224
+ if coord_tuple and len(coord_tuple) >= 2:
225
+ scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
226
+ result = await self.executor.click(x=scaled_x, y=scaled_y, button="middle")
227
+ else:
228
+ result = await self.executor.click(button="middle")
229
+
230
+ elif action == "mouse_move" or action == "move":
231
+ if coord_tuple and len(coord_tuple) >= 2:
232
+ scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
233
+ result = await self.executor.move(x=scaled_x, y=scaled_y)
234
+ else:
235
+ raise McpError(
236
+ ErrorData(code=INVALID_PARAMS, message="coordinate is required for mouse_move")
237
+ )
238
+
239
+ elif action == "type":
240
+ if text:
241
+ result = await self.executor.write(text=text)
242
+ else:
243
+ raise McpError(ErrorData(code=INVALID_PARAMS, message="text is required for type"))
244
+
245
+ elif action == "key":
246
+ if text:
247
+ # Anthropic sends single key or combo like "ctrl+a"
248
+ # Map to CLA standard key format
249
+ mapped_key = self._map_anthropic_key_to_cla(text)
250
+
251
+ # Split key combination into list of keys
252
+ if "+" in mapped_key:
253
+ keys_list = [k.strip() for k in mapped_key.split("+")]
254
+ else:
255
+ keys_list = [mapped_key]
256
+
257
+ result = await self.executor.press(keys=keys_list)
258
+ else:
259
+ raise McpError(ErrorData(code=INVALID_PARAMS, message="text is required for key"))
260
+
261
+ elif action == "scroll":
262
+ # Original implementation validates scroll_direction and scroll_amount
263
+ if scroll_direction not in ["up", "down", "left", "right"]:
264
+ raise McpError(
265
+ ErrorData(
266
+ code=INVALID_PARAMS,
267
+ message="scroll_direction must be 'up', 'down', 'left', or 'right'",
268
+ )
269
+ )
270
+
271
+ if scroll_amount is None or scroll_amount < 0:
272
+ raise McpError(
273
+ ErrorData(
274
+ code=INVALID_PARAMS, message="scroll_amount must be a non-negative int"
275
+ )
276
+ )
277
+
278
+ # Convert scroll amount from "clicks" to pixels
279
+ # Anthropic's scroll_amount represents wheel clicks, not pixels
280
+ # Standard conversion: 1 wheel click ≈ 100 pixels (3 lines of text)
281
+ PIXELS_PER_WHEEL_CLICK = 100
282
+ pixel_amount = scroll_amount * PIXELS_PER_WHEEL_CLICK
283
+
284
+ # Convert direction to scroll amounts
285
+ scroll_x = None
286
+ scroll_y = None
287
+ if scroll_direction == "down":
288
+ scroll_y = pixel_amount
289
+ elif scroll_direction == "up":
290
+ scroll_y = -pixel_amount
291
+ elif scroll_direction == "right":
292
+ scroll_x = pixel_amount
293
+ elif scroll_direction == "left":
294
+ scroll_x = -pixel_amount
295
+
296
+ if coord_tuple and len(coord_tuple) >= 2:
297
+ scaled_x, scaled_y = self._scale_coordinates(coord_tuple[0], coord_tuple[1])
298
+ result = await self.executor.scroll(
299
+ x=scaled_x, y=scaled_y, scroll_x=scroll_x, scroll_y=scroll_y
300
+ )
301
+ else:
302
+ result = await self.executor.scroll(scroll_x=scroll_x, scroll_y=scroll_y)
303
+
304
+ elif action == "left_click_drag" or action == "drag":
305
+ # Anthropic sends drag with start and end coordinates
306
+ if coord_tuple and len(coord_tuple) >= 2:
307
+ if start_coord_tuple and len(start_coord_tuple) >= 2:
308
+ # Full drag path
309
+ path = [
310
+ (start_coord_tuple[0], start_coord_tuple[1]),
311
+ (coord_tuple[0], coord_tuple[1]),
312
+ ]
313
+ scaled_path = self._scale_path(path)
314
+ result = await self.executor.drag(path=scaled_path)
315
+ else:
316
+ # Just end coordinate, drag from current position
317
+ # Original spec allows this
318
+ current_pos = [(0, 0), (coord_tuple[0], coord_tuple[1])] # Simplified
319
+ scaled_path = self._scale_path(current_pos)
320
+ result = await self.executor.drag(path=scaled_path)
321
+ else:
322
+ raise McpError(
323
+ ErrorData(
324
+ code=INVALID_PARAMS, message="coordinate is required for left_click_drag"
325
+ )
326
+ )
327
+
328
+ elif action == "wait":
329
+ # Original spec expects duration in seconds
330
+ if duration is None:
331
+ raise McpError(
332
+ ErrorData(code=INVALID_PARAMS, message="duration is required for wait")
333
+ )
334
+ if duration < 0:
335
+ raise McpError(
336
+ ErrorData(code=INVALID_PARAMS, message="duration must be non-negative")
337
+ )
338
+ if duration > 100:
339
+ raise McpError(ErrorData(code=INVALID_PARAMS, message="duration is too long"))
340
+
341
+ # Convert seconds to milliseconds for HudComputerTool
342
+ result = await self.executor.wait(time=int(duration * 1000))
343
+
344
+ elif action == "hold_key":
345
+ # Original spec has hold_key action
346
+ if text is None:
347
+ raise McpError(
348
+ ErrorData(code=INVALID_PARAMS, message="text is required for hold_key")
349
+ )
350
+ if duration is None:
351
+ raise McpError(
352
+ ErrorData(code=INVALID_PARAMS, message="duration is required for hold_key")
353
+ )
354
+ if duration < 0:
355
+ raise McpError(
356
+ ErrorData(code=INVALID_PARAMS, message="duration must be non-negative")
357
+ )
358
+ if duration > 100:
359
+ raise McpError(ErrorData(code=INVALID_PARAMS, message="duration is too long"))
360
+
361
+ # Hold key action
362
+ result = await self.executor.hold_key(key=text, duration=duration)
363
+
364
+ elif action == "left_mouse_down":
365
+ # These don't accept coordinates in original spec
366
+ if coord_tuple is not None:
367
+ raise McpError(
368
+ ErrorData(
369
+ code=INVALID_PARAMS,
370
+ message="coordinate is not accepted for left_mouse_down",
371
+ )
372
+ )
373
+ # Use generic mouse_down method
374
+ result = await self.executor.mouse_down(button="left")
375
+
376
+ elif action == "left_mouse_up":
377
+ # These don't accept coordinates in original spec
378
+ if coord_tuple is not None:
379
+ raise McpError(
380
+ ErrorData(
381
+ code=INVALID_PARAMS, message="coordinate is not accepted for left_mouse_up"
382
+ )
383
+ )
384
+ # Use generic mouse_up method
385
+ result = await self.executor.mouse_up(button="left")
386
+
387
+ elif action == "cursor_position":
388
+ result = await self.executor.position()
389
+
390
+ else:
391
+ # Unknown action
392
+ raise McpError(ErrorData(code=INTERNAL_ERROR, message=f"Invalid action: {action}"))
393
+
394
+ # Rescale screenshot in result if present
395
+ if isinstance(result, ContentResult) and result.base64_image and self.rescale_images:
396
+ rescaled_image = await self._rescale_screenshot(result.base64_image)
397
+ result.base64_image = rescaled_image
398
+
399
+ # Handle screenshot for actions that need it
400
+ screenshot_actions = {
401
+ "screenshot",
402
+ "left_click",
403
+ "click",
404
+ "double_click",
405
+ "triple_click",
406
+ "right_click",
407
+ "middle_click",
408
+ "mouse_move",
409
+ "move",
410
+ "type",
411
+ "key",
412
+ "scroll",
413
+ "left_click_drag",
414
+ "drag",
415
+ "wait",
416
+ "hold_key",
417
+ "left_mouse_down",
418
+ "left_mouse_up",
419
+ }
420
+
421
+ if (
422
+ action in screenshot_actions
423
+ and action != "screenshot"
424
+ and take_screenshot_on_click
425
+ and isinstance(result, ContentResult)
426
+ and not result.base64_image
427
+ ):
428
+ screenshot_base64 = await self.executor.screenshot()
429
+ if screenshot_base64:
430
+ # Rescale screenshot if requested
431
+ screenshot_base64 = await self._rescale_screenshot(screenshot_base64)
432
+ result = ContentResult(
433
+ output=result.output, error=result.error, base64_image=screenshot_base64
434
+ )
435
+
436
+ # Convert to content blocks
437
+ return result.to_content_blocks()