hud-python 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic (further details are linked from the original diff page).

Files changed (192):
  1. hud/__init__.py +22 -89
  2. hud/agents/__init__.py +17 -0
  3. hud/agents/art.py +101 -0
  4. hud/agents/base.py +599 -0
  5. hud/{mcp → agents}/claude.py +373 -321
  6. hud/{mcp → agents}/langchain.py +250 -250
  7. hud/agents/misc/__init__.py +7 -0
  8. hud/{agent → agents}/misc/response_agent.py +80 -80
  9. hud/{mcp → agents}/openai.py +352 -334
  10. hud/agents/openai_chat_generic.py +154 -0
  11. hud/{mcp → agents}/tests/__init__.py +1 -1
  12. hud/agents/tests/test_base.py +742 -0
  13. hud/agents/tests/test_claude.py +324 -0
  14. hud/{mcp → agents}/tests/test_client.py +363 -324
  15. hud/{mcp → agents}/tests/test_openai.py +237 -238
  16. hud/cli/__init__.py +617 -0
  17. hud/cli/__main__.py +8 -0
  18. hud/cli/analyze.py +371 -0
  19. hud/cli/analyze_metadata.py +230 -0
  20. hud/cli/build.py +427 -0
  21. hud/cli/clone.py +185 -0
  22. hud/cli/cursor.py +92 -0
  23. hud/cli/debug.py +392 -0
  24. hud/cli/docker_utils.py +83 -0
  25. hud/cli/init.py +281 -0
  26. hud/cli/interactive.py +353 -0
  27. hud/cli/mcp_server.py +756 -0
  28. hud/cli/pull.py +336 -0
  29. hud/cli/push.py +379 -0
  30. hud/cli/remote_runner.py +311 -0
  31. hud/cli/runner.py +160 -0
  32. hud/cli/tests/__init__.py +3 -0
  33. hud/cli/tests/test_analyze.py +284 -0
  34. hud/cli/tests/test_cli_init.py +265 -0
  35. hud/cli/tests/test_cli_main.py +27 -0
  36. hud/cli/tests/test_clone.py +142 -0
  37. hud/cli/tests/test_cursor.py +253 -0
  38. hud/cli/tests/test_debug.py +453 -0
  39. hud/cli/tests/test_mcp_server.py +139 -0
  40. hud/cli/tests/test_utils.py +388 -0
  41. hud/cli/utils.py +263 -0
  42. hud/clients/README.md +143 -0
  43. hud/clients/__init__.py +16 -0
  44. hud/clients/base.py +354 -0
  45. hud/clients/fastmcp.py +202 -0
  46. hud/clients/mcp_use.py +278 -0
  47. hud/clients/tests/__init__.py +1 -0
  48. hud/clients/tests/test_client_integration.py +111 -0
  49. hud/clients/tests/test_fastmcp.py +342 -0
  50. hud/clients/tests/test_protocol.py +188 -0
  51. hud/clients/utils/__init__.py +1 -0
  52. hud/clients/utils/retry_transport.py +160 -0
  53. hud/datasets.py +322 -192
  54. hud/misc/__init__.py +1 -0
  55. hud/{agent → misc}/claude_plays_pokemon.py +292 -283
  56. hud/otel/__init__.py +35 -0
  57. hud/otel/collector.py +142 -0
  58. hud/otel/config.py +164 -0
  59. hud/otel/context.py +536 -0
  60. hud/otel/exporters.py +366 -0
  61. hud/otel/instrumentation.py +97 -0
  62. hud/otel/processors.py +118 -0
  63. hud/otel/tests/__init__.py +1 -0
  64. hud/otel/tests/test_processors.py +197 -0
  65. hud/server/__init__.py +5 -5
  66. hud/server/context.py +114 -0
  67. hud/server/helper/__init__.py +5 -0
  68. hud/server/low_level.py +132 -0
  69. hud/server/server.py +166 -0
  70. hud/server/tests/__init__.py +3 -0
  71. hud/settings.py +73 -79
  72. hud/shared/__init__.py +5 -0
  73. hud/{exceptions.py → shared/exceptions.py} +180 -180
  74. hud/{server → shared}/requests.py +264 -264
  75. hud/shared/tests/test_exceptions.py +157 -0
  76. hud/{server → shared}/tests/test_requests.py +275 -275
  77. hud/telemetry/__init__.py +25 -30
  78. hud/telemetry/instrument.py +379 -0
  79. hud/telemetry/job.py +309 -141
  80. hud/telemetry/replay.py +74 -0
  81. hud/telemetry/trace.py +83 -0
  82. hud/tools/__init__.py +33 -34
  83. hud/tools/base.py +365 -65
  84. hud/tools/bash.py +161 -137
  85. hud/tools/computer/__init__.py +15 -13
  86. hud/tools/computer/anthropic.py +437 -420
  87. hud/tools/computer/hud.py +376 -334
  88. hud/tools/computer/openai.py +295 -292
  89. hud/tools/computer/settings.py +82 -0
  90. hud/tools/edit.py +314 -290
  91. hud/tools/executors/__init__.py +30 -30
  92. hud/tools/executors/base.py +539 -532
  93. hud/tools/executors/pyautogui.py +621 -619
  94. hud/tools/executors/tests/__init__.py +1 -1
  95. hud/tools/executors/tests/test_base_executor.py +338 -338
  96. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  97. hud/tools/executors/xdo.py +511 -503
  98. hud/tools/{playwright_tool.py → playwright.py} +412 -379
  99. hud/tools/tests/__init__.py +3 -3
  100. hud/tools/tests/test_base.py +282 -0
  101. hud/tools/tests/test_bash.py +158 -152
  102. hud/tools/tests/test_bash_extended.py +197 -0
  103. hud/tools/tests/test_computer.py +425 -52
  104. hud/tools/tests/test_computer_actions.py +34 -34
  105. hud/tools/tests/test_edit.py +259 -240
  106. hud/tools/tests/test_init.py +27 -27
  107. hud/tools/tests/test_playwright_tool.py +183 -183
  108. hud/tools/tests/test_tools.py +145 -157
  109. hud/tools/tests/test_utils.py +156 -156
  110. hud/tools/types.py +72 -0
  111. hud/tools/utils.py +50 -50
  112. hud/types.py +136 -89
  113. hud/utils/__init__.py +10 -16
  114. hud/utils/async_utils.py +65 -0
  115. hud/utils/design.py +168 -0
  116. hud/utils/mcp.py +55 -0
  117. hud/utils/progress.py +149 -149
  118. hud/utils/telemetry.py +66 -66
  119. hud/utils/tests/test_async_utils.py +173 -0
  120. hud/utils/tests/test_init.py +17 -21
  121. hud/utils/tests/test_progress.py +261 -225
  122. hud/utils/tests/test_telemetry.py +82 -37
  123. hud/utils/tests/test_version.py +8 -8
  124. hud/version.py +7 -7
  125. hud_python-0.4.0.dist-info/METADATA +474 -0
  126. hud_python-0.4.0.dist-info/RECORD +132 -0
  127. hud_python-0.4.0.dist-info/entry_points.txt +3 -0
  128. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/licenses/LICENSE +21 -21
  129. hud/adapters/__init__.py +0 -8
  130. hud/adapters/claude/__init__.py +0 -5
  131. hud/adapters/claude/adapter.py +0 -180
  132. hud/adapters/claude/tests/__init__.py +0 -1
  133. hud/adapters/claude/tests/test_adapter.py +0 -519
  134. hud/adapters/common/__init__.py +0 -6
  135. hud/adapters/common/adapter.py +0 -178
  136. hud/adapters/common/tests/test_adapter.py +0 -289
  137. hud/adapters/common/types.py +0 -446
  138. hud/adapters/operator/__init__.py +0 -5
  139. hud/adapters/operator/adapter.py +0 -108
  140. hud/adapters/operator/tests/__init__.py +0 -1
  141. hud/adapters/operator/tests/test_adapter.py +0 -370
  142. hud/agent/__init__.py +0 -19
  143. hud/agent/base.py +0 -126
  144. hud/agent/claude.py +0 -271
  145. hud/agent/langchain.py +0 -215
  146. hud/agent/misc/__init__.py +0 -3
  147. hud/agent/operator.py +0 -268
  148. hud/agent/tests/__init__.py +0 -1
  149. hud/agent/tests/test_base.py +0 -202
  150. hud/env/__init__.py +0 -11
  151. hud/env/client.py +0 -35
  152. hud/env/docker_client.py +0 -349
  153. hud/env/environment.py +0 -446
  154. hud/env/local_docker_client.py +0 -358
  155. hud/env/remote_client.py +0 -212
  156. hud/env/remote_docker_client.py +0 -292
  157. hud/gym.py +0 -130
  158. hud/job.py +0 -773
  159. hud/mcp/__init__.py +0 -17
  160. hud/mcp/base.py +0 -631
  161. hud/mcp/client.py +0 -312
  162. hud/mcp/tests/test_base.py +0 -512
  163. hud/mcp/tests/test_claude.py +0 -294
  164. hud/task.py +0 -149
  165. hud/taskset.py +0 -237
  166. hud/telemetry/_trace.py +0 -347
  167. hud/telemetry/context.py +0 -230
  168. hud/telemetry/exporter.py +0 -575
  169. hud/telemetry/instrumentation/__init__.py +0 -3
  170. hud/telemetry/instrumentation/mcp.py +0 -259
  171. hud/telemetry/instrumentation/registry.py +0 -59
  172. hud/telemetry/mcp_models.py +0 -270
  173. hud/telemetry/tests/__init__.py +0 -1
  174. hud/telemetry/tests/test_context.py +0 -210
  175. hud/telemetry/tests/test_trace.py +0 -312
  176. hud/tools/helper/README.md +0 -56
  177. hud/tools/helper/__init__.py +0 -9
  178. hud/tools/helper/mcp_server.py +0 -78
  179. hud/tools/helper/server_initialization.py +0 -115
  180. hud/tools/helper/utils.py +0 -58
  181. hud/trajectory.py +0 -94
  182. hud/utils/agent.py +0 -37
  183. hud/utils/common.py +0 -256
  184. hud/utils/config.py +0 -120
  185. hud/utils/deprecation.py +0 -115
  186. hud/utils/misc.py +0 -53
  187. hud/utils/tests/test_common.py +0 -277
  188. hud/utils/tests/test_config.py +0 -129
  189. hud_python-0.3.5.dist-info/METADATA +0 -284
  190. hud_python-0.3.5.dist-info/RECORD +0 -120
  191. hud/{adapters/common → shared}/tests/__init__.py +0 -0
  192. {hud_python-0.3.5.dist-info → hud_python-0.4.0.dist-info}/WHEEL +0 -0
hud/tools/{playwright_tool.py → playwright.py} (entry 98 above: +412 -379)
@@ -1,379 +1,412 @@
1
- """Playwright web automation tool for HUD."""
2
-
3
- from __future__ import annotations
4
-
5
- import logging
6
- import os
7
- from typing import TYPE_CHECKING, Any, Literal
8
-
9
- from mcp import ErrorData, McpError
10
- from mcp.types import INVALID_PARAMS, ImageContent, TextContent
11
- from pydantic import Field
12
-
13
- from hud.tools.base import ToolResult, tool_result_to_content_blocks
14
-
15
- if TYPE_CHECKING:
16
- from playwright.async_api import Browser, BrowserContext, Page
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
-
21
- class PlaywrightTool:
22
- """Playwright tool for web automation."""
23
-
24
- def __init__(self, cdp_url: str | None = None) -> None:
25
- super().__init__()
26
- self._cdp_url = cdp_url
27
- self._playwright = None
28
- self._browser: Browser | None = None
29
- self._context: BrowserContext | None = None
30
- self._page: Page | None = None
31
-
32
- @property
33
- def page(self) -> Page:
34
- """Get the current page, raising an error if not initialized."""
35
- if self._page is None:
36
- raise RuntimeError("Browser page is not initialized. Call ensure_browser_launched().")
37
- return self._page
38
-
39
- async def __call__(
40
- self,
41
- action: str = Field(
42
- ...,
43
- description="The action to perform (navigate, screenshot, click, type, get_page_info, wait_for_element)", # noqa: E501
44
- ),
45
- url: str | None = Field(None, description="URL to navigate to (for navigate action)"),
46
- selector: str | None = Field(
47
- None, description="CSS selector for element (for click, type, wait_for_element actions)"
48
- ),
49
- text: str | None = Field(None, description="Text to type (for type action)"),
50
- wait_for_load_state: Literal["commit", "domcontentloaded", "load", "networkidle"]
51
- | None = Field(
52
- None,
53
- description="State to wait for: commit, domcontentloaded, load, networkidle (default: networkidle)", # noqa: E501
54
- ),
55
- ) -> list[ImageContent | TextContent]:
56
- """
57
- Execute a Playwright web automation action.
58
-
59
- Returns:
60
- List of MCP content blocks
61
- """
62
- logger.info("PlaywrightTool executing action: %s", action)
63
-
64
- try:
65
- if action == "navigate":
66
- if url is None:
67
- raise McpError(
68
- ErrorData(
69
- code=INVALID_PARAMS, message="url parameter is required for navigate"
70
- )
71
- )
72
- result = await self.navigate(url, wait_for_load_state or "networkidle")
73
-
74
- elif action == "screenshot":
75
- result = await self.screenshot()
76
-
77
- elif action == "click":
78
- if selector is None:
79
- raise McpError(
80
- ErrorData(
81
- code=INVALID_PARAMS, message="selector parameter is required for click"
82
- )
83
- )
84
- result = await self.click(selector)
85
-
86
- elif action == "type":
87
- if selector is None:
88
- raise McpError(
89
- ErrorData(
90
- code=INVALID_PARAMS, message="selector parameter is required for type"
91
- )
92
- )
93
- if text is None:
94
- raise McpError(
95
- ErrorData(
96
- code=INVALID_PARAMS, message="text parameter is required for type"
97
- )
98
- )
99
- result = await self.type_text(selector, text)
100
-
101
- elif action == "get_page_info":
102
- result = await self.get_page_info()
103
-
104
- elif action == "wait_for_element":
105
- if selector is None:
106
- raise McpError(
107
- ErrorData(
108
- code=INVALID_PARAMS,
109
- message="selector parameter is required for wait_for_element",
110
- )
111
- )
112
- result = await self.wait_for_element(selector)
113
-
114
- else:
115
- raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))
116
-
117
- # Convert dict result to ToolResult
118
- if isinstance(result, dict):
119
- if result.get("success"):
120
- tool_result = ToolResult(output=result.get("message", ""))
121
- else:
122
- tool_result = ToolResult(error=result.get("error", "Unknown error"))
123
- elif isinstance(result, ToolResult):
124
- tool_result = result
125
- else:
126
- tool_result = ToolResult(output=str(result))
127
-
128
- # Convert result to content blocks
129
- return tool_result_to_content_blocks(tool_result)
130
-
131
- except McpError:
132
- raise
133
- except Exception as e:
134
- logger.error("PlaywrightTool error: %s", e)
135
- raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Playwright error: {e}")) from e
136
-
137
- async def _ensure_browser(self) -> None:
138
- """Ensure browser is launched and ready."""
139
- if self._browser is None or not self._browser.is_connected():
140
- if self._cdp_url:
141
- logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
142
- else:
143
- logger.info("Launching Playwright browser...")
144
-
145
- # Ensure DISPLAY is set (only needed for local browser)
146
- if not self._cdp_url:
147
- os.environ["DISPLAY"] = os.environ.get("DISPLAY", ":1")
148
-
149
- if self._playwright is None:
150
- try:
151
- from playwright.async_api import async_playwright
152
-
153
- self._playwright = await async_playwright().start()
154
- except ImportError:
155
- raise ImportError(
156
- "Playwright is not installed. Please install with: pip install playwright"
157
- ) from None
158
-
159
- # Connect via CDP URL or launch local browser
160
- if self._cdp_url:
161
- # Connect to remote browser via CDP
162
- self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url)
163
-
164
- if self._browser is None:
165
- raise RuntimeError("Failed to connect to remote browser")
166
-
167
- # Use existing context or create new one
168
- contexts = self._browser.contexts
169
- if contexts:
170
- self._context = contexts[0]
171
- else:
172
- self._context = await self._browser.new_context(
173
- viewport={"width": 1920, "height": 1080},
174
- ignore_https_errors=True,
175
- )
176
- else:
177
- # Launch local browser
178
- self._browser = await self._playwright.chromium.launch(
179
- headless=False,
180
- args=[
181
- "--no-sandbox",
182
- "--disable-dev-shm-usage",
183
- "--disable-gpu",
184
- "--disable-web-security",
185
- "--disable-features=IsolateOrigins,site-per-process",
186
- "--disable-blink-features=AutomationControlled",
187
- "--window-size=1920,1080",
188
- "--window-position=0,0",
189
- "--start-maximized",
190
- "--disable-background-timer-throttling",
191
- "--disable-backgrounding-occluded-windows",
192
- "--disable-renderer-backgrounding",
193
- "--disable-features=TranslateUI",
194
- "--disable-ipc-flooding-protection",
195
- "--disable-default-apps",
196
- "--no-first-run",
197
- "--disable-sync",
198
- "--no-default-browser-check",
199
- ],
200
- )
201
-
202
- if self._browser is None:
203
- raise RuntimeError("Browser failed to initialize")
204
-
205
- self._context = await self._browser.new_context(
206
- viewport={"width": 1920, "height": 1080},
207
- ignore_https_errors=True,
208
- )
209
-
210
- if self._context is None:
211
- raise RuntimeError("Browser context failed to initialize")
212
-
213
- self._page = await self._context.new_page()
214
- logger.info("Playwright browser launched successfully")
215
-
216
- async def navigate(
217
- self,
218
- url: str,
219
- wait_for_load_state: Literal[
220
- "commit", "domcontentloaded", "load", "networkidle"
221
- ] = "networkidle",
222
- ) -> dict[str, Any]:
223
- """Navigate to a URL.
224
-
225
- Args:
226
- url: URL to navigate to
227
- wait_for_load_state: Load state to wait for (load, domcontentloaded, networkidle)
228
-
229
- Returns:
230
- Dict with navigation result
231
- """
232
- await self._ensure_browser()
233
-
234
- logger.info("Navigating to %s", url)
235
- try:
236
- await self.page.goto(url, wait_until=wait_for_load_state)
237
- current_url = self.page.url
238
- title = await self.page.title()
239
-
240
- return {
241
- "success": True,
242
- "url": current_url,
243
- "title": title,
244
- "message": f"Successfully navigated to {url}",
245
- }
246
- except Exception as e:
247
- logger.error("Navigation failed: %s", e)
248
- return {
249
- "success": False,
250
- "error": str(e),
251
- "message": f"Failed to navigate to {url}: {e}",
252
- }
253
-
254
- async def screenshot(self) -> ToolResult:
255
- """Take a screenshot of the current page.
256
-
257
- Returns:
258
- ToolResult with base64_image
259
- """
260
- await self._ensure_browser()
261
-
262
- try:
263
- # Always return base64 encoded screenshot as ToolResult
264
- screenshot_bytes = await self.page.screenshot(full_page=True)
265
- import base64
266
-
267
- screenshot_b64 = base64.b64encode(screenshot_bytes).decode()
268
- return ToolResult(base64_image=screenshot_b64)
269
- except Exception as e:
270
- logger.error("Screenshot failed: %s", e)
271
- return ToolResult(error=f"Failed to take screenshot: {e}")
272
-
273
- async def click(self, selector: str) -> dict[str, Any]:
274
- """Click an element by selector.
275
-
276
- Args:
277
- selector: CSS selector for element to click
278
-
279
- Returns:
280
- Dict with click result
281
- """
282
- await self._ensure_browser()
283
-
284
- try:
285
- await self.page.click(selector)
286
- return {"success": True, "message": f"Clicked element: {selector}"}
287
- except Exception as e:
288
- logger.error("Click failed: %s", e)
289
- return {
290
- "success": False,
291
- "error": str(e),
292
- "message": f"Failed to click {selector}: {e}",
293
- }
294
-
295
- async def type_text(self, selector: str, text: str) -> dict[str, Any]:
296
- """Type text into an element.
297
-
298
- Args:
299
- selector: CSS selector for input element
300
- text: Text to type
301
-
302
- Returns:
303
- Dict with type result
304
- """
305
- await self._ensure_browser()
306
-
307
- try:
308
- await self.page.fill(selector, text)
309
- return {"success": True, "message": f"Typed '{text}' into {selector}"}
310
- except Exception as e:
311
- logger.error("Type failed: %s", e)
312
- return {
313
- "success": False,
314
- "error": str(e),
315
- "message": f"Failed to type into {selector}: {e}",
316
- }
317
-
318
- async def get_page_info(self) -> dict[str, Any]:
319
- """Get current page information.
320
-
321
- Returns:
322
- Dict with page info
323
- """
324
- await self._ensure_browser()
325
-
326
- try:
327
- url = self.page.url
328
- title = await self.page.title()
329
- return {
330
- "success": True,
331
- "url": url,
332
- "title": title,
333
- "message": f"Current page: {title} ({url})",
334
- }
335
- except Exception as e:
336
- logger.error("Get page info failed: %s", e)
337
- return {"success": False, "error": str(e), "message": f"Failed to get page info: {e}"}
338
-
339
- async def wait_for_element(self, selector: str) -> dict[str, Any]:
340
- """Wait for an element to appear.
341
-
342
- Args:
343
- selector: CSS selector for element
344
-
345
- Returns:
346
- Dict with wait result
347
- """
348
- await self._ensure_browser()
349
-
350
- try:
351
- await self.page.wait_for_selector(selector, timeout=30000)
352
- return {"success": True, "message": f"Element {selector} appeared"}
353
- except Exception as e:
354
- logger.error("Wait for element failed: %s", e)
355
- return {
356
- "success": False,
357
- "error": str(e),
358
- "message": f"Element {selector} did not appear within 30000ms: {e}",
359
- }
360
-
361
- async def close(self) -> None:
362
- """Close browser and cleanup."""
363
- if self._browser:
364
- try:
365
- await self._browser.close()
366
- logger.info("Browser closed")
367
- except Exception as e:
368
- logger.error("Error closing browser: %s", e)
369
-
370
- if self._playwright:
371
- try:
372
- await self._playwright.stop()
373
- except Exception as e:
374
- logger.error("Error stopping playwright: %s", e)
375
-
376
- self._browser = None
377
- self._context = None
378
- self._page = None
379
- self._playwright = None
1
+ """Playwright web automation tool for HUD."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ from typing import TYPE_CHECKING, Any, Literal
8
+
9
+ from mcp import ErrorData, McpError
10
+ from mcp.types import INVALID_PARAMS, ContentBlock
11
+ from pydantic import Field
12
+
13
+ from .base import BaseTool
14
+ from .types import ContentResult
15
+
16
+ if TYPE_CHECKING:
17
+ from playwright.async_api import Browser, BrowserContext, Page
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ class PlaywrightTool(BaseTool):
23
+ """Playwright tool for web automation."""
24
+
25
+ def __init__(self, page: Page | None = None, cdp_url: str | None = None) -> None:
26
+ """Initialize PlaywrightTool.
27
+
28
+ Args:
29
+ page: Optional existing Playwright Page to use as context
30
+ cdp_url: Optional Chrome DevTools Protocol URL for connecting to existing browser
31
+ """
32
+ super().__init__(
33
+ env=page,
34
+ name="playwright",
35
+ title="Playwright Browser",
36
+ description="Web automation tool using Playwright",
37
+ )
38
+ self._cdp_url = cdp_url
39
+ self._playwright = None
40
+ # Internal browser management - not exposed as context
41
+ self._browser: Browser | None = None
42
+ self._browser_context: BrowserContext | None = None
43
+
44
+ @property
45
+ def page(self) -> Page | None:
46
+ """Get the current page."""
47
+ return self.env
48
+
49
+ @page.setter
50
+ def page(self, value: Page | None) -> None:
51
+ """Set the page."""
52
+ self.env = value
53
+
54
+ async def __call__(
55
+ self,
56
+ action: str = Field(
57
+ ...,
58
+ description="The action to perform (navigate, screenshot, click, type, get_page_info, wait_for_element)", # noqa: E501
59
+ ),
60
+ url: str | None = Field(None, description="URL to navigate to (for navigate action)"),
61
+ selector: str | None = Field(
62
+ None, description="CSS selector for element (for click, type, wait_for_element actions)"
63
+ ),
64
+ text: str | None = Field(None, description="Text to type (for type action)"),
65
+ wait_for_load_state: Literal["commit", "domcontentloaded", "load", "networkidle"]
66
+ | None = Field(
67
+ None,
68
+ description="State to wait for: commit, domcontentloaded, load, networkidle (default: networkidle)", # noqa: E501
69
+ ),
70
+ ) -> list[ContentBlock]:
71
+ """
72
+ Execute a Playwright web automation action.
73
+
74
+ Returns:
75
+ List of MCP content blocks
76
+ """
77
+ logger.info("PlaywrightTool executing action: %s", action)
78
+
79
+ try:
80
+ if action == "navigate":
81
+ if url is None:
82
+ raise McpError(
83
+ ErrorData(
84
+ code=INVALID_PARAMS, message="url parameter is required for navigate"
85
+ )
86
+ )
87
+ result = await self.navigate(url, wait_for_load_state or "networkidle")
88
+
89
+ elif action == "screenshot":
90
+ result = await self.screenshot()
91
+
92
+ elif action == "click":
93
+ if selector is None:
94
+ raise McpError(
95
+ ErrorData(
96
+ code=INVALID_PARAMS, message="selector parameter is required for click"
97
+ )
98
+ )
99
+ result = await self.click(selector)
100
+
101
+ elif action == "type":
102
+ if selector is None:
103
+ raise McpError(
104
+ ErrorData(
105
+ code=INVALID_PARAMS, message="selector parameter is required for type"
106
+ )
107
+ )
108
+ if text is None:
109
+ raise McpError(
110
+ ErrorData(
111
+ code=INVALID_PARAMS, message="text parameter is required for type"
112
+ )
113
+ )
114
+ result = await self.type_text(selector, text)
115
+
116
+ elif action == "get_page_info":
117
+ result = await self.get_page_info()
118
+
119
+ elif action == "wait_for_element":
120
+ if selector is None:
121
+ raise McpError(
122
+ ErrorData(
123
+ code=INVALID_PARAMS,
124
+ message="selector parameter is required for wait_for_element",
125
+ )
126
+ )
127
+ result = await self.wait_for_element(selector)
128
+
129
+ else:
130
+ raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))
131
+
132
+ # Convert dict result to ToolResult
133
+ if isinstance(result, dict):
134
+ if result.get("success"):
135
+ tool_result = ContentResult(output=result.get("message", ""))
136
+ else:
137
+ tool_result = ContentResult(error=result.get("error", "Unknown error"))
138
+ elif isinstance(result, ContentResult):
139
+ tool_result = result
140
+ else:
141
+ tool_result = ContentResult(output=str(result))
142
+
143
+ # Convert result to content blocks
144
+ return tool_result.to_content_blocks()
145
+
146
+ except McpError:
147
+ raise
148
+ except Exception as e:
149
+ logger.error("PlaywrightTool error: %s", e)
150
+ raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Playwright error: {e}")) from e
151
+
152
+ async def _ensure_browser(self) -> None:
153
+ """Ensure browser is launched and ready."""
154
+ if self._browser is None or not self._browser.is_connected():
155
+ if self._cdp_url:
156
+ logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
157
+ else:
158
+ logger.info("Launching Playwright browser...")
159
+
160
+ # Ensure DISPLAY is set (only needed for local browser)
161
+ if not self._cdp_url:
162
+ os.environ["DISPLAY"] = os.environ.get("DISPLAY", ":1")
163
+
164
+ if self._playwright is None:
165
+ try:
166
+ from playwright.async_api import async_playwright
167
+
168
+ self._playwright = await async_playwright().start()
169
+ except ImportError:
170
+ raise ImportError(
171
+ "Playwright is not installed. Please install with: pip install playwright"
172
+ ) from None
173
+
174
+ # Connect via CDP URL or launch local browser
175
+ if self._cdp_url:
176
+ # Connect to remote browser via CDP
177
+ self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url)
178
+
179
+ if self._browser is None:
180
+ raise RuntimeError("Failed to connect to remote browser")
181
+
182
+ # Use existing context or create new one
183
+ contexts = self._browser.contexts
184
+ if contexts:
185
+ self._browser_context = contexts[0]
186
+ else:
187
+ self._browser_context = await self._browser.new_context(
188
+ viewport={"width": 1920, "height": 1080},
189
+ ignore_https_errors=True,
190
+ )
191
+ else:
192
+ # Launch local browser
193
+ self._browser = await self._playwright.chromium.launch(
194
+ headless=False,
195
+ args=[
196
+ "--no-sandbox",
197
+ "--disable-dev-shm-usage",
198
+ "--disable-gpu",
199
+ "--disable-web-security",
200
+ "--disable-features=IsolateOrigins,site-per-process",
201
+ "--disable-blink-features=AutomationControlled",
202
+ "--window-size=1920,1080",
203
+ "--window-position=0,0",
204
+ "--start-maximized",
205
+ "--disable-background-timer-throttling",
206
+ "--disable-backgrounding-occluded-windows",
207
+ "--disable-renderer-backgrounding",
208
+ "--disable-features=TranslateUI",
209
+ "--disable-ipc-flooding-protection",
210
+ "--disable-default-apps",
211
+ "--no-first-run",
212
+ "--disable-sync",
213
+ "--no-default-browser-check",
214
+ ],
215
+ )
216
+
217
+ if self._browser is None:
218
+ raise RuntimeError("Browser failed to initialize")
219
+
220
+ self._browser_context = await self._browser.new_context(
221
+ viewport={"width": 1920, "height": 1080},
222
+ ignore_https_errors=True,
223
+ )
224
+
225
+ if self._browser_context is None:
226
+ raise RuntimeError("Browser context failed to initialize")
227
+
228
+ self.page = await self._browser_context.new_page()
229
+ logger.info("Playwright browser launched successfully")
230
+
231
+ async def navigate(
232
+ self,
233
+ url: str,
234
+ wait_for_load_state: Literal[
235
+ "commit", "domcontentloaded", "load", "networkidle"
236
+ ] = "networkidle",
237
+ ) -> dict[str, Any]:
238
+ """Navigate to a URL.
239
+
240
+ Args:
241
+ url: URL to navigate to
242
+ wait_for_load_state: Load state to wait for (load, domcontentloaded, networkidle)
243
+
244
+ Returns:
245
+ Dict with navigation result
246
+ """
247
+ await self._ensure_browser()
248
+ if self.page is None:
249
+ raise RuntimeError("Page not initialized after _ensure_browser")
250
+
251
+ logger.info("Navigating to %s", url)
252
+ try:
253
+ await self.page.goto(url, wait_until=wait_for_load_state)
254
+ current_url = self.page.url
255
+ title = await self.page.title()
256
+
257
+ return {
258
+ "success": True,
259
+ "url": current_url,
260
+ "title": title,
261
+ "message": f"Successfully navigated to {url}",
262
+ }
263
+ except Exception as e:
264
+ logger.error("Navigation failed: %s", e)
265
+ return {
266
+ "success": False,
267
+ "error": str(e),
268
+ "message": f"Failed to navigate to {url}: {e}",
269
+ }
270
+
271
+ async def screenshot(self) -> ContentResult:
272
+ """Take a screenshot of the current page.
273
+
274
+ Returns:
275
+ ToolResult with base64_image
276
+ """
277
+ await self._ensure_browser()
278
+ if self.page is None:
279
+ raise RuntimeError("Page not initialized after _ensure_browser")
280
+
281
+ try:
282
+ # Always return base64 encoded screenshot as ToolResult
283
+ screenshot_bytes = await self.page.screenshot(full_page=True)
284
+ import base64
285
+
286
+ screenshot_b64 = base64.b64encode(screenshot_bytes).decode()
287
+ return ContentResult(base64_image=screenshot_b64)
288
+ except Exception as e:
289
+ logger.error("Screenshot failed: %s", e)
290
+ return ContentResult(error=f"Failed to take screenshot: {e}")
291
+
292
+ async def click(
293
+ self,
294
+ selector: str,
295
+ button: Literal["left", "right", "middle"] = "left",
296
+ count: int = 1,
297
+ wait_for_navigation: bool = True,
298
+ ) -> dict[str, Any]:
299
+ """Click an element by selector.
300
+
301
+ Args:
302
+ selector: CSS selector for element to click
303
+
304
+ Returns:
305
+ Dict with click result
306
+ """
307
+ await self._ensure_browser()
308
+ if self.page is None:
309
+ raise RuntimeError("Page not initialized after _ensure_browser")
310
+
311
+ try:
312
+ await self.page.click(selector, button=button, click_count=count)
313
+ return {"success": True, "message": f"Clicked element: {selector}"}
314
+ except Exception as e:
315
+ logger.error("Click failed: %s", e)
316
+ return {
317
+ "success": False,
318
+ "error": str(e),
319
+ "message": f"Failed to click {selector}: {e}",
320
+ }
321
+
322
+ async def type_text(self, selector: str, text: str) -> dict[str, Any]:
323
+ """Type text into an element.
324
+
325
+ Args:
326
+ selector: CSS selector for input element
327
+ text: Text to type
328
+
329
+ Returns:
330
+ Dict with type result
331
+ """
332
+ await self._ensure_browser()
333
+ if self.page is None:
334
+ raise RuntimeError("Page not initialized after _ensure_browser")
335
+
336
+ try:
337
+ await self.page.fill(selector, text)
338
+ return {"success": True, "message": f"Typed '{text}' into {selector}"}
339
+ except Exception as e:
340
+ logger.error("Type failed: %s", e)
341
+ return {
342
+ "success": False,
343
+ "error": str(e),
344
+ "message": f"Failed to type into {selector}: {e}",
345
+ }
346
+
347
+ async def get_page_info(self) -> dict[str, Any]:
348
+ """Get current page information.
349
+
350
+ Returns:
351
+ Dict with page info
352
+ """
353
+ await self._ensure_browser()
354
+ if self.page is None:
355
+ raise RuntimeError("Page not initialized after _ensure_browser")
356
+
357
+ try:
358
+ url = self.page.url
359
+ title = await self.page.title()
360
+ return {
361
+ "success": True,
362
+ "url": url,
363
+ "title": title,
364
+ "message": f"Current page: {title} ({url})",
365
+ }
366
+ except Exception as e:
367
+ logger.error("Get page info failed: %s", e)
368
+ return {"success": False, "error": str(e), "message": f"Failed to get page info: {e}"}
369
+
370
+ async def wait_for_element(self, selector: str) -> dict[str, Any]:
371
+ """Wait for an element to appear.
372
+
373
+ Args:
374
+ selector: CSS selector for element
375
+
376
+ Returns:
377
+ Dict with wait result
378
+ """
379
+ await self._ensure_browser()
380
+ if self.page is None:
381
+ raise RuntimeError("Page not initialized after _ensure_browser")
382
+
383
+ try:
384
+ await self.page.wait_for_selector(selector, timeout=30000)
385
+ return {"success": True, "message": f"Element {selector} appeared"}
386
+ except Exception as e:
387
+ logger.error("Wait for element failed: %s", e)
388
+ return {
389
+ "success": False,
390
+ "error": str(e),
391
+ "message": f"Element {selector} did not appear within 30000ms: {e}",
392
+ }
393
+
394
+ async def close(self) -> None:
395
+ """Close browser and cleanup."""
396
+ if self._browser:
397
+ try:
398
+ await self._browser.close()
399
+ logger.info("Browser closed")
400
+ except Exception as e:
401
+ logger.error("Error closing browser: %s", e)
402
+
403
+ if self._playwright:
404
+ try:
405
+ await self._playwright.stop()
406
+ except Exception as e:
407
+ logger.error("Error stopping playwright: %s", e)
408
+
409
+ self._browser = None
410
+ self._browser_context = None
411
+ self.env = None # Clear the page
412
+ self._playwright = None