hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hud-python might be problematic. Click here for more details.

Files changed (130)
  1. hud/__init__.py +22 -22
  2. hud/agents/__init__.py +13 -15
  3. hud/agents/base.py +599 -599
  4. hud/agents/claude.py +373 -373
  5. hud/agents/langchain.py +261 -250
  6. hud/agents/misc/__init__.py +7 -7
  7. hud/agents/misc/response_agent.py +82 -80
  8. hud/agents/openai.py +352 -352
  9. hud/agents/openai_chat_generic.py +154 -154
  10. hud/agents/tests/__init__.py +1 -1
  11. hud/agents/tests/test_base.py +742 -742
  12. hud/agents/tests/test_claude.py +324 -324
  13. hud/agents/tests/test_client.py +363 -363
  14. hud/agents/tests/test_openai.py +237 -237
  15. hud/cli/__init__.py +617 -617
  16. hud/cli/__main__.py +8 -8
  17. hud/cli/analyze.py +371 -371
  18. hud/cli/analyze_metadata.py +230 -230
  19. hud/cli/build.py +498 -427
  20. hud/cli/clone.py +185 -185
  21. hud/cli/cursor.py +92 -92
  22. hud/cli/debug.py +392 -392
  23. hud/cli/docker_utils.py +83 -83
  24. hud/cli/init.py +280 -281
  25. hud/cli/interactive.py +353 -353
  26. hud/cli/mcp_server.py +764 -756
  27. hud/cli/pull.py +330 -336
  28. hud/cli/push.py +404 -370
  29. hud/cli/remote_runner.py +311 -311
  30. hud/cli/runner.py +160 -160
  31. hud/cli/tests/__init__.py +3 -3
  32. hud/cli/tests/test_analyze.py +284 -284
  33. hud/cli/tests/test_cli_init.py +265 -265
  34. hud/cli/tests/test_cli_main.py +27 -27
  35. hud/cli/tests/test_clone.py +142 -142
  36. hud/cli/tests/test_cursor.py +253 -253
  37. hud/cli/tests/test_debug.py +453 -453
  38. hud/cli/tests/test_mcp_server.py +139 -139
  39. hud/cli/tests/test_utils.py +388 -388
  40. hud/cli/utils.py +263 -263
  41. hud/clients/README.md +143 -143
  42. hud/clients/__init__.py +16 -16
  43. hud/clients/base.py +378 -379
  44. hud/clients/fastmcp.py +222 -222
  45. hud/clients/mcp_use.py +298 -278
  46. hud/clients/tests/__init__.py +1 -1
  47. hud/clients/tests/test_client_integration.py +111 -111
  48. hud/clients/tests/test_fastmcp.py +342 -342
  49. hud/clients/tests/test_protocol.py +188 -188
  50. hud/clients/utils/__init__.py +1 -1
  51. hud/clients/utils/retry_transport.py +160 -160
  52. hud/datasets.py +327 -322
  53. hud/misc/__init__.py +1 -1
  54. hud/misc/claude_plays_pokemon.py +292 -292
  55. hud/otel/__init__.py +35 -35
  56. hud/otel/collector.py +142 -142
  57. hud/otel/config.py +164 -164
  58. hud/otel/context.py +536 -536
  59. hud/otel/exporters.py +366 -366
  60. hud/otel/instrumentation.py +97 -97
  61. hud/otel/processors.py +118 -118
  62. hud/otel/tests/__init__.py +1 -1
  63. hud/otel/tests/test_processors.py +197 -197
  64. hud/server/__init__.py +5 -5
  65. hud/server/context.py +114 -114
  66. hud/server/helper/__init__.py +5 -5
  67. hud/server/low_level.py +132 -132
  68. hud/server/server.py +170 -166
  69. hud/server/tests/__init__.py +3 -3
  70. hud/settings.py +73 -73
  71. hud/shared/__init__.py +5 -5
  72. hud/shared/exceptions.py +180 -180
  73. hud/shared/requests.py +264 -264
  74. hud/shared/tests/test_exceptions.py +157 -157
  75. hud/shared/tests/test_requests.py +275 -275
  76. hud/telemetry/__init__.py +25 -25
  77. hud/telemetry/instrument.py +379 -379
  78. hud/telemetry/job.py +309 -309
  79. hud/telemetry/replay.py +74 -74
  80. hud/telemetry/trace.py +83 -83
  81. hud/tools/__init__.py +33 -33
  82. hud/tools/base.py +365 -365
  83. hud/tools/bash.py +161 -161
  84. hud/tools/computer/__init__.py +15 -15
  85. hud/tools/computer/anthropic.py +437 -437
  86. hud/tools/computer/hud.py +376 -376
  87. hud/tools/computer/openai.py +295 -295
  88. hud/tools/computer/settings.py +82 -82
  89. hud/tools/edit.py +314 -314
  90. hud/tools/executors/__init__.py +30 -30
  91. hud/tools/executors/base.py +539 -539
  92. hud/tools/executors/pyautogui.py +621 -621
  93. hud/tools/executors/tests/__init__.py +1 -1
  94. hud/tools/executors/tests/test_base_executor.py +338 -338
  95. hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
  96. hud/tools/executors/xdo.py +511 -511
  97. hud/tools/playwright.py +412 -412
  98. hud/tools/tests/__init__.py +3 -3
  99. hud/tools/tests/test_base.py +282 -282
  100. hud/tools/tests/test_bash.py +158 -158
  101. hud/tools/tests/test_bash_extended.py +197 -197
  102. hud/tools/tests/test_computer.py +425 -425
  103. hud/tools/tests/test_computer_actions.py +34 -34
  104. hud/tools/tests/test_edit.py +259 -259
  105. hud/tools/tests/test_init.py +27 -27
  106. hud/tools/tests/test_playwright_tool.py +183 -183
  107. hud/tools/tests/test_tools.py +145 -145
  108. hud/tools/tests/test_utils.py +156 -156
  109. hud/tools/types.py +72 -72
  110. hud/tools/utils.py +50 -50
  111. hud/types.py +136 -136
  112. hud/utils/__init__.py +10 -10
  113. hud/utils/async_utils.py +65 -65
  114. hud/utils/design.py +236 -168
  115. hud/utils/mcp.py +55 -55
  116. hud/utils/progress.py +149 -149
  117. hud/utils/telemetry.py +66 -66
  118. hud/utils/tests/test_async_utils.py +173 -173
  119. hud/utils/tests/test_init.py +17 -17
  120. hud/utils/tests/test_progress.py +261 -261
  121. hud/utils/tests/test_telemetry.py +82 -82
  122. hud/utils/tests/test_version.py +8 -8
  123. hud/version.py +7 -7
  124. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
  125. hud_python-0.4.3.dist-info/RECORD +131 -0
  126. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
  127. hud/agents/art.py +0 -101
  128. hud_python-0.4.1.dist-info/RECORD +0 -132
  129. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
  130. {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/tools/playwright.py CHANGED
@@ -1,412 +1,412 @@
1
- """Playwright web automation tool for HUD."""
2
-
3
- from __future__ import annotations
4
-
5
- import logging
6
- import os
7
- from typing import TYPE_CHECKING, Any, Literal
8
-
9
- from mcp import ErrorData, McpError
10
- from mcp.types import INVALID_PARAMS, ContentBlock
11
- from pydantic import Field
12
-
13
- from .base import BaseTool
14
- from .types import ContentResult
15
-
16
- if TYPE_CHECKING:
17
- from playwright.async_api import Browser, BrowserContext, Page
18
-
19
- logger = logging.getLogger(__name__)
20
-
21
-
22
- class PlaywrightTool(BaseTool):
23
- """Playwright tool for web automation."""
24
-
25
- def __init__(self, page: Page | None = None, cdp_url: str | None = None) -> None:
26
- """Initialize PlaywrightTool.
27
-
28
- Args:
29
- page: Optional existing Playwright Page to use as context
30
- cdp_url: Optional Chrome DevTools Protocol URL for connecting to existing browser
31
- """
32
- super().__init__(
33
- env=page,
34
- name="playwright",
35
- title="Playwright Browser",
36
- description="Web automation tool using Playwright",
37
- )
38
- self._cdp_url = cdp_url
39
- self._playwright = None
40
- # Internal browser management - not exposed as context
41
- self._browser: Browser | None = None
42
- self._browser_context: BrowserContext | None = None
43
-
44
- @property
45
- def page(self) -> Page | None:
46
- """Get the current page."""
47
- return self.env
48
-
49
- @page.setter
50
- def page(self, value: Page | None) -> None:
51
- """Set the page."""
52
- self.env = value
53
-
54
- async def __call__(
55
- self,
56
- action: str = Field(
57
- ...,
58
- description="The action to perform (navigate, screenshot, click, type, get_page_info, wait_for_element)", # noqa: E501
59
- ),
60
- url: str | None = Field(None, description="URL to navigate to (for navigate action)"),
61
- selector: str | None = Field(
62
- None, description="CSS selector for element (for click, type, wait_for_element actions)"
63
- ),
64
- text: str | None = Field(None, description="Text to type (for type action)"),
65
- wait_for_load_state: Literal["commit", "domcontentloaded", "load", "networkidle"]
66
- | None = Field(
67
- None,
68
- description="State to wait for: commit, domcontentloaded, load, networkidle (default: networkidle)", # noqa: E501
69
- ),
70
- ) -> list[ContentBlock]:
71
- """
72
- Execute a Playwright web automation action.
73
-
74
- Returns:
75
- List of MCP content blocks
76
- """
77
- logger.info("PlaywrightTool executing action: %s", action)
78
-
79
- try:
80
- if action == "navigate":
81
- if url is None:
82
- raise McpError(
83
- ErrorData(
84
- code=INVALID_PARAMS, message="url parameter is required for navigate"
85
- )
86
- )
87
- result = await self.navigate(url, wait_for_load_state or "networkidle")
88
-
89
- elif action == "screenshot":
90
- result = await self.screenshot()
91
-
92
- elif action == "click":
93
- if selector is None:
94
- raise McpError(
95
- ErrorData(
96
- code=INVALID_PARAMS, message="selector parameter is required for click"
97
- )
98
- )
99
- result = await self.click(selector)
100
-
101
- elif action == "type":
102
- if selector is None:
103
- raise McpError(
104
- ErrorData(
105
- code=INVALID_PARAMS, message="selector parameter is required for type"
106
- )
107
- )
108
- if text is None:
109
- raise McpError(
110
- ErrorData(
111
- code=INVALID_PARAMS, message="text parameter is required for type"
112
- )
113
- )
114
- result = await self.type_text(selector, text)
115
-
116
- elif action == "get_page_info":
117
- result = await self.get_page_info()
118
-
119
- elif action == "wait_for_element":
120
- if selector is None:
121
- raise McpError(
122
- ErrorData(
123
- code=INVALID_PARAMS,
124
- message="selector parameter is required for wait_for_element",
125
- )
126
- )
127
- result = await self.wait_for_element(selector)
128
-
129
- else:
130
- raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))
131
-
132
- # Convert dict result to ToolResult
133
- if isinstance(result, dict):
134
- if result.get("success"):
135
- tool_result = ContentResult(output=result.get("message", ""))
136
- else:
137
- tool_result = ContentResult(error=result.get("error", "Unknown error"))
138
- elif isinstance(result, ContentResult):
139
- tool_result = result
140
- else:
141
- tool_result = ContentResult(output=str(result))
142
-
143
- # Convert result to content blocks
144
- return tool_result.to_content_blocks()
145
-
146
- except McpError:
147
- raise
148
- except Exception as e:
149
- logger.error("PlaywrightTool error: %s", e)
150
- raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Playwright error: {e}")) from e
151
-
152
- async def _ensure_browser(self) -> None:
153
- """Ensure browser is launched and ready."""
154
- if self._browser is None or not self._browser.is_connected():
155
- if self._cdp_url:
156
- logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
157
- else:
158
- logger.info("Launching Playwright browser...")
159
-
160
- # Ensure DISPLAY is set (only needed for local browser)
161
- if not self._cdp_url:
162
- os.environ["DISPLAY"] = os.environ.get("DISPLAY", ":1")
163
-
164
- if self._playwright is None:
165
- try:
166
- from playwright.async_api import async_playwright
167
-
168
- self._playwright = await async_playwright().start()
169
- except ImportError:
170
- raise ImportError(
171
- "Playwright is not installed. Please install with: pip install playwright"
172
- ) from None
173
-
174
- # Connect via CDP URL or launch local browser
175
- if self._cdp_url:
176
- # Connect to remote browser via CDP
177
- self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url)
178
-
179
- if self._browser is None:
180
- raise RuntimeError("Failed to connect to remote browser")
181
-
182
- # Use existing context or create new one
183
- contexts = self._browser.contexts
184
- if contexts:
185
- self._browser_context = contexts[0]
186
- else:
187
- self._browser_context = await self._browser.new_context(
188
- viewport={"width": 1920, "height": 1080},
189
- ignore_https_errors=True,
190
- )
191
- else:
192
- # Launch local browser
193
- self._browser = await self._playwright.chromium.launch(
194
- headless=False,
195
- args=[
196
- "--no-sandbox",
197
- "--disable-dev-shm-usage",
198
- "--disable-gpu",
199
- "--disable-web-security",
200
- "--disable-features=IsolateOrigins,site-per-process",
201
- "--disable-blink-features=AutomationControlled",
202
- "--window-size=1920,1080",
203
- "--window-position=0,0",
204
- "--start-maximized",
205
- "--disable-background-timer-throttling",
206
- "--disable-backgrounding-occluded-windows",
207
- "--disable-renderer-backgrounding",
208
- "--disable-features=TranslateUI",
209
- "--disable-ipc-flooding-protection",
210
- "--disable-default-apps",
211
- "--no-first-run",
212
- "--disable-sync",
213
- "--no-default-browser-check",
214
- ],
215
- )
216
-
217
- if self._browser is None:
218
- raise RuntimeError("Browser failed to initialize")
219
-
220
- self._browser_context = await self._browser.new_context(
221
- viewport={"width": 1920, "height": 1080},
222
- ignore_https_errors=True,
223
- )
224
-
225
- if self._browser_context is None:
226
- raise RuntimeError("Browser context failed to initialize")
227
-
228
- self.page = await self._browser_context.new_page()
229
- logger.info("Playwright browser launched successfully")
230
-
231
- async def navigate(
232
- self,
233
- url: str,
234
- wait_for_load_state: Literal[
235
- "commit", "domcontentloaded", "load", "networkidle"
236
- ] = "networkidle",
237
- ) -> dict[str, Any]:
238
- """Navigate to a URL.
239
-
240
- Args:
241
- url: URL to navigate to
242
- wait_for_load_state: Load state to wait for (load, domcontentloaded, networkidle)
243
-
244
- Returns:
245
- Dict with navigation result
246
- """
247
- await self._ensure_browser()
248
- if self.page is None:
249
- raise RuntimeError("Page not initialized after _ensure_browser")
250
-
251
- logger.info("Navigating to %s", url)
252
- try:
253
- await self.page.goto(url, wait_until=wait_for_load_state)
254
- current_url = self.page.url
255
- title = await self.page.title()
256
-
257
- return {
258
- "success": True,
259
- "url": current_url,
260
- "title": title,
261
- "message": f"Successfully navigated to {url}",
262
- }
263
- except Exception as e:
264
- logger.error("Navigation failed: %s", e)
265
- return {
266
- "success": False,
267
- "error": str(e),
268
- "message": f"Failed to navigate to {url}: {e}",
269
- }
270
-
271
- async def screenshot(self) -> ContentResult:
272
- """Take a screenshot of the current page.
273
-
274
- Returns:
275
- ToolResult with base64_image
276
- """
277
- await self._ensure_browser()
278
- if self.page is None:
279
- raise RuntimeError("Page not initialized after _ensure_browser")
280
-
281
- try:
282
- # Always return base64 encoded screenshot as ToolResult
283
- screenshot_bytes = await self.page.screenshot(full_page=True)
284
- import base64
285
-
286
- screenshot_b64 = base64.b64encode(screenshot_bytes).decode()
287
- return ContentResult(base64_image=screenshot_b64)
288
- except Exception as e:
289
- logger.error("Screenshot failed: %s", e)
290
- return ContentResult(error=f"Failed to take screenshot: {e}")
291
-
292
- async def click(
293
- self,
294
- selector: str,
295
- button: Literal["left", "right", "middle"] = "left",
296
- count: int = 1,
297
- wait_for_navigation: bool = True,
298
- ) -> dict[str, Any]:
299
- """Click an element by selector.
300
-
301
- Args:
302
- selector: CSS selector for element to click
303
-
304
- Returns:
305
- Dict with click result
306
- """
307
- await self._ensure_browser()
308
- if self.page is None:
309
- raise RuntimeError("Page not initialized after _ensure_browser")
310
-
311
- try:
312
- await self.page.click(selector, button=button, click_count=count)
313
- return {"success": True, "message": f"Clicked element: {selector}"}
314
- except Exception as e:
315
- logger.error("Click failed: %s", e)
316
- return {
317
- "success": False,
318
- "error": str(e),
319
- "message": f"Failed to click {selector}: {e}",
320
- }
321
-
322
- async def type_text(self, selector: str, text: str) -> dict[str, Any]:
323
- """Type text into an element.
324
-
325
- Args:
326
- selector: CSS selector for input element
327
- text: Text to type
328
-
329
- Returns:
330
- Dict with type result
331
- """
332
- await self._ensure_browser()
333
- if self.page is None:
334
- raise RuntimeError("Page not initialized after _ensure_browser")
335
-
336
- try:
337
- await self.page.fill(selector, text)
338
- return {"success": True, "message": f"Typed '{text}' into {selector}"}
339
- except Exception as e:
340
- logger.error("Type failed: %s", e)
341
- return {
342
- "success": False,
343
- "error": str(e),
344
- "message": f"Failed to type into {selector}: {e}",
345
- }
346
-
347
- async def get_page_info(self) -> dict[str, Any]:
348
- """Get current page information.
349
-
350
- Returns:
351
- Dict with page info
352
- """
353
- await self._ensure_browser()
354
- if self.page is None:
355
- raise RuntimeError("Page not initialized after _ensure_browser")
356
-
357
- try:
358
- url = self.page.url
359
- title = await self.page.title()
360
- return {
361
- "success": True,
362
- "url": url,
363
- "title": title,
364
- "message": f"Current page: {title} ({url})",
365
- }
366
- except Exception as e:
367
- logger.error("Get page info failed: %s", e)
368
- return {"success": False, "error": str(e), "message": f"Failed to get page info: {e}"}
369
-
370
- async def wait_for_element(self, selector: str) -> dict[str, Any]:
371
- """Wait for an element to appear.
372
-
373
- Args:
374
- selector: CSS selector for element
375
-
376
- Returns:
377
- Dict with wait result
378
- """
379
- await self._ensure_browser()
380
- if self.page is None:
381
- raise RuntimeError("Page not initialized after _ensure_browser")
382
-
383
- try:
384
- await self.page.wait_for_selector(selector, timeout=30000)
385
- return {"success": True, "message": f"Element {selector} appeared"}
386
- except Exception as e:
387
- logger.error("Wait for element failed: %s", e)
388
- return {
389
- "success": False,
390
- "error": str(e),
391
- "message": f"Element {selector} did not appear within 30000ms: {e}",
392
- }
393
-
394
- async def close(self) -> None:
395
- """Close browser and cleanup."""
396
- if self._browser:
397
- try:
398
- await self._browser.close()
399
- logger.info("Browser closed")
400
- except Exception as e:
401
- logger.error("Error closing browser: %s", e)
402
-
403
- if self._playwright:
404
- try:
405
- await self._playwright.stop()
406
- except Exception as e:
407
- logger.error("Error stopping playwright: %s", e)
408
-
409
- self._browser = None
410
- self._browser_context = None
411
- self.env = None # Clear the page
412
- self._playwright = None
1
+ """Playwright web automation tool for HUD."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ from typing import TYPE_CHECKING, Any, Literal
8
+
9
+ from mcp import ErrorData, McpError
10
+ from mcp.types import INVALID_PARAMS, ContentBlock
11
+ from pydantic import Field
12
+
13
+ from .base import BaseTool
14
+ from .types import ContentResult
15
+
16
+ if TYPE_CHECKING:
17
+ from playwright.async_api import Browser, BrowserContext, Page
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ class PlaywrightTool(BaseTool):
23
+ """Playwright tool for web automation."""
24
+
25
+ def __init__(self, page: Page | None = None, cdp_url: str | None = None) -> None:
26
+ """Initialize PlaywrightTool.
27
+
28
+ Args:
29
+ page: Optional existing Playwright Page to use as context
30
+ cdp_url: Optional Chrome DevTools Protocol URL for connecting to existing browser
31
+ """
32
+ super().__init__(
33
+ env=page,
34
+ name="playwright",
35
+ title="Playwright Browser",
36
+ description="Web automation tool using Playwright",
37
+ )
38
+ self._cdp_url = cdp_url
39
+ self._playwright = None
40
+ # Internal browser management - not exposed as context
41
+ self._browser: Browser | None = None
42
+ self._browser_context: BrowserContext | None = None
43
+
44
+ @property
45
+ def page(self) -> Page | None:
46
+ """Get the current page."""
47
+ return self.env
48
+
49
+ @page.setter
50
+ def page(self, value: Page | None) -> None:
51
+ """Set the page."""
52
+ self.env = value
53
+
54
+ async def __call__(
55
+ self,
56
+ action: str = Field(
57
+ ...,
58
+ description="The action to perform (navigate, screenshot, click, type, get_page_info, wait_for_element)", # noqa: E501
59
+ ),
60
+ url: str | None = Field(None, description="URL to navigate to (for navigate action)"),
61
+ selector: str | None = Field(
62
+ None, description="CSS selector for element (for click, type, wait_for_element actions)"
63
+ ),
64
+ text: str | None = Field(None, description="Text to type (for type action)"),
65
+ wait_for_load_state: Literal["commit", "domcontentloaded", "load", "networkidle"]
66
+ | None = Field(
67
+ None,
68
+ description="State to wait for: commit, domcontentloaded, load, networkidle (default: networkidle)", # noqa: E501
69
+ ),
70
+ ) -> list[ContentBlock]:
71
+ """
72
+ Execute a Playwright web automation action.
73
+
74
+ Returns:
75
+ List of MCP content blocks
76
+ """
77
+ logger.info("PlaywrightTool executing action: %s", action)
78
+
79
+ try:
80
+ if action == "navigate":
81
+ if url is None:
82
+ raise McpError(
83
+ ErrorData(
84
+ code=INVALID_PARAMS, message="url parameter is required for navigate"
85
+ )
86
+ )
87
+ result = await self.navigate(url, wait_for_load_state or "networkidle")
88
+
89
+ elif action == "screenshot":
90
+ result = await self.screenshot()
91
+
92
+ elif action == "click":
93
+ if selector is None:
94
+ raise McpError(
95
+ ErrorData(
96
+ code=INVALID_PARAMS, message="selector parameter is required for click"
97
+ )
98
+ )
99
+ result = await self.click(selector)
100
+
101
+ elif action == "type":
102
+ if selector is None:
103
+ raise McpError(
104
+ ErrorData(
105
+ code=INVALID_PARAMS, message="selector parameter is required for type"
106
+ )
107
+ )
108
+ if text is None:
109
+ raise McpError(
110
+ ErrorData(
111
+ code=INVALID_PARAMS, message="text parameter is required for type"
112
+ )
113
+ )
114
+ result = await self.type_text(selector, text)
115
+
116
+ elif action == "get_page_info":
117
+ result = await self.get_page_info()
118
+
119
+ elif action == "wait_for_element":
120
+ if selector is None:
121
+ raise McpError(
122
+ ErrorData(
123
+ code=INVALID_PARAMS,
124
+ message="selector parameter is required for wait_for_element",
125
+ )
126
+ )
127
+ result = await self.wait_for_element(selector)
128
+
129
+ else:
130
+ raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))
131
+
132
+ # Convert dict result to ToolResult
133
+ if isinstance(result, dict):
134
+ if result.get("success"):
135
+ tool_result = ContentResult(output=result.get("message", ""))
136
+ else:
137
+ tool_result = ContentResult(error=result.get("error", "Unknown error"))
138
+ elif isinstance(result, ContentResult):
139
+ tool_result = result
140
+ else:
141
+ tool_result = ContentResult(output=str(result))
142
+
143
+ # Convert result to content blocks
144
+ return tool_result.to_content_blocks()
145
+
146
+ except McpError:
147
+ raise
148
+ except Exception as e:
149
+ logger.error("PlaywrightTool error: %s", e)
150
+ raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Playwright error: {e}")) from e
151
+
152
+ async def _ensure_browser(self) -> None:
153
+ """Ensure browser is launched and ready."""
154
+ if self._browser is None or not self._browser.is_connected():
155
+ if self._cdp_url:
156
+ logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
157
+ else:
158
+ logger.info("Launching Playwright browser...")
159
+
160
+ # Ensure DISPLAY is set (only needed for local browser)
161
+ if not self._cdp_url:
162
+ os.environ["DISPLAY"] = os.environ.get("DISPLAY", ":1")
163
+
164
+ if self._playwright is None:
165
+ try:
166
+ from playwright.async_api import async_playwright
167
+
168
+ self._playwright = await async_playwright().start()
169
+ except ImportError:
170
+ raise ImportError(
171
+ "Playwright is not installed. Please install with: pip install playwright"
172
+ ) from None
173
+
174
+ # Connect via CDP URL or launch local browser
175
+ if self._cdp_url:
176
+ # Connect to remote browser via CDP
177
+ self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url)
178
+
179
+ if self._browser is None:
180
+ raise RuntimeError("Failed to connect to remote browser")
181
+
182
+ # Use existing context or create new one
183
+ contexts = self._browser.contexts
184
+ if contexts:
185
+ self._browser_context = contexts[0]
186
+ else:
187
+ self._browser_context = await self._browser.new_context(
188
+ viewport={"width": 1920, "height": 1080},
189
+ ignore_https_errors=True,
190
+ )
191
+ else:
192
+ # Launch local browser
193
+ self._browser = await self._playwright.chromium.launch(
194
+ headless=False,
195
+ args=[
196
+ "--no-sandbox",
197
+ "--disable-dev-shm-usage",
198
+ "--disable-gpu",
199
+ "--disable-web-security",
200
+ "--disable-features=IsolateOrigins,site-per-process",
201
+ "--disable-blink-features=AutomationControlled",
202
+ "--window-size=1920,1080",
203
+ "--window-position=0,0",
204
+ "--start-maximized",
205
+ "--disable-background-timer-throttling",
206
+ "--disable-backgrounding-occluded-windows",
207
+ "--disable-renderer-backgrounding",
208
+ "--disable-features=TranslateUI",
209
+ "--disable-ipc-flooding-protection",
210
+ "--disable-default-apps",
211
+ "--no-first-run",
212
+ "--disable-sync",
213
+ "--no-default-browser-check",
214
+ ],
215
+ )
216
+
217
+ if self._browser is None:
218
+ raise RuntimeError("Browser failed to initialize")
219
+
220
+ self._browser_context = await self._browser.new_context(
221
+ viewport={"width": 1920, "height": 1080},
222
+ ignore_https_errors=True,
223
+ )
224
+
225
+ if self._browser_context is None:
226
+ raise RuntimeError("Browser context failed to initialize")
227
+
228
+ self.page = await self._browser_context.new_page()
229
+ logger.info("Playwright browser launched successfully")
230
+
231
+ async def navigate(
232
+ self,
233
+ url: str,
234
+ wait_for_load_state: Literal[
235
+ "commit", "domcontentloaded", "load", "networkidle"
236
+ ] = "networkidle",
237
+ ) -> dict[str, Any]:
238
+ """Navigate to a URL.
239
+
240
+ Args:
241
+ url: URL to navigate to
242
+ wait_for_load_state: Load state to wait for (load, domcontentloaded, networkidle)
243
+
244
+ Returns:
245
+ Dict with navigation result
246
+ """
247
+ await self._ensure_browser()
248
+ if self.page is None:
249
+ raise RuntimeError("Page not initialized after _ensure_browser")
250
+
251
+ logger.info("Navigating to %s", url)
252
+ try:
253
+ await self.page.goto(url, wait_until=wait_for_load_state)
254
+ current_url = self.page.url
255
+ title = await self.page.title()
256
+
257
+ return {
258
+ "success": True,
259
+ "url": current_url,
260
+ "title": title,
261
+ "message": f"Successfully navigated to {url}",
262
+ }
263
+ except Exception as e:
264
+ logger.error("Navigation failed: %s", e)
265
+ return {
266
+ "success": False,
267
+ "error": str(e),
268
+ "message": f"Failed to navigate to {url}: {e}",
269
+ }
270
+
271
+ async def screenshot(self) -> ContentResult:
272
+ """Take a screenshot of the current page.
273
+
274
+ Returns:
275
+ ToolResult with base64_image
276
+ """
277
+ await self._ensure_browser()
278
+ if self.page is None:
279
+ raise RuntimeError("Page not initialized after _ensure_browser")
280
+
281
+ try:
282
+ # Always return base64 encoded screenshot as ToolResult
283
+ screenshot_bytes = await self.page.screenshot(full_page=True)
284
+ import base64
285
+
286
+ screenshot_b64 = base64.b64encode(screenshot_bytes).decode()
287
+ return ContentResult(base64_image=screenshot_b64)
288
+ except Exception as e:
289
+ logger.error("Screenshot failed: %s", e)
290
+ return ContentResult(error=f"Failed to take screenshot: {e}")
291
+
292
+ async def click(
293
+ self,
294
+ selector: str,
295
+ button: Literal["left", "right", "middle"] = "left",
296
+ count: int = 1,
297
+ wait_for_navigation: bool = True,
298
+ ) -> dict[str, Any]:
299
+ """Click an element by selector.
300
+
301
+ Args:
302
+ selector: CSS selector for element to click
303
+
304
+ Returns:
305
+ Dict with click result
306
+ """
307
+ await self._ensure_browser()
308
+ if self.page is None:
309
+ raise RuntimeError("Page not initialized after _ensure_browser")
310
+
311
+ try:
312
+ await self.page.click(selector, button=button, click_count=count)
313
+ return {"success": True, "message": f"Clicked element: {selector}"}
314
+ except Exception as e:
315
+ logger.error("Click failed: %s", e)
316
+ return {
317
+ "success": False,
318
+ "error": str(e),
319
+ "message": f"Failed to click {selector}: {e}",
320
+ }
321
+
322
+ async def type_text(self, selector: str, text: str) -> dict[str, Any]:
323
+ """Type text into an element.
324
+
325
+ Args:
326
+ selector: CSS selector for input element
327
+ text: Text to type
328
+
329
+ Returns:
330
+ Dict with type result
331
+ """
332
+ await self._ensure_browser()
333
+ if self.page is None:
334
+ raise RuntimeError("Page not initialized after _ensure_browser")
335
+
336
+ try:
337
+ await self.page.fill(selector, text)
338
+ return {"success": True, "message": f"Typed '{text}' into {selector}"}
339
+ except Exception as e:
340
+ logger.error("Type failed: %s", e)
341
+ return {
342
+ "success": False,
343
+ "error": str(e),
344
+ "message": f"Failed to type into {selector}: {e}",
345
+ }
346
+
347
+ async def get_page_info(self) -> dict[str, Any]:
348
+ """Get current page information.
349
+
350
+ Returns:
351
+ Dict with page info
352
+ """
353
+ await self._ensure_browser()
354
+ if self.page is None:
355
+ raise RuntimeError("Page not initialized after _ensure_browser")
356
+
357
+ try:
358
+ url = self.page.url
359
+ title = await self.page.title()
360
+ return {
361
+ "success": True,
362
+ "url": url,
363
+ "title": title,
364
+ "message": f"Current page: {title} ({url})",
365
+ }
366
+ except Exception as e:
367
+ logger.error("Get page info failed: %s", e)
368
+ return {"success": False, "error": str(e), "message": f"Failed to get page info: {e}"}
369
+
370
+ async def wait_for_element(self, selector: str) -> dict[str, Any]:
371
+ """Wait for an element to appear.
372
+
373
+ Args:
374
+ selector: CSS selector for element
375
+
376
+ Returns:
377
+ Dict with wait result
378
+ """
379
+ await self._ensure_browser()
380
+ if self.page is None:
381
+ raise RuntimeError("Page not initialized after _ensure_browser")
382
+
383
+ try:
384
+ await self.page.wait_for_selector(selector, timeout=30000)
385
+ return {"success": True, "message": f"Element {selector} appeared"}
386
+ except Exception as e:
387
+ logger.error("Wait for element failed: %s", e)
388
+ return {
389
+ "success": False,
390
+ "error": str(e),
391
+ "message": f"Element {selector} did not appear within 30000ms: {e}",
392
+ }
393
+
394
+ async def close(self) -> None:
395
+ """Close browser and cleanup."""
396
+ if self._browser:
397
+ try:
398
+ await self._browser.close()
399
+ logger.info("Browser closed")
400
+ except Exception as e:
401
+ logger.error("Error closing browser: %s", e)
402
+
403
+ if self._playwright:
404
+ try:
405
+ await self._playwright.stop()
406
+ except Exception as e:
407
+ logger.error("Error stopping playwright: %s", e)
408
+
409
+ self._browser = None
410
+ self._browser_context = None
411
+ self.env = None # Clear the page
412
+ self._playwright = None