hud-python 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hud-python might be problematic. Click here for more details.
- hud/__init__.py +22 -22
- hud/agents/__init__.py +13 -15
- hud/agents/base.py +599 -599
- hud/agents/claude.py +373 -373
- hud/agents/langchain.py +261 -250
- hud/agents/misc/__init__.py +7 -7
- hud/agents/misc/response_agent.py +82 -80
- hud/agents/openai.py +352 -352
- hud/agents/openai_chat_generic.py +154 -154
- hud/agents/tests/__init__.py +1 -1
- hud/agents/tests/test_base.py +742 -742
- hud/agents/tests/test_claude.py +324 -324
- hud/agents/tests/test_client.py +363 -363
- hud/agents/tests/test_openai.py +237 -237
- hud/cli/__init__.py +617 -617
- hud/cli/__main__.py +8 -8
- hud/cli/analyze.py +371 -371
- hud/cli/analyze_metadata.py +230 -230
- hud/cli/build.py +498 -427
- hud/cli/clone.py +185 -185
- hud/cli/cursor.py +92 -92
- hud/cli/debug.py +392 -392
- hud/cli/docker_utils.py +83 -83
- hud/cli/init.py +280 -281
- hud/cli/interactive.py +353 -353
- hud/cli/mcp_server.py +764 -756
- hud/cli/pull.py +330 -336
- hud/cli/push.py +404 -370
- hud/cli/remote_runner.py +311 -311
- hud/cli/runner.py +160 -160
- hud/cli/tests/__init__.py +3 -3
- hud/cli/tests/test_analyze.py +284 -284
- hud/cli/tests/test_cli_init.py +265 -265
- hud/cli/tests/test_cli_main.py +27 -27
- hud/cli/tests/test_clone.py +142 -142
- hud/cli/tests/test_cursor.py +253 -253
- hud/cli/tests/test_debug.py +453 -453
- hud/cli/tests/test_mcp_server.py +139 -139
- hud/cli/tests/test_utils.py +388 -388
- hud/cli/utils.py +263 -263
- hud/clients/README.md +143 -143
- hud/clients/__init__.py +16 -16
- hud/clients/base.py +378 -379
- hud/clients/fastmcp.py +222 -222
- hud/clients/mcp_use.py +298 -278
- hud/clients/tests/__init__.py +1 -1
- hud/clients/tests/test_client_integration.py +111 -111
- hud/clients/tests/test_fastmcp.py +342 -342
- hud/clients/tests/test_protocol.py +188 -188
- hud/clients/utils/__init__.py +1 -1
- hud/clients/utils/retry_transport.py +160 -160
- hud/datasets.py +327 -322
- hud/misc/__init__.py +1 -1
- hud/misc/claude_plays_pokemon.py +292 -292
- hud/otel/__init__.py +35 -35
- hud/otel/collector.py +142 -142
- hud/otel/config.py +164 -164
- hud/otel/context.py +536 -536
- hud/otel/exporters.py +366 -366
- hud/otel/instrumentation.py +97 -97
- hud/otel/processors.py +118 -118
- hud/otel/tests/__init__.py +1 -1
- hud/otel/tests/test_processors.py +197 -197
- hud/server/__init__.py +5 -5
- hud/server/context.py +114 -114
- hud/server/helper/__init__.py +5 -5
- hud/server/low_level.py +132 -132
- hud/server/server.py +170 -166
- hud/server/tests/__init__.py +3 -3
- hud/settings.py +73 -73
- hud/shared/__init__.py +5 -5
- hud/shared/exceptions.py +180 -180
- hud/shared/requests.py +264 -264
- hud/shared/tests/test_exceptions.py +157 -157
- hud/shared/tests/test_requests.py +275 -275
- hud/telemetry/__init__.py +25 -25
- hud/telemetry/instrument.py +379 -379
- hud/telemetry/job.py +309 -309
- hud/telemetry/replay.py +74 -74
- hud/telemetry/trace.py +83 -83
- hud/tools/__init__.py +33 -33
- hud/tools/base.py +365 -365
- hud/tools/bash.py +161 -161
- hud/tools/computer/__init__.py +15 -15
- hud/tools/computer/anthropic.py +437 -437
- hud/tools/computer/hud.py +376 -376
- hud/tools/computer/openai.py +295 -295
- hud/tools/computer/settings.py +82 -82
- hud/tools/edit.py +314 -314
- hud/tools/executors/__init__.py +30 -30
- hud/tools/executors/base.py +539 -539
- hud/tools/executors/pyautogui.py +621 -621
- hud/tools/executors/tests/__init__.py +1 -1
- hud/tools/executors/tests/test_base_executor.py +338 -338
- hud/tools/executors/tests/test_pyautogui_executor.py +165 -165
- hud/tools/executors/xdo.py +511 -511
- hud/tools/playwright.py +412 -412
- hud/tools/tests/__init__.py +3 -3
- hud/tools/tests/test_base.py +282 -282
- hud/tools/tests/test_bash.py +158 -158
- hud/tools/tests/test_bash_extended.py +197 -197
- hud/tools/tests/test_computer.py +425 -425
- hud/tools/tests/test_computer_actions.py +34 -34
- hud/tools/tests/test_edit.py +259 -259
- hud/tools/tests/test_init.py +27 -27
- hud/tools/tests/test_playwright_tool.py +183 -183
- hud/tools/tests/test_tools.py +145 -145
- hud/tools/tests/test_utils.py +156 -156
- hud/tools/types.py +72 -72
- hud/tools/utils.py +50 -50
- hud/types.py +136 -136
- hud/utils/__init__.py +10 -10
- hud/utils/async_utils.py +65 -65
- hud/utils/design.py +236 -168
- hud/utils/mcp.py +55 -55
- hud/utils/progress.py +149 -149
- hud/utils/telemetry.py +66 -66
- hud/utils/tests/test_async_utils.py +173 -173
- hud/utils/tests/test_init.py +17 -17
- hud/utils/tests/test_progress.py +261 -261
- hud/utils/tests/test_telemetry.py +82 -82
- hud/utils/tests/test_version.py +8 -8
- hud/version.py +7 -7
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/METADATA +10 -8
- hud_python-0.4.3.dist-info/RECORD +131 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/licenses/LICENSE +21 -21
- hud/agents/art.py +0 -101
- hud_python-0.4.1.dist-info/RECORD +0 -132
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/WHEEL +0 -0
- {hud_python-0.4.1.dist-info → hud_python-0.4.3.dist-info}/entry_points.txt +0 -0
hud/tools/playwright.py
CHANGED
|
@@ -1,412 +1,412 @@
|
|
|
1
|
-
"""Playwright web automation tool for HUD."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import logging
|
|
6
|
-
import os
|
|
7
|
-
from typing import TYPE_CHECKING, Any, Literal
|
|
8
|
-
|
|
9
|
-
from mcp import ErrorData, McpError
|
|
10
|
-
from mcp.types import INVALID_PARAMS, ContentBlock
|
|
11
|
-
from pydantic import Field
|
|
12
|
-
|
|
13
|
-
from .base import BaseTool
|
|
14
|
-
from .types import ContentResult
|
|
15
|
-
|
|
16
|
-
if TYPE_CHECKING:
|
|
17
|
-
from playwright.async_api import Browser, BrowserContext, Page
|
|
18
|
-
|
|
19
|
-
logger = logging.getLogger(__name__)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class PlaywrightTool(BaseTool):
|
|
23
|
-
"""Playwright tool for web automation."""
|
|
24
|
-
|
|
25
|
-
def __init__(self, page: Page | None = None, cdp_url: str | None = None) -> None:
    """Set up the tool and its (initially empty) browser bookkeeping.

    Args:
        page: Existing Playwright page to hand to the base class as ``env``;
            ``None`` leaves the tool without a page.
        cdp_url: Chrome DevTools Protocol URL stored for later use when
            connecting to an already-running browser.
    """
    tool_identity = {
        "name": "playwright",
        "title": "Playwright Browser",
        "description": "Web automation tool using Playwright",
    }
    super().__init__(env=page, **tool_identity)

    # Browser-side handles are private bookkeeping; only the page is exposed (as env).
    self._browser: Browser | None = None
    self._browser_context: BrowserContext | None = None
    self._playwright = None
    self._cdp_url = cdp_url
|
|
43
|
-
|
|
44
|
-
@property
def page(self) -> Page | None:
    """Return the page this tool operates on, which is stored as ``env``."""
    current_page = self.env
    return current_page

@page.setter
def page(self, value: Page | None) -> None:
    """Store ``value`` (a page or ``None``) as the tool's ``env``."""
    self.env = value
|
|
53
|
-
|
|
54
|
-
async def __call__(
    self,
    action: str = Field(
        ...,
        description="The action to perform (navigate, screenshot, click, type, get_page_info, wait_for_element)",  # noqa: E501
    ),
    url: str | None = Field(None, description="URL to navigate to (for navigate action)"),
    selector: str | None = Field(
        None, description="CSS selector for element (for click, type, wait_for_element actions)"
    ),
    text: str | None = Field(None, description="Text to type (for type action)"),
    wait_for_load_state: Literal["commit", "domcontentloaded", "load", "networkidle"]
    | None = Field(
        None,
        description="State to wait for: commit, domcontentloaded, load, networkidle (default: networkidle)",  # noqa: E501
    ),
) -> list[ContentBlock]:
    """Dispatch one Playwright action and return its result as MCP content blocks.

    Args:
        action: One of navigate, screenshot, click, type, get_page_info,
            wait_for_element.
        url: Target URL; required for ``navigate``.
        selector: CSS selector; required for ``click``, ``type`` and
            ``wait_for_element``.
        text: Text to enter; required for ``type``.
        wait_for_load_state: Load state passed to ``navigate``; ``None`` falls
            back to ``"networkidle"``.

    Returns:
        The content blocks produced by the action's ``ContentResult``.

    Raises:
        McpError: With ``INVALID_PARAMS`` when the action is unknown, a required
            parameter is missing, or any other exception escapes the action.
    """
    logger.info("PlaywrightTool executing action: %s", action)

    def require(name: str, value: str | None) -> str:
        """Return ``value``, or raise INVALID_PARAMS naming the missing parameter."""
        if value is None:
            raise McpError(
                ErrorData(
                    code=INVALID_PARAMS,
                    message=f"{name} parameter is required for {action}",
                )
            )
        return value

    try:
        if action == "navigate":
            result = await self.navigate(
                require("url", url), wait_for_load_state or "networkidle"
            )
        elif action == "screenshot":
            result = await self.screenshot()
        elif action == "click":
            result = await self.click(require("selector", selector))
        elif action == "type":
            # Validate selector before text so the first missing parameter is reported.
            target = require("selector", selector)
            result = await self.type_text(target, require("text", text))
        elif action == "get_page_info":
            result = await self.get_page_info()
        elif action == "wait_for_element":
            result = await self.wait_for_element(require("selector", selector))
        else:
            raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))

        # Action helpers return either a status dict or a ready ContentResult;
        # normalise both (and anything else) into a ContentResult.
        if isinstance(result, dict):
            if result.get("success"):
                tool_result = ContentResult(output=result.get("message", ""))
            else:
                tool_result = ContentResult(error=result.get("error", "Unknown error"))
        elif isinstance(result, ContentResult):
            tool_result = result
        else:
            tool_result = ContentResult(output=str(result))

        return tool_result.to_content_blocks()

    except McpError:
        # Already in protocol form (validation errors above); pass through untouched.
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("PlaywrightTool error: %s", e)
        raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Playwright error: {e}")) from e
|
|
151
|
-
|
|
152
|
-
async def _ensure_browser(self) -> None:
    """Make sure a usable page exists, starting or attaching to a browser if needed.

    Nothing happens when an open page was supplied from outside (no browser is
    owned by this tool) or when the owned browser is still connected. Otherwise
    Playwright is started on first use, the browser is either attached over CDP
    (when ``cdp_url`` was given) or launched locally, a context is obtained and
    a new page is opened in it.

    Raises:
        ImportError: If the ``playwright`` package is not installed.
        RuntimeError: If the browser or its context is missing after start-up.
    """
    # A page handed in by the caller while this tool owns no browser must be
    # used as-is; launching a browser here would silently replace it.
    if self._browser is None and self.page is not None and not self.page.is_closed():
        return

    if self._browser is not None and self._browser.is_connected():
        return

    if self._cdp_url:
        logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
    else:
        logger.info("Launching Playwright browser...")
        # A local, headed browser needs an X display; keep any value already set.
        os.environ.setdefault("DISPLAY", ":1")

    if self._playwright is None:
        try:
            from playwright.async_api import async_playwright

            self._playwright = await async_playwright().start()
        except ImportError:
            raise ImportError(
                "Playwright is not installed. Please install with: pip install playwright"
            ) from None

    # Same context settings for both the CDP and the local path.
    context_options: dict[str, Any] = {
        "viewport": {"width": 1920, "height": 1080},
        "ignore_https_errors": True,
    }

    if self._cdp_url:
        self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url)

        if self._browser is None:
            raise RuntimeError("Failed to connect to remote browser")

        # Reuse the remote browser's first context when it has one.
        existing_contexts = self._browser.contexts
        if existing_contexts:
            self._browser_context = existing_contexts[0]
        else:
            self._browser_context = await self._browser.new_context(**context_options)
    else:
        self._browser = await self._playwright.chromium.launch(
            headless=False,
            args=[
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--disable-web-security",
                "--disable-features=IsolateOrigins,site-per-process",
                "--disable-blink-features=AutomationControlled",
                "--window-size=1920,1080",
                "--window-position=0,0",
                "--start-maximized",
                "--disable-background-timer-throttling",
                "--disable-backgrounding-occluded-windows",
                "--disable-renderer-backgrounding",
                "--disable-features=TranslateUI",
                "--disable-ipc-flooding-protection",
                "--disable-default-apps",
                "--no-first-run",
                "--disable-sync",
                "--no-default-browser-check",
            ],
        )

        if self._browser is None:
            raise RuntimeError("Browser failed to initialize")

        self._browser_context = await self._browser.new_context(**context_options)

    if self._browser_context is None:
        raise RuntimeError("Browser context failed to initialize")

    self.page = await self._browser_context.new_page()
    logger.info("Playwright browser launched successfully")
|
|
230
|
-
|
|
231
|
-
async def navigate(
    self,
    url: str,
    wait_for_load_state: Literal[
        "commit", "domcontentloaded", "load", "networkidle"
    ] = "networkidle",
) -> dict[str, Any]:
    """Open ``url`` in the current page and report the outcome.

    Args:
        url: Address to open.
        wait_for_load_state: Value forwarded to ``page.goto`` as ``wait_until``
            (commit, domcontentloaded, load or networkidle).

    Returns:
        On success, a dict with ``success`` True, the page's resulting ``url``
        and ``title``, and a ``message``. On failure, ``success`` False with
        ``error`` and ``message``.

    Raises:
        RuntimeError: If no page is available after ``_ensure_browser``.
    """
    await self._ensure_browser()
    active_page = self.page
    if active_page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    logger.info("Navigating to %s", url)
    try:
        await active_page.goto(url, wait_until=wait_for_load_state)
        landed_url = active_page.url
        page_title = await active_page.title()
        outcome: dict[str, Any] = {
            "success": True,
            "url": landed_url,
            "title": page_title,
            "message": f"Successfully navigated to {url}",
        }
    except Exception as exc:
        # Failures are reported in the result dict rather than raised.
        logger.error("Navigation failed: %s", exc)
        outcome = {
            "success": False,
            "error": str(exc),
            "message": f"Failed to navigate to {url}: {exc}",
        }
    return outcome
|
|
270
|
-
|
|
271
|
-
async def screenshot(self) -> ContentResult:
    """Capture the full current page and return it base64-encoded.

    Returns:
        A ``ContentResult`` whose ``base64_image`` holds the encoded capture, or
        one whose ``error`` describes why the capture failed.

    Raises:
        RuntimeError: If no page is available after ``_ensure_browser``.
    """
    import base64

    await self._ensure_browser()
    active_page = self.page
    if active_page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    try:
        image_bytes = await active_page.screenshot(full_page=True)
        encoded_image = base64.b64encode(image_bytes).decode()
    except Exception as exc:
        # Failures are reported in the result rather than raised.
        logger.error("Screenshot failed: %s", exc)
        return ContentResult(error=f"Failed to take screenshot: {exc}")
    return ContentResult(base64_image=encoded_image)
|
|
291
|
-
|
|
292
|
-
async def click(
    self,
    selector: str,
    button: Literal["left", "right", "middle"] = "left",
    count: int = 1,
    wait_for_navigation: bool = True,
) -> dict[str, Any]:
    """Click the element matched by ``selector``.

    Args:
        selector: CSS selector of the element to click.
        button: Mouse button forwarded to ``page.click``.
        count: Forwarded to ``page.click`` as ``click_count``.
        wait_for_navigation: Accepted for interface compatibility; this
            implementation does not read it.

    Returns:
        ``{"success": True, "message": ...}`` on success, otherwise a dict with
        ``success`` False plus ``error`` and ``message``.

    Raises:
        RuntimeError: If no page is available after ``_ensure_browser``.
    """
    await self._ensure_browser()
    active_page = self.page
    if active_page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    try:
        await active_page.click(selector, button=button, click_count=count)
    except Exception as exc:
        # Failures are reported in the result dict rather than raised.
        logger.error("Click failed: %s", exc)
        failure: dict[str, Any] = {
            "success": False,
            "error": str(exc),
            "message": f"Failed to click {selector}: {exc}",
        }
        return failure
    return {"success": True, "message": f"Clicked element: {selector}"}
|
|
321
|
-
|
|
322
|
-
async def type_text(self, selector: str, text: str) -> dict[str, Any]:
    """Fill the element matched by ``selector`` with ``text`` via ``page.fill``.

    Args:
        selector: CSS selector of the input element.
        text: Value to put into the element.

    Returns:
        ``{"success": True, "message": ...}`` on success, otherwise a dict with
        ``success`` False plus ``error`` and ``message``.

    Raises:
        RuntimeError: If no page is available after ``_ensure_browser``.
    """
    await self._ensure_browser()
    active_page = self.page
    if active_page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    try:
        await active_page.fill(selector, text)
    except Exception as exc:
        # Failures are reported in the result dict rather than raised.
        logger.error("Type failed: %s", exc)
        failure: dict[str, Any] = {
            "success": False,
            "error": str(exc),
            "message": f"Failed to type into {selector}: {exc}",
        }
        return failure
    return {"success": True, "message": f"Typed '{text}' into {selector}"}
|
|
346
|
-
|
|
347
|
-
async def get_page_info(self) -> dict[str, Any]:
    """Report the URL and title of the current page.

    Returns:
        On success, a dict with ``success`` True, ``url``, ``title`` and a
        ``message`` combining both. On failure, ``success`` False with ``error``
        and ``message``.

    Raises:
        RuntimeError: If no page is available after ``_ensure_browser``.
    """
    await self._ensure_browser()
    active_page = self.page
    if active_page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    try:
        page_url = active_page.url
        page_title = await active_page.title()
        info: dict[str, Any] = {
            "success": True,
            "url": page_url,
            "title": page_title,
            "message": f"Current page: {page_title} ({page_url})",
        }
    except Exception as exc:
        # Failures are reported in the result dict rather than raised.
        logger.error("Get page info failed: %s", exc)
        info = {
            "success": False,
            "error": str(exc),
            "message": f"Failed to get page info: {exc}",
        }
    return info
|
|
369
|
-
|
|
370
|
-
async def wait_for_element(self, selector: str, timeout_ms: float = 30000) -> dict[str, Any]:
    """Wait until an element matching ``selector`` appears on the current page.

    Args:
        selector: CSS selector of the element to wait for.
        timeout_ms: Maximum wait in milliseconds, forwarded to
            ``page.wait_for_selector`` as ``timeout``. Defaults to 30000, the
            value that was previously hard-coded.

    Returns:
        ``{"success": True, "message": ...}`` once the element appears,
        otherwise a dict with ``success`` False plus ``error`` and ``message``.

    Raises:
        RuntimeError: If no page is available after ``_ensure_browser``.
    """
    await self._ensure_browser()
    if self.page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    try:
        await self.page.wait_for_selector(selector, timeout=timeout_ms)
        return {"success": True, "message": f"Element {selector} appeared"}
    except Exception as e:
        # Failures (including the timeout) are reported in the result dict.
        logger.error("Wait for element failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "message": f"Element {selector} did not appear within {timeout_ms}ms: {e}",
        }
|
|
393
|
-
|
|
394
|
-
async def close(self) -> None:
    """Shut down the browser and Playwright, then forget all handles.

    Errors raised while closing the browser or stopping Playwright are logged
    and do not prevent the remaining cleanup.
    """
    browser = self._browser
    if browser:
        try:
            await browser.close()
            logger.info("Browser closed")
        except Exception as exc:
            logger.error("Error closing browser: %s", exc)

    driver = self._playwright
    if driver:
        try:
            await driver.stop()
        except Exception as exc:
            logger.error("Error stopping playwright: %s", exc)

    # Reset every handle so a later action starts from a clean state.
    self._playwright = None
    self._browser_context = None
    self._browser = None
    self.env = None  # Clear the page
|
|
1
|
+
"""Playwright web automation tool for HUD."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
8
|
+
|
|
9
|
+
from mcp import ErrorData, McpError
|
|
10
|
+
from mcp.types import INVALID_PARAMS, ContentBlock
|
|
11
|
+
from pydantic import Field
|
|
12
|
+
|
|
13
|
+
from .base import BaseTool
|
|
14
|
+
from .types import ContentResult
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from playwright.async_api import Browser, BrowserContext, Page
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class PlaywrightTool(BaseTool):
|
|
23
|
+
"""Playwright tool for web automation."""
|
|
24
|
+
|
|
25
|
+
def __init__(self, page: Page | None = None, cdp_url: str | None = None) -> None:
    """Set up the tool and its (initially empty) browser bookkeeping.

    Args:
        page: Existing Playwright page to hand to the base class as ``env``;
            ``None`` leaves the tool without a page.
        cdp_url: Chrome DevTools Protocol URL stored for later use when
            connecting to an already-running browser.
    """
    tool_identity = {
        "name": "playwright",
        "title": "Playwright Browser",
        "description": "Web automation tool using Playwright",
    }
    super().__init__(env=page, **tool_identity)

    # Browser-side handles are private bookkeeping; only the page is exposed (as env).
    self._browser: Browser | None = None
    self._browser_context: BrowserContext | None = None
    self._playwright = None
    self._cdp_url = cdp_url
|
|
43
|
+
|
|
44
|
+
@property
def page(self) -> Page | None:
    """Return the page this tool operates on, which is stored as ``env``."""
    current_page = self.env
    return current_page

@page.setter
def page(self, value: Page | None) -> None:
    """Store ``value`` (a page or ``None``) as the tool's ``env``."""
    self.env = value
|
|
53
|
+
|
|
54
|
+
async def __call__(
    self,
    action: str = Field(
        ...,
        description="The action to perform (navigate, screenshot, click, type, get_page_info, wait_for_element)",  # noqa: E501
    ),
    url: str | None = Field(None, description="URL to navigate to (for navigate action)"),
    selector: str | None = Field(
        None, description="CSS selector for element (for click, type, wait_for_element actions)"
    ),
    text: str | None = Field(None, description="Text to type (for type action)"),
    wait_for_load_state: Literal["commit", "domcontentloaded", "load", "networkidle"]
    | None = Field(
        None,
        description="State to wait for: commit, domcontentloaded, load, networkidle (default: networkidle)",  # noqa: E501
    ),
) -> list[ContentBlock]:
    """Dispatch one Playwright action and return its result as MCP content blocks.

    Args:
        action: One of navigate, screenshot, click, type, get_page_info,
            wait_for_element.
        url: Target URL; required for ``navigate``.
        selector: CSS selector; required for ``click``, ``type`` and
            ``wait_for_element``.
        text: Text to enter; required for ``type``.
        wait_for_load_state: Load state passed to ``navigate``; ``None`` falls
            back to ``"networkidle"``.

    Returns:
        The content blocks produced by the action's ``ContentResult``.

    Raises:
        McpError: With ``INVALID_PARAMS`` when the action is unknown, a required
            parameter is missing, or any other exception escapes the action.
    """
    logger.info("PlaywrightTool executing action: %s", action)

    def require(name: str, value: str | None) -> str:
        """Return ``value``, or raise INVALID_PARAMS naming the missing parameter."""
        if value is None:
            raise McpError(
                ErrorData(
                    code=INVALID_PARAMS,
                    message=f"{name} parameter is required for {action}",
                )
            )
        return value

    try:
        if action == "navigate":
            result = await self.navigate(
                require("url", url), wait_for_load_state or "networkidle"
            )
        elif action == "screenshot":
            result = await self.screenshot()
        elif action == "click":
            result = await self.click(require("selector", selector))
        elif action == "type":
            # Validate selector before text so the first missing parameter is reported.
            target = require("selector", selector)
            result = await self.type_text(target, require("text", text))
        elif action == "get_page_info":
            result = await self.get_page_info()
        elif action == "wait_for_element":
            result = await self.wait_for_element(require("selector", selector))
        else:
            raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Unknown action: {action}"))

        # Action helpers return either a status dict or a ready ContentResult;
        # normalise both (and anything else) into a ContentResult.
        if isinstance(result, dict):
            if result.get("success"):
                tool_result = ContentResult(output=result.get("message", ""))
            else:
                tool_result = ContentResult(error=result.get("error", "Unknown error"))
        elif isinstance(result, ContentResult):
            tool_result = result
        else:
            tool_result = ContentResult(output=str(result))

        return tool_result.to_content_blocks()

    except McpError:
        # Already in protocol form (validation errors above); pass through untouched.
        raise
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error dropped.
        logger.exception("PlaywrightTool error: %s", e)
        raise McpError(ErrorData(code=INVALID_PARAMS, message=f"Playwright error: {e}")) from e
|
|
151
|
+
|
|
152
|
+
async def _ensure_browser(self) -> None:
    """Make sure a usable page exists, starting or attaching to a browser if needed.

    Nothing happens when an open page was supplied from outside (no browser is
    owned by this tool) or when the owned browser is still connected. Otherwise
    Playwright is started on first use, the browser is either attached over CDP
    (when ``cdp_url`` was given) or launched locally, a context is obtained and
    a new page is opened in it.

    Raises:
        ImportError: If the ``playwright`` package is not installed.
        RuntimeError: If the browser or its context is missing after start-up.
    """
    # A page handed in by the caller while this tool owns no browser must be
    # used as-is; launching a browser here would silently replace it.
    if self._browser is None and self.page is not None and not self.page.is_closed():
        return

    if self._browser is not None and self._browser.is_connected():
        return

    if self._cdp_url:
        logger.info("Connecting to remote browser via CDP: %s", self._cdp_url)
    else:
        logger.info("Launching Playwright browser...")
        # A local, headed browser needs an X display; keep any value already set.
        os.environ.setdefault("DISPLAY", ":1")

    if self._playwright is None:
        try:
            from playwright.async_api import async_playwright

            self._playwright = await async_playwright().start()
        except ImportError:
            raise ImportError(
                "Playwright is not installed. Please install with: pip install playwright"
            ) from None

    # Same context settings for both the CDP and the local path.
    context_options: dict[str, Any] = {
        "viewport": {"width": 1920, "height": 1080},
        "ignore_https_errors": True,
    }

    if self._cdp_url:
        self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url)

        if self._browser is None:
            raise RuntimeError("Failed to connect to remote browser")

        # Reuse the remote browser's first context when it has one.
        existing_contexts = self._browser.contexts
        if existing_contexts:
            self._browser_context = existing_contexts[0]
        else:
            self._browser_context = await self._browser.new_context(**context_options)
    else:
        self._browser = await self._playwright.chromium.launch(
            headless=False,
            args=[
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--disable-gpu",
                "--disable-web-security",
                "--disable-features=IsolateOrigins,site-per-process",
                "--disable-blink-features=AutomationControlled",
                "--window-size=1920,1080",
                "--window-position=0,0",
                "--start-maximized",
                "--disable-background-timer-throttling",
                "--disable-backgrounding-occluded-windows",
                "--disable-renderer-backgrounding",
                "--disable-features=TranslateUI",
                "--disable-ipc-flooding-protection",
                "--disable-default-apps",
                "--no-first-run",
                "--disable-sync",
                "--no-default-browser-check",
            ],
        )

        if self._browser is None:
            raise RuntimeError("Browser failed to initialize")

        self._browser_context = await self._browser.new_context(**context_options)

    if self._browser_context is None:
        raise RuntimeError("Browser context failed to initialize")

    self.page = await self._browser_context.new_page()
    logger.info("Playwright browser launched successfully")
|
|
230
|
+
|
|
231
|
+
async def navigate(
    self,
    url: str,
    wait_for_load_state: Literal[
        "commit", "domcontentloaded", "load", "networkidle"
    ] = "networkidle",
) -> dict[str, Any]:
    """Open ``url`` in the current page and report the outcome.

    Args:
        url: Address to open.
        wait_for_load_state: Value forwarded to ``page.goto`` as ``wait_until``
            (commit, domcontentloaded, load or networkidle).

    Returns:
        On success, a dict with ``success`` True, the page's resulting ``url``
        and ``title``, and a ``message``. On failure, ``success`` False with
        ``error`` and ``message``.

    Raises:
        RuntimeError: If no page is available after ``_ensure_browser``.
    """
    await self._ensure_browser()
    active_page = self.page
    if active_page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    logger.info("Navigating to %s", url)
    try:
        await active_page.goto(url, wait_until=wait_for_load_state)
        landed_url = active_page.url
        page_title = await active_page.title()
        outcome: dict[str, Any] = {
            "success": True,
            "url": landed_url,
            "title": page_title,
            "message": f"Successfully navigated to {url}",
        }
    except Exception as exc:
        # Failures are reported in the result dict rather than raised.
        logger.error("Navigation failed: %s", exc)
        outcome = {
            "success": False,
            "error": str(exc),
            "message": f"Failed to navigate to {url}: {exc}",
        }
    return outcome
|
|
270
|
+
|
|
271
|
+
async def screenshot(self) -> ContentResult:
    """Capture the full current page and return it base64-encoded.

    Returns:
        A ``ContentResult`` whose ``base64_image`` holds the encoded capture, or
        one whose ``error`` describes why the capture failed.

    Raises:
        RuntimeError: If no page is available after ``_ensure_browser``.
    """
    import base64

    await self._ensure_browser()
    active_page = self.page
    if active_page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    try:
        image_bytes = await active_page.screenshot(full_page=True)
        encoded_image = base64.b64encode(image_bytes).decode()
    except Exception as exc:
        # Failures are reported in the result rather than raised.
        logger.error("Screenshot failed: %s", exc)
        return ContentResult(error=f"Failed to take screenshot: {exc}")
    return ContentResult(base64_image=encoded_image)
|
|
291
|
+
|
|
292
|
+
async def click(
    self,
    selector: str,
    button: Literal["left", "right", "middle"] = "left",
    count: int = 1,
    wait_for_navigation: bool = True,
) -> dict[str, Any]:
    """Click the element matched by ``selector``.

    Args:
        selector: CSS selector for element to click
        button: Mouse button forwarded to ``page.click``.
        count: Forwarded to ``page.click`` as ``click_count``.
        wait_for_navigation: Accepted for interface compatibility; this
            method does not read it.

    Returns:
        Dict with ``success`` and ``message``; on failure it also carries
        ``error`` with the exception text.

    Raises:
        RuntimeError: If ``self.page`` is still ``None`` after
            ``_ensure_browser`` has returned.
    """
    await self._ensure_browser()
    if self.page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    try:
        await self.page.click(selector, button=button, click_count=count)
        outcome = dict(success=True, message=f"Clicked element: {selector}")
        return outcome
    except Exception as e:
        # Click errors are reported in the result dict, not propagated.
        logger.error("Click failed: %s", e)
        failure: dict[str, Any] = dict(success=False, error=str(e))
        failure["message"] = f"Failed to click {selector}: {e}"
        return failure
|
|
321
|
+
|
|
322
|
+
async def type_text(self, selector: str, text: str) -> dict[str, Any]:
    """Enter ``text`` into the element matched by ``selector``.

    The text is written with ``page.fill``.

    Args:
        selector: CSS selector for input element
        text: Text to type

    Returns:
        Dict with ``success`` and ``message``; on failure it also carries
        ``error`` with the exception text.

    Raises:
        RuntimeError: If ``self.page`` is still ``None`` after
            ``_ensure_browser`` has returned.
    """
    await self._ensure_browser()
    if self.page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    try:
        await self.page.fill(selector, text)
        confirmation = f"Typed '{text}' into {selector}"
        return dict(success=True, message=confirmation)
    except Exception as e:
        # Fill errors are reported in the result dict, not propagated.
        logger.error("Type failed: %s", e)
        reason = str(e)
        return dict(
            success=False,
            error=reason,
            message=f"Failed to type into {selector}: {e}",
        )
|
|
346
|
+
|
|
347
|
+
async def get_page_info(self) -> dict[str, Any]:
    """Report the URL and title of the current page.

    Returns:
        Dict with ``success``, ``url``, ``title`` and ``message`` on success;
        on failure a dict with ``success``, ``error`` and ``message``.

    Raises:
        RuntimeError: If ``self.page`` is still ``None`` after
            ``_ensure_browser`` has returned.
    """
    await self._ensure_browser()
    if self.page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    try:
        page_url = self.page.url
        page_title = await self.page.title()
        info: dict[str, Any] = dict(success=True, url=page_url, title=page_title)
        info["message"] = f"Current page: {page_title} ({page_url})"
        return info
    except Exception as e:
        # Lookup errors are reported in the result dict, not propagated.
        logger.error("Get page info failed: %s", e)
        return dict(success=False, error=str(e), message=f"Failed to get page info: {e}")
|
|
369
|
+
|
|
370
|
+
async def wait_for_element(self, selector: str, timeout: float = 30000) -> dict[str, Any]:
    """Wait for an element to appear.

    Args:
        selector: CSS selector for element
        timeout: Maximum time to wait, in milliseconds; forwarded to
            ``page.wait_for_selector``. Defaults to 30000, the value that
            was previously hard-coded, so existing callers are unaffected.

    Returns:
        Dict with ``success`` and ``message``; on failure it also carries
        ``error`` with the exception text.

    Raises:
        RuntimeError: If ``self.page`` is still ``None`` after
            ``_ensure_browser`` has returned.
    """
    await self._ensure_browser()
    if self.page is None:
        raise RuntimeError("Page not initialized after _ensure_browser")

    try:
        await self.page.wait_for_selector(selector, timeout=timeout)
        return {"success": True, "message": f"Element {selector} appeared"}
    except Exception as e:
        # Timeouts and other wait errors are reported in the result dict.
        logger.error("Wait for element failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            # Report the timeout actually used, not a fixed figure.
            "message": f"Element {selector} did not appear within {timeout}ms: {e}",
        }
|
|
393
|
+
|
|
394
|
+
async def close(self) -> None:
    """Shut down the browser and Playwright, then drop all held references.

    Errors raised while closing the browser or stopping Playwright are
    logged and do not stop the rest of the cleanup.
    """
    if self._browser:
        try:
            await self._browser.close()
            logger.info("Browser closed")
        except Exception as e:
            logger.error("Error closing browser: %s", e)

    if self._playwright:
        try:
            await self._playwright.stop()
        except Exception as e:
            logger.error("Error stopping playwright: %s", e)

    # Reset every handle regardless of whether shutdown succeeded.
    # The original comment notes that clearing `env` clears the page
    # (presumably `page` is backed by `env` -- confirm against the class).
    for attr_name in ("_browser", "_browser_context", "env", "_playwright"):
        setattr(self, attr_name, None)
|