browsercontrol 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ """
2
+ BrowserControl - Browser automation MCP server with Set of Marks.
3
+ """
4
+
5
+ from browsercontrol.server import mcp
6
+
7
# Public names re-exported by ``from browsercontrol import *``.
__all__ = ["mcp"]

# Version string of this package release.
__version__ = "0.1.0"
@@ -0,0 +1,19 @@
1
+ """
2
+ Entry point for running BrowserControl as a module.
3
+
4
+ Usage:
5
+ python -m browsercontrol
6
+ # or
7
+ browsercontrol
8
+ """
9
+
10
+ from browsercontrol.server import mcp
11
+
12
+
13
def main():
    """Start the BrowserControl MCP server by calling ``mcp.run()``."""
    mcp.run()


# Only start the server when executed as a script / ``python -m`` target,
# never as a side effect of importing this module.
if __name__ == "__main__":
    main()
@@ -0,0 +1,417 @@
1
+ """
2
+ Browser lifecycle management with Set of Marks (SoM) annotation.
3
+ Includes console, network, and error capture for developer tools.
4
+ """
5
+
6
+ import logging
7
+ import time
8
+ from io import BytesIO
9
+ from pathlib import Path
10
+
11
+ from playwright.async_api import async_playwright, Browser, BrowserContext, Page
12
+ from PIL import Image as PILImage, ImageDraw, ImageFont
13
+
14
+ from browsercontrol.config import config
15
+
16
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Element records from the most recent Set-of-Marks screenshot, keyed by the
# number drawn on the screenshot; used for click-by-ID.
element_map: dict[int, dict] = dict()
20
+
21
+
22
class BrowserManager:
    """Manage the browser lifecycle and provide access to the active page.

    Besides starting and stopping a persistent Chromium context, the manager
    records console messages, page errors and network activity of the active
    page in bounded in-memory buffers, and can produce "Set of Marks"
    screenshots in which every visible interactive element is boxed and
    numbered.
    """

    # Upper bounds for the capture buffers; the oldest entries are dropped.
    _MAX_CONSOLE_LOGS = 200
    _MAX_PAGE_ERRORS = 100
    _MAX_NETWORK_REQUESTS = 100

    def __init__(self):
        self._playwright = None
        self._browser: Browser | None = None
        self._context: BrowserContext | None = None
        self._page: Page | None = None
        self._started = False

        # Developer tools storage
        self._console_logs: list[dict] = []
        self._network_requests: list[dict] = []
        self._page_errors: list[dict] = []
        # In-flight requests keyed by id() of the request object. Keying by
        # URL would let two concurrent requests for the same URL overwrite
        # each other.
        self._request_map: dict[int, dict] = {}

    @property
    def is_started(self) -> bool:
        """Return True when start() succeeded and a browser context exists."""
        return self._started and self._context is not None

    async def _ensure_browser_installed(self) -> None:
        """Install Chromium if the Playwright driver reports it as missing.

        The check and the installation both spawn blocking subprocesses, so
        they are executed in a worker thread to keep the event loop
        responsive. If the check itself fails for any reason, installation is
        attempted anyway.
        """
        import asyncio

        try:
            installed = await asyncio.to_thread(self._chromium_is_installed)
        except Exception as e:
            # If check fails, try to install anyway
            logger.info(f"Checking browser installation: {e}")
            installed = False

        if installed:
            logger.debug("Chromium already installed")
            return

        logger.info("Chromium not found, installing automatically...")
        await asyncio.to_thread(self._install_chromium)

    def _chromium_is_installed(self) -> bool:
        """Return True when every dry-run install location already exists.

        Runs the Playwright driver with ``install --dry-run chromium`` and
        inspects its "Install location:" lines. Testing for the word
        "chromium" in the output is not a usable signal here.
        NOTE(review): this assumes the dry-run always names the browser and
        prints one "Install location:" line per component - confirm against
        the Playwright CLI version in use.
        Returns False when the command fails or prints no install location,
        so the caller falls back to installing.

        Raises:
            Exception: whatever the driver lookup or subprocess call raises
                (including ``subprocess.TimeoutExpired`` after 10 seconds).
        """
        import subprocess

        from playwright._impl._driver import compute_driver_executable

        driver = compute_driver_executable()
        # NOTE(review): the driver may be returned either as a single path or
        # as a (runtime, cli script) pair depending on the Playwright version;
        # both shapes are accepted here - confirm against the pinned version.
        if isinstance(driver, (tuple, list)):
            command = [str(part) for part in driver]
        else:
            command = [str(driver)]

        result = subprocess.run(
            [*command, "install", "--dry-run", "chromium"],
            capture_output=True,
            text=True,
            timeout=10,
        )
        if result.returncode != 0:
            return False

        # Split on the first colon only so Windows drive letters survive.
        locations = [
            line.split(":", 1)[1].strip()
            for line in result.stdout.splitlines()
            if line.strip().lower().startswith("install location:")
        ]
        return bool(locations) and all(Path(loc).is_dir() for loc in locations)

    def _install_chromium(self) -> None:
        """Install Chromium via ``python -m playwright install chromium``.

        Best effort: failures are logged, never raised, so that the later
        browser launch can report the actual problem.
        """
        import subprocess
        import sys

        logger.info("Installing Chromium browser (one-time setup)...")

        try:
            # Use playwright install command
            result = subprocess.run(
                [sys.executable, "-m", "playwright", "install", "chromium"],
                capture_output=True,
                text=True,
                timeout=300,  # 5 minutes timeout for download
            )

            if result.returncode == 0:
                logger.info("Chromium installed successfully!")
            else:
                # Don't fail - let Playwright try to launch and give better error
                logger.warning(f"Chromium installation output: {result.stderr}")

        except subprocess.TimeoutExpired:
            logger.error("Chromium installation timed out. Please run: playwright install chromium")
        except Exception as e:
            logger.error(f"Failed to install Chromium: {e}")
            logger.info("Please run manually: playwright install chromium")

    def _record_network_request(self, record: dict) -> None:
        """Append a finished request record and enforce the buffer cap."""
        self._network_requests.append(record)
        if len(self._network_requests) > self._MAX_NETWORK_REQUESTS:
            self._network_requests = self._network_requests[-self._MAX_NETWORK_REQUESTS:]

    def _setup_page_listeners(self, page: "Page") -> None:
        """Attach console, page-error and network listeners to ``page``.

        Args:
            page: object exposing ``on(event, handler)``; handlers are
                registered for "console", "pageerror", "request", "response"
                and "requestfailed".
        """

        # Console messages
        def on_console(msg):
            location = msg.location
            self._console_logs.append({
                "level": msg.type,
                "text": msg.text,
                "location": (
                    f"{location.get('url', '')}:{location.get('lineNumber', '')}"
                    if location else ""
                ),
                "timestamp": time.time(),
            })
            if len(self._console_logs) > self._MAX_CONSOLE_LOGS:
                self._console_logs = self._console_logs[-self._MAX_CONSOLE_LOGS:]

        # Page errors (uncaught exceptions)
        def on_page_error(error):
            self._page_errors.append({
                "message": str(error),
                "stack": getattr(error, 'stack', ''),
                "timestamp": time.time(),
            })
            if len(self._page_errors) > self._MAX_PAGE_ERRORS:
                self._page_errors = self._page_errors[-self._MAX_PAGE_ERRORS:]

        # Network request started
        def on_request(request):
            self._request_map[id(request)] = {
                "method": request.method,
                "url": request.url,
                "start_time": time.time(),
                "status": "pending",
                "resource_type": request.resource_type,
            }

        def finish(request, status):
            """Pop the pending record for ``request`` and stamp its outcome."""
            record = self._request_map.pop(id(request), None)
            if record is None:
                return None
            record["status"] = status
            record["duration"] = int((time.time() - record["start_time"]) * 1000)
            return record

        # Network request completed
        def on_response(response):
            request = response.request
            record = finish(request, response.status)
            if record is None:
                # The start of this request was never observed (e.g. the
                # buffers were cleared mid-flight); record it without timing.
                record = {
                    "method": request.method,
                    "url": response.url,
                    "status": response.status,
                    "resource_type": request.resource_type,
                }
            self._record_network_request(record)

        # Network request failed
        def on_request_failed(request):
            record = finish(request, "failed")
            if record is not None:
                self._record_network_request(record)

        page.on("console", on_console)
        page.on("pageerror", on_page_error)
        page.on("request", on_request)
        page.on("response", on_response)
        page.on("requestfailed", on_request_failed)

    async def start(self) -> None:
        """Start the browser with a persistent context.

        Does nothing (besides logging a warning) when already started. On
        failure the partially started state is torn down via stop() and the
        exception is re-raised.
        """
        if self._started:
            logger.warning("Browser already started")
            return

        await self._ensure_browser_installed()

        config.user_data_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Starting browser with user data dir: {config.user_data_dir}")

        self._playwright = await async_playwright().start()

        # Build launch args
        # Add proxy bypass for localhost to fix connection refused errors
        args = [
            "--no-first-run",
            "--no-default-browser-check",
            "--proxy-bypass-list=<-loopback>",
            "--no-proxy-server",
        ]
        if config.extension_path and config.extension_path.exists():
            args.extend([
                f"--disable-extensions-except={config.extension_path}",
                f"--load-extension={config.extension_path}",
            ])
            logger.info(f"Loading extension from: {config.extension_path}")

        try:
            self._context = await self._playwright.chromium.launch_persistent_context(
                user_data_dir=str(config.user_data_dir),
                headless=config.headless,
                args=args,
                viewport={"width": config.viewport_width, "height": config.viewport_height},
            )

            # Get or create initial page
            if self._context.pages:
                self._page = self._context.pages[0]
            else:
                self._page = await self._context.new_page()

            # Set up event listeners
            self._setup_page_listeners(self._page)

            self._started = True
            logger.info("Browser started successfully")

        except Exception as e:
            logger.error(f"Failed to start browser: {e}")
            await self.stop()
            raise

    async def stop(self) -> None:
        """Stop the browser and discard all captured developer-tools data.

        Errors while closing the context or stopping Playwright are logged as
        warnings and otherwise ignored, so stop() can always run to the end.
        """
        logger.info("Stopping browser")
        self._started = False

        if self._context:
            try:
                await self._context.close()
            except Exception as e:
                logger.warning(f"Error closing context: {e}")
            self._context = None

        if self._playwright:
            try:
                await self._playwright.stop()
            except Exception as e:
                logger.warning(f"Error stopping playwright: {e}")
            self._playwright = None

        self._page = None

        # Clear dev tools data
        self._console_logs.clear()
        self._network_requests.clear()
        self._page_errors.clear()
        self._request_map.clear()

    async def ensure_started(self) -> None:
        """Start the browser if it is not currently started."""
        if not self.is_started:
            logger.info("Browser not started, starting now")
            await self.start()

    @property
    def page(self) -> "Page":
        """Return the current active page.

        Raises:
            RuntimeError: if no page exists because the browser is not started.
        """
        if not self._page:
            raise RuntimeError("Browser not started. Call start() first.")
        return self._page

    # Developer tools methods
    def get_console_logs(self) -> list[dict]:
        """Return a shallow copy of the captured console logs."""
        return self._console_logs.copy()

    def clear_console_logs(self) -> None:
        """Discard the captured console logs."""
        self._console_logs.clear()

    def get_network_requests(self) -> list[dict]:
        """Return a shallow copy of the captured (finished) network requests."""
        return self._network_requests.copy()

    def clear_network_requests(self) -> None:
        """Discard captured network requests, including in-flight ones."""
        self._network_requests.clear()
        self._request_map.clear()

    def get_page_errors(self) -> list[dict]:
        """Return a shallow copy of the captured page errors."""
        return self._page_errors.copy()

    def clear_page_errors(self) -> None:
        """Discard the captured page errors."""
        self._page_errors.clear()

    async def get_interactive_elements(self) -> list[dict]:
        """Return the interactive elements visible in the viewport.

        Evaluates a script in the page that collects elements matching a
        fixed list of selectors, skipping zero-sized elements and elements
        entirely outside the viewport. Each record holds the bounding box
        (x, y, width, height), its centre (centerX, centerY), tag, type, a
        short text label, href, id and className.
        """
        js_code = """
        () => {
            const interactiveSelectors = [
                'a[href]',
                'button',
                'input:not([type="hidden"])',
                'select',
                'textarea',
                '[role="button"]',
                '[role="link"]',
                '[role="menuitem"]',
                '[role="tab"]',
                '[onclick]',
                '[tabindex]:not([tabindex="-1"])',
                'label[for]',
                '[contenteditable="true"]'
            ];

            const elements = [];
            const seen = new Set();

            for (const selector of interactiveSelectors) {
                for (const el of document.querySelectorAll(selector)) {
                    if (seen.has(el)) continue;
                    seen.add(el);

                    const rect = el.getBoundingClientRect();
                    if (rect.width === 0 || rect.height === 0) continue;
                    if (rect.bottom < 0 || rect.top > window.innerHeight) continue;
                    if (rect.right < 0 || rect.left > window.innerWidth) continue;

                    let text = el.innerText?.trim()?.substring(0, 50) || '';
                    let placeholder = el.placeholder || '';
                    let ariaLabel = el.getAttribute('aria-label') || '';
                    let title = el.title || '';
                    let type = el.type || el.tagName.toLowerCase();
                    let href = el.href || '';

                    elements.push({
                        x: rect.x,
                        y: rect.y,
                        width: rect.width,
                        height: rect.height,
                        centerX: rect.x + rect.width / 2,
                        centerY: rect.y + rect.height / 2,
                        tag: el.tagName.toLowerCase(),
                        type: type,
                        text: text || placeholder || ariaLabel || title,
                        href: href,
                        id: el.id || null,
                        className: el.className || null
                    });
                }
            }

            return elements;
        }
        """
        return await self.page.evaluate(js_code)

    async def screenshot_with_som(self) -> tuple[bytes, dict[int, dict]]:
        """Take a screenshot and overlay Set of Marks (numbered bounding boxes).

        Rebinds the module-level ``element_map`` to a fresh mapping of mark
        number (starting at 1) to element record.

        Returns:
            The annotated PNG bytes and the new element mapping.
        """
        global element_map

        screenshot_bytes = await self.page.screenshot(type="png")
        elements = await self.get_interactive_elements()

        img = PILImage.open(BytesIO(screenshot_bytes))
        draw = ImageDraw.Draw(img, "RGBA")

        # Try to use a reasonable font; fall back to PIL's built-in one.
        font = None
        for candidate in (
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
            "arial.ttf",
        ):
            try:
                font = ImageFont.truetype(candidate, 12)
                break
            except Exception:
                continue
        if font is None:
            font = ImageFont.load_default()

        element_map = {}

        for element_id, elem in enumerate(elements, start=1):
            element_map[element_id] = elem

            x, y = elem["x"], elem["y"]
            w, h = elem["width"], elem["height"]

            # Draw semi-transparent box
            box_color = (255, 0, 0, 60)
            draw.rectangle([x, y, x + w, y + h], outline="red", width=2, fill=box_color)

            # Draw label
            label = str(element_id)
            label_bbox = draw.textbbox((0, 0), label, font=font)
            label_w = label_bbox[2] - label_bbox[0] + 6
            label_h = label_bbox[3] - label_bbox[1] + 4

            # Place the label just above the box, clamped to the image origin.
            label_x = max(0, x)
            label_y = max(0, y - label_h - 2)

            draw.rectangle(
                [label_x, label_y, label_x + label_w, label_y + label_h],
                fill="red",
            )
            draw.text((label_x + 3, label_y + 2), label, fill="white", font=font)

        output = BytesIO()
        img.save(output, format="PNG")

        logger.debug(f"Captured screenshot with {len(element_map)} elements")
        return output.getvalue(), element_map
409
+
410
+
411
# Shared BrowserManager instance used across the package.
browser = BrowserManager()


def get_element_map() -> dict[int, dict]:
    """Return the module-level element map as it is bound right now."""
    current = element_map
    return current
@@ -0,0 +1,61 @@
1
+ """
2
+ Configuration for Browser Control MCP server.
3
+
4
+ Settings can be configured via environment variables.
5
+ """
6
+
7
+ import os
8
+ from pathlib import Path
9
+ from dataclasses import dataclass
10
+
11
+
12
+ @dataclass
13
+ class Config:
14
+ """Browser control configuration."""
15
+
16
+ # Browser settings
17
+ headless: bool = True
18
+ viewport_width: int = 1280
19
+ viewport_height: int = 720
20
+ timeout_ms: int = 30000
21
+
22
+ # Paths
23
+ user_data_dir: Path = Path.home() / ".browsercontrol" / "user_data"
24
+ extension_path: Path | None = None
25
+
26
+ # Logging
27
+ log_level: str = "INFO"
28
+
29
+ @classmethod
30
+ def from_env(cls) -> "Config":
31
+ """Load configuration from environment variables."""
32
+ config = cls()
33
+
34
+ # Browser settings
35
+ if os.getenv("BROWSER_HEADLESS"):
36
+ config.headless = os.getenv("BROWSER_HEADLESS", "true").lower() == "true"
37
+
38
+ if os.getenv("BROWSER_VIEWPORT_WIDTH"):
39
+ config.viewport_width = int(os.getenv("BROWSER_VIEWPORT_WIDTH", "1280"))
40
+
41
+ if os.getenv("BROWSER_VIEWPORT_HEIGHT"):
42
+ config.viewport_height = int(os.getenv("BROWSER_VIEWPORT_HEIGHT", "720"))
43
+
44
+ if os.getenv("BROWSER_TIMEOUT"):
45
+ config.timeout_ms = int(os.getenv("BROWSER_TIMEOUT", "30000"))
46
+
47
+ # Paths
48
+ if os.getenv("BROWSER_USER_DATA_DIR"):
49
+ config.user_data_dir = Path(os.getenv("BROWSER_USER_DATA_DIR"))
50
+
51
+ if os.getenv("BROWSER_EXTENSION_PATH"):
52
+ config.extension_path = Path(os.getenv("BROWSER_EXTENSION_PATH"))
53
+
54
+ # Logging
55
+ config.log_level = os.getenv("LOG_LEVEL", "INFO")
56
+
57
+ return config
58
+
59
+
60
+ # Global configuration instance
61
+ config = Config.from_env()
@@ -0,0 +1,89 @@
1
+ """
2
+ Browser Control MCP Server
3
+
4
+ Main server module that sets up the MCP server with all tools.
5
+ """
6
+
7
+ import logging
8
+ from contextlib import asynccontextmanager
9
+
10
+ from fastmcp import FastMCP
11
+
12
+ from browsercontrol.browser import browser
13
+ from browsercontrol.config import config
14
+ from browsercontrol.tools import (
15
+ register_navigation_tools,
16
+ register_interaction_tools,
17
+ register_form_tools,
18
+ register_content_tools,
19
+ register_devtools,
20
+ register_recording_tools,
21
+ )
22
+
23
+ # Configure logging
24
# Resolve the configured level name to a logging constant; unknown names
# fall back to INFO.
_log_level = getattr(logging, config.log_level.upper(), logging.INFO)
logging.basicConfig(
    level=_log_level,
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
30
+
31
+
32
@asynccontextmanager
async def lifespan(app: FastMCP):
    """Manage browser lifecycle with the MCP server.

    Starts the shared browser before yielding control to the server and
    always stops it afterwards, whether startup failed, the server exited
    normally, or an error was raised while it was running.

    Raises:
        Exception: whatever ``browser.start()`` raises, after logging it.
    """
    logger.info("Starting Browser Control MCP server")
    try:
        # Only the start call is guarded, so errors raised while the server
        # is running are not reported as browser start failures.
        try:
            await browser.start()
        except Exception as e:
            logger.error(f"Failed to start browser: {e}")
            raise
        yield
    finally:
        logger.info("Shutting down Browser Control MCP server")
        await browser.stop()
45
+
46
+
47
+ # Create the MCP server
48
# The MCP server instance; the instructions text is passed to FastMCP and
# the browser is tied to the server through the lifespan hook.
mcp = FastMCP(
    "BrowserControl",
    instructions="""Full-featured browser automation for AI agents.

Features:
- Set of Marks (SoM): Screenshots show numbered interactive elements.
- Developer Tools: Console logs, network requests, errors, and performance metrics.
- Session Recording: Capture video traces and snapshots for debugging.
- Persistent Session: Cookies and login state are saved automatically.
- Smart Navigation: Auto-handles localhost/127.0.0.1 and bypasses proxies.

Core Actions:
- navigate_to(url)
- click(element_id)
- type_text(element_id, text)
- scroll(direction, amount)

Developer Tools:
- get_console_logs()
- get_network_requests()
- get_page_errors()
- inspect_element(id)

Session Recording:
- start_recording()
- stop_recording()
- take_snapshot()
- list_recordings()""",
    lifespan=lifespan,
)

# Attach every tool group to the server, in the same order as before.
for _register_tools in (
    register_navigation_tools,
    register_interaction_tools,
    register_form_tools,
    register_content_tools,
    register_devtools,
    register_recording_tools,
):
    _register_tools(mcp)

logger.info("Browser Control MCP server initialized with all tools")
88
+
89
+
@@ -0,0 +1,17 @@
1
+ """Browser control tools package."""
2
+
3
+ from browsercontrol.tools.navigation import register_navigation_tools
4
+ from browsercontrol.tools.interaction import register_interaction_tools
5
+ from browsercontrol.tools.forms import register_form_tools
6
+ from browsercontrol.tools.content import register_content_tools
7
+ from browsercontrol.tools.devtools import register_devtools
8
+ from browsercontrol.tools.recording import register_recording_tools
9
+
10
# Registration functions re-exported by this package, one per tool module.
__all__ = [
    "register_navigation_tools", "register_interaction_tools",
    "register_form_tools", "register_content_tools",
    "register_devtools", "register_recording_tools",
]