browsercontrol 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,204 @@
1
+ """Interaction tools for browser control."""
2
+
3
+ import logging
4
+ from fastmcp import FastMCP
5
+ from fastmcp.utilities.types import Image
6
+
7
+ from browsercontrol.browser import browser, get_element_map
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
async def _get_screenshot_with_summary() -> tuple[Image, str]:
    """Capture an annotated screenshot and describe the elements found on it.

    Returns:
        An ``(image, summary)`` pair. ``image`` wraps the PNG bytes returned
        by ``browser.screenshot_with_som()``; ``summary`` is a newline-joined
        listing of at most the first 30 entries of the element map, followed
        by a count of the entries that were left out.
    """
    png_bytes, elements = await browser.screenshot_with_som()
    total = len(elements)

    lines = [f"Found {total} interactive elements:"]
    for position, (eid, info) in enumerate(elements.items()):
        if position >= 30:
            break
        # Show the element's text (cut to 40 chars) or, when empty, its tag.
        label = info["text"][:40] if info["text"] else info["tag"]
        lines.append(f" [{eid}] {info['tag']} - {label}")

    if total > 30:
        lines.append(f" ... and {total - 30} more")

    return Image(data=png_bytes, format="png"), "\n".join(lines)
26
+
27
+
28
def register_interaction_tools(mcp: FastMCP) -> None:
    """Register interaction tools with the MCP server.

    Registers ``click``, ``click_at``, ``type_text``, ``press_key``, ``hover``,
    ``scroll_to_element`` and ``wait``. Every tool returns a ``(message, image)``
    tuple whose message ends with the element summary produced by
    ``_get_screenshot_with_summary()``.

    Args:
        mcp: Server whose ``tool()`` decorator is used to register each tool.
    """

    def _label(elem: dict, limit: int = 30) -> str:
        """Return the element's text cut to *limit* chars ('' if missing or None)."""
        return (elem.get("text") or "")[:limit]

    async def _not_found(element_id: int, hint: str = "") -> tuple[str, Image]:
        """Build the shared "element not found" reply with a fresh screenshot."""
        image, summary = await _get_screenshot_with_summary()
        return f"Error: Element {element_id} not found.{hint}\n\n{summary}", image

    @mcp.tool()
    async def click(element_id: int) -> tuple[str, Image]:
        """
        Click on an element by its ID number shown in the screenshot.

        Args:
            element_id: The number label shown on the element in the screenshot
        """
        try:
            await browser.ensure_started()
            elem_map = get_element_map()

            if element_id not in elem_map:
                return await _not_found(element_id, f" Valid IDs: {list(elem_map)[:20]}")

            elem = elem_map[element_id]
            # Computed once and None-safe: a missing/None text must not abort
            # the click before it happens.
            text = _label(elem)
            logger.info(f"Clicking element {element_id}: {elem['tag']} - {text}")
            await browser.page.mouse.click(elem["centerX"], elem["centerY"])
            await browser.page.wait_for_timeout(500)  # give the page time to react

            image, summary = await _get_screenshot_with_summary()
            return f"Clicked element {element_id} ({elem['tag']}: {text or 'no text'})\n\n{summary}", image

        except Exception as e:
            logger.error(f"Click failed: {e}")
            # Prefer returning the error together with the current page state;
            # only raise when even the screenshot cannot be taken.
            try:
                image, summary = await _get_screenshot_with_summary()
            except Exception:
                raise RuntimeError(f"Click failed: {e}") from e
            return f"Error clicking element {element_id}: {e}\n\n{summary}", image

    @mcp.tool()
    async def click_at(x: int, y: int) -> tuple[str, Image]:
        """
        Click at specific x,y coordinates.

        Args:
            x: X coordinate
            y: Y coordinate
        """
        try:
            await browser.ensure_started()
            logger.info(f"Clicking at ({x}, {y})")
            await browser.page.mouse.click(x, y)
            await browser.page.wait_for_timeout(500)
            image, summary = await _get_screenshot_with_summary()
            return f"Clicked at ({x}, {y})\n\n{summary}", image
        except Exception as e:
            logger.error(f"Click at coordinates failed: {e}")
            raise RuntimeError(f"Click at ({x}, {y}) failed: {e}") from e

    @mcp.tool()
    async def type_text(element_id: int, text: str) -> tuple[str, Image]:
        """
        Type text into an input element by its ID number.

        Args:
            element_id: The number label shown on the element
            text: Text to type
        """
        try:
            await browser.ensure_started()
            elem_map = get_element_map()

            if element_id not in elem_map:
                return await _not_found(element_id)

            elem = elem_map[element_id]
            logger.info(f"Typing into element {element_id}")
            # Focus the field, select its current content, then type over it.
            await browser.page.mouse.click(elem["centerX"], elem["centerY"])
            # NOTE(review): select-all is Meta+a on macOS; Control+a may leave
            # existing content in place there - confirm target platforms.
            await browser.page.keyboard.press("Control+a")
            await browser.page.keyboard.type(text)

            image, summary = await _get_screenshot_with_summary()
            return f"Typed '{text}' into element {element_id}\n\n{summary}", image

        except Exception as e:
            logger.error(f"Type text failed: {e}")
            raise RuntimeError(f"Type text failed: {e}") from e

    @mcp.tool()
    async def press_key(key: str) -> tuple[str, Image]:
        """
        Press a keyboard key.

        Args:
            key: Key to press (e.g., "Enter", "Tab", "Escape", "ArrowDown", "Backspace")
        """
        try:
            await browser.ensure_started()
            logger.info(f"Pressing key: {key}")
            await browser.page.keyboard.press(key)
            await browser.page.wait_for_timeout(300)
            image, summary = await _get_screenshot_with_summary()
            return f"Pressed key '{key}'\n\n{summary}", image
        except Exception as e:
            logger.error(f"Press key failed: {e}")
            raise RuntimeError(f"Press key '{key}' failed: {e}") from e

    @mcp.tool()
    async def hover(element_id: int) -> tuple[str, Image]:
        """
        Hover over an element by its ID number.

        Args:
            element_id: The number label shown on the element
        """
        try:
            await browser.ensure_started()
            elem_map = get_element_map()

            if element_id not in elem_map:
                return await _not_found(element_id)

            elem = elem_map[element_id]
            logger.info(f"Hovering over element {element_id}")
            await browser.page.mouse.move(elem["centerX"], elem["centerY"])
            await browser.page.wait_for_timeout(300)

            image, summary = await _get_screenshot_with_summary()
            return f"Hovering over element {element_id}\n\n{summary}", image

        except Exception as e:
            logger.error(f"Hover failed: {e}")
            raise RuntimeError(f"Hover failed: {e}") from e

    @mcp.tool()
    async def scroll_to_element(element_id: int) -> tuple[str, Image]:
        """
        Scroll to bring an element into view.

        Args:
            element_id: The number label shown on the element
        """
        try:
            await browser.ensure_started()
            elem_map = get_element_map()

            if element_id not in elem_map:
                return await _not_found(element_id)

            elem = elem_map[element_id]
            # Leave a 100px margin above the element.
            # NOTE(review): assumes elem['y'] is a document offset rather than a
            # viewport-relative one - confirm against get_element_map().
            await browser.page.evaluate(f"window.scrollTo(0, {elem['y'] - 100})")
            await browser.page.wait_for_timeout(300)

            image, summary = await _get_screenshot_with_summary()
            return f"Scrolled to element {element_id}\n\n{summary}", image

        except Exception as e:
            logger.error(f"Scroll to element failed: {e}")
            raise RuntimeError(f"Scroll to element failed: {e}") from e

    @mcp.tool()
    async def wait(seconds: float = 1.0) -> tuple[str, Image]:
        """
        Wait for a specified time (useful for pages with animations or loading).

        Args:
            seconds: Time to wait in seconds (default: 1.0)
        """
        try:
            await browser.ensure_started()
            # A negative duration is treated as "do not wait" rather than being
            # handed to the browser as a negative timeout.
            await browser.page.wait_for_timeout(max(0, int(seconds * 1000)))
            image, summary = await _get_screenshot_with_summary()
            return f"Waited {seconds}s\n\n{summary}", image
        except Exception as e:
            logger.error(f"Wait failed: {e}")
            raise RuntimeError(f"Wait failed: {e}") from e

    logger.debug("Registered interaction tools")
@@ -0,0 +1,163 @@
1
+ """Navigation tools for browser control."""
2
+
3
+ import logging
4
+ from fastmcp import FastMCP
5
+ from fastmcp.utilities.types import Image
6
+
7
+ from browsercontrol.browser import browser
8
+ from browsercontrol.config import config
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
async def _get_screenshot_with_summary() -> tuple[Image, str]:
    """Capture an annotated screenshot and describe the elements found on it.

    Returns:
        An ``(image, summary)`` pair. ``image`` wraps the PNG bytes returned
        by ``browser.screenshot_with_som()``; ``summary`` is a newline-joined
        listing of at most the first 30 entries of the element map, followed
        by a count of the entries that were left out.
    """
    png_bytes, elements = await browser.screenshot_with_som()
    total = len(elements)

    lines = [f"Found {total} interactive elements:"]
    for position, (eid, info) in enumerate(elements.items()):
        if position >= 30:
            break
        # Show the element's text (cut to 40 chars) or, when empty, its tag.
        label = info["text"][:40] if info["text"] else info["tag"]
        lines.append(f" [{eid}] {info['tag']} - {label}")

    if total > 30:
        lines.append(f" ... and {total - 30} more")

    return Image(data=png_bytes, format="png"), "\n".join(lines)
27
+
28
+
29
def _localhost_to_loopback(url: str) -> str | None:
    """Return *url* with a ``localhost`` host replaced by ``127.0.0.1``.

    Only the host component is rewritten; user info, port, path, query and
    fragment are kept. Returns ``None`` when the URL cannot be parsed or its
    host is not exactly ``localhost``.
    """
    from urllib.parse import urlsplit, urlunsplit

    try:
        parts = urlsplit(url)
        host = parts.hostname  # lower-cased by urlsplit
    except ValueError:
        return None
    if host != "localhost":
        return None

    userinfo, at, hostport = parts.netloc.rpartition("@")
    netloc = f"{userinfo}{at}127.0.0.1{hostport[len('localhost'):]}"
    return urlunsplit(parts._replace(netloc=netloc))


def register_navigation_tools(mcp: FastMCP) -> None:
    """Register navigation tools with the MCP server.

    Registers ``navigate_to``, ``go_back``, ``go_forward``, ``refresh_page``
    and ``scroll``. Every tool returns a ``(message, image)`` tuple whose
    message ends with the element summary produced by
    ``_get_screenshot_with_summary()``.

    Args:
        mcp: Server whose ``tool()`` decorator is used to register each tool.
    """

    async def _error_reply(reply: str, failure: str, error: Exception) -> tuple[str, Image]:
        """Return *reply* with the current page state, or raise *failure*.

        If the screenshot itself cannot be taken, a ``RuntimeError`` carrying
        *failure* is raised with *error* as its cause, so the original problem
        is never masked by the secondary one.
        """
        try:
            image, summary = await _get_screenshot_with_summary()
        except Exception:
            raise RuntimeError(failure) from error
        return f"{reply}\n\n{summary}", image

    @mcp.tool()
    async def navigate_to(url: str) -> tuple[str, Image]:
        """
        Navigate to a URL. Returns an annotated screenshot with numbered interactive elements.

        Args:
            url: The URL to navigate to

        Returns:
            Element summary and annotated screenshot
        """
        try:
            await browser.ensure_started()
            logger.info(f"Navigating to: {url}")

            try:
                await browser.page.goto(url, wait_until="domcontentloaded", timeout=config.timeout_ms)
            except Exception as e:
                # Handle localhost vs 127.0.0.1 resolution issues. Only the host
                # part is rewritten, never a "localhost" in the path or query.
                fallback_url = _localhost_to_loopback(url) if "ERR_CONNECTION_REFUSED" in str(e) else None
                if fallback_url is None:
                    raise
                logger.info(f"Navigation to localhost failed, retrying with: {fallback_url}")
                await browser.page.goto(fallback_url, wait_until="domcontentloaded", timeout=config.timeout_ms)
                url = fallback_url  # Update for success message

            await browser.page.wait_for_timeout(500)
            image, summary = await _get_screenshot_with_summary()
            return f"Navigated to {url}\n\n{summary}", image
        except Exception as e:
            logger.error(f"Navigation failed: {e}")
            return await _error_reply(f"Error navigating to {url}: {e}", f"Navigation failed: {e}", e)

    @mcp.tool()
    async def go_back() -> tuple[str, Image]:
        """Navigate back to the previous page."""
        try:
            await browser.ensure_started()
            await browser.page.go_back(timeout=config.timeout_ms)
            await browser.page.wait_for_timeout(500)
            image, summary = await _get_screenshot_with_summary()
            return f"Navigated back\n\n{summary}", image
        except Exception as e:
            logger.error(f"Go back failed: {e}")
            return await _error_reply(f"Error going back: {e}", f"Go back failed: {e}", e)

    @mcp.tool()
    async def go_forward() -> tuple[str, Image]:
        """Navigate forward to the next page."""
        try:
            await browser.ensure_started()
            await browser.page.go_forward(timeout=config.timeout_ms)
            await browser.page.wait_for_timeout(500)
            image, summary = await _get_screenshot_with_summary()
            return f"Navigated forward\n\n{summary}", image
        except Exception as e:
            logger.error(f"Go forward failed: {e}")
            return await _error_reply(f"Error going forward: {e}", f"Go forward failed: {e}", e)

    @mcp.tool()
    async def refresh_page() -> tuple[str, Image]:
        """Refresh the current page."""
        try:
            await browser.ensure_started()
            await browser.page.reload(timeout=config.timeout_ms)
            await browser.page.wait_for_timeout(500)
            image, summary = await _get_screenshot_with_summary()
            return f"Page refreshed\n\n{summary}", image
        except Exception as e:
            logger.error(f"Refresh failed: {e}")
            return await _error_reply(f"Error refreshing: {e}", f"Refresh failed: {e}", e)

    @mcp.tool()
    async def scroll(
        direction: str = "down",
        amount: str = "medium"
    ) -> tuple[str, Image]:
        """
        Scroll the page.

        Args:
            direction: "up", "down", "left", or "right"
            amount: "small" (100px), "medium" (400px), "large" (800px),
                    "page" (full viewport), "top", "bottom", or pixels like "500"
        """
        try:
            await browser.ensure_started()

            amount_map = {"small": 100, "medium": 400, "large": 800}
            # Unit (dx, dy) step for each supported direction.
            steps = {"up": (0, -1), "down": (0, 1), "left": (-1, 0), "right": (1, 0)}

            # "top" / "bottom" are absolute jumps and ignore the direction.
            if amount == "top":
                await browser.page.evaluate("window.scrollTo(0, 0)")
                image, summary = await _get_screenshot_with_summary()
                return f"Scrolled to top\n\n{summary}", image

            if amount == "bottom":
                await browser.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                image, summary = await _get_screenshot_with_summary()
                return f"Scrolled to bottom\n\n{summary}", image

            step = steps.get(direction.strip().lower())
            if step is None:
                # Report the problem instead of claiming a scroll that never ran.
                image, summary = await _get_screenshot_with_summary()
                return f"Error: Unknown direction '{direction}'. Use up, down, left, or right.\n\n{summary}", image
            dx, dy = step

            if amount == "page":
                # Use the real viewport extent along the scroll axis; 720 is the
                # previous fixed value, kept as the fallback.
                # NOTE(review): assumes browser.page is a Playwright Page.
                viewport = browser.page.viewport_size or {}
                pixels = viewport.get("width" if dx else "height", 720)
            else:
                pixels = amount_map.get(amount)
                if pixels is None:
                    try:
                        pixels = int(amount)
                    except ValueError:
                        pixels = 400  # unrecognised amount: same as "medium"

            # Pass the offsets as data rather than interpolating them into the
            # script: a negative amount with "up"/"left" used to yield "--500",
            # which is a JavaScript syntax error.
            await browser.page.evaluate(
                "([x, y]) => window.scrollBy(x, y)",
                [dx * pixels, dy * pixels],
            )

            image, summary = await _get_screenshot_with_summary()
            return f"Scrolled {direction} by {pixels}px\n\n{summary}", image

        except Exception as e:
            logger.error(f"Scroll failed: {e}")
            raise RuntimeError(f"Scroll failed: {e}") from e

    logger.debug("Registered navigation tools")
@@ -0,0 +1,221 @@
1
+ """Session recording tools for browser control."""
2
+
3
+ import logging
4
+ import os
5
+ from pathlib import Path
6
+ from datetime import datetime
7
+
8
+ from fastmcp import FastMCP
9
+ from fastmcp.utilities.types import Image
10
+
11
+ from browsercontrol.browser import browser
12
+ from browsercontrol.config import config
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
# Recording state shared by start_recording() and stop_recording().
_recording_path: Path | None = None  # trace file the active recording will be saved to
_recording_active: bool = False  # True between a successful start and the next stop


def _safe_stem(name: str) -> str:
    """Reduce a caller-supplied name to a single safe file-name stem.

    Characters other than letters, digits, space, ``-``, ``_`` and ``.`` are
    replaced by ``_`` so the name cannot contain path separators and escape
    the target directory. Leading/trailing dots and spaces are stripped. An
    empty result falls back to a ``YYYYMMDD_HHMMSS`` timestamp.
    """
    cleaned = "".join(ch if ch.isalnum() or ch in " -_." else "_" for ch in name).strip(" .")
    return cleaned or datetime.now().strftime("%Y%m%d_%H%M%S")


def register_recording_tools(mcp: FastMCP) -> None:
    """Register session recording tools with the MCP server.

    Registers ``start_recording``, ``stop_recording``, ``take_snapshot`` and
    ``list_recordings``. Recordings are Playwright traces (``.zip``) written
    under ``config.user_data_dir.parent / "recordings"``; snapshots go to the
    sibling ``snapshots`` directory.

    Args:
        mcp: Server whose ``tool()`` decorator is used to register each tool.
    """

    async def _current_view() -> Image:
        """Return the current annotated screenshot as an ``Image``."""
        screenshot_bytes, _ = await browser.screenshot_with_som()
        return Image(data=screenshot_bytes, format="png")

    @mcp.tool()
    async def start_recording(name: str = "") -> tuple[str, Image]:
        """
        Start recording the browser session as a Playwright trace.
        The trace will be saved when stop_recording is called.

        Args:
            name: Optional name for the recording (default: timestamp)

        Returns:
            Status message and screenshot
        """
        global _recording_path, _recording_active

        try:
            await browser.ensure_started()

            if _recording_active:
                return "Recording already in progress. Call stop_recording() first.", await _current_view()

            # Create recordings directory
            recordings_dir = config.user_data_dir.parent / "recordings"
            recordings_dir.mkdir(parents=True, exist_ok=True)
            target = recordings_dir / f"{_safe_stem(name)}.zip"

            # Tracing is what stop_recording() saves, so it is what gets
            # started here. The previous CDP screencast produced frames that
            # nothing consumed or wrote to disk.
            await browser.page.context.tracing.start(screenshots=True, snapshots=True)

            # Only flip the state once tracing is really running.
            _recording_path = target
            _recording_active = True
            logger.info(f"Started recording: {target}")

            return f"🔴 Recording started: {target.name}\n\nCall stop_recording() when done.", await _current_view()

        except Exception as e:
            logger.error(f"Start recording failed: {e}")
            raise RuntimeError(f"Failed to start recording: {e}") from e

    @mcp.tool()
    async def stop_recording() -> tuple[str, Image]:
        """
        Stop recording and save the session.

        Returns:
            Path to saved recording and screenshot
        """
        global _recording_path, _recording_active

        try:
            await browser.ensure_started()

            if not _recording_active:
                return "No recording in progress. Call start_recording() first.", await _current_view()

            trace_path = _recording_path
            if trace_path is None:
                recordings_dir = config.user_data_dir.parent / "recordings"
                trace_path = recordings_dir / f"recording_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
            trace_path.parent.mkdir(parents=True, exist_ok=True)

            # A failure here propagates to the handler below instead of being
            # reported as a successful save.
            await browser.page.context.tracing.stop(path=str(trace_path))
            logger.info(f"Recording saved: {trace_path}")

            _recording_active = False
            _recording_path = None

            return f"⏹️ Recording saved: {trace_path}\n\nView with: npx playwright show-trace {trace_path}", await _current_view()

        except Exception as e:
            # Reset both pieces of state so a later start_recording() is not
            # blocked and does not inherit a stale path.
            _recording_active = False
            _recording_path = None
            logger.error(f"Stop recording failed: {e}")
            raise RuntimeError(f"Failed to stop recording: {e}") from e

    @mcp.tool()
    async def take_snapshot(name: str = "") -> tuple[str, Image]:
        """
        Take a named snapshot (screenshot + HTML) for later reference.

        Args:
            name: Optional name for the snapshot (default: timestamp)

        Returns:
            Path to saved snapshot and screenshot
        """
        try:
            await browser.ensure_started()

            # Create snapshots directory
            snapshots_dir = config.user_data_dir.parent / "snapshots"
            snapshots_dir.mkdir(parents=True, exist_ok=True)

            stem = _safe_stem(name)

            # Save screenshot
            screenshot_path = snapshots_dir / f"{stem}.png"
            await browser.page.screenshot(path=str(screenshot_path))

            # Save HTML. The encoding is explicit because page content is
            # arbitrary Unicode and the platform default may not be able to
            # encode it.
            html_path = snapshots_dir / f"{stem}.html"
            html_path.write_text(await browser.page.content(), encoding="utf-8")

            # Save URL
            url_path = snapshots_dir / f"{stem}.url"
            url_path.write_text(browser.page.url, encoding="utf-8")

            logger.info(f"Snapshot saved: {screenshot_path}")

            return (
                f"📸 Snapshot saved:\n - {screenshot_path.name}\n - {html_path.name}\n - {url_path.name}",
                await _current_view(),
            )

        except Exception as e:
            logger.error(f"Take snapshot failed: {e}")
            raise RuntimeError(f"Failed to take snapshot: {e}") from e

    @mcp.tool()
    async def list_recordings() -> tuple[str, Image]:
        """
        List all saved recordings and snapshots.

        Returns:
            List of recordings and screenshot
        """
        try:
            await browser.ensure_started()

            base_dir = config.user_data_dir.parent
            recordings_dir = base_dir / "recordings"
            snapshots_dir = base_dir / "snapshots"

            def _latest(paths) -> list[Path]:
                """Return the 10 most recently modified paths, oldest first."""
                return sorted(paths, key=lambda p: p.stat().st_mtime)[-10:]

            lines = ["📁 Saved Sessions:\n"]

            # List recordings
            if recordings_dir.exists():
                recordings = _latest(recordings_dir.glob("*"))
                if recordings:
                    lines.append("Recordings:")
                    for r in recordings:
                        size = r.stat().st_size // 1024
                        lines.append(f" 📹 {r.name} ({size}KB)")

            # List snapshots
            if snapshots_dir.exists():
                snapshots = _latest(snapshots_dir.glob("*.png"))
                if snapshots:
                    lines.append("\nSnapshots:")
                    lines.extend(f" 📸 {s.stem}" for s in snapshots)

            # Only the header is present: nothing was found in either directory.
            if len(lines) == 1:
                lines.append("No recordings or snapshots found.")

            return "\n".join(lines), await _current_view()

        except Exception as e:
            logger.error(f"List recordings failed: {e}")
            raise RuntimeError(f"Failed to list recordings: {e}") from e

    logger.debug("Registered recording tools")