iflow-mcp_janspoerer-mcp_browser_use 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/METADATA +26 -0
  2. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/RECORD +50 -0
  3. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/WHEEL +5 -0
  4. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/entry_points.txt +2 -0
  5. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/licenses/LICENSE +201 -0
  6. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/top_level.txt +1 -0
  7. mcp_browser_use/__init__.py +2 -0
  8. mcp_browser_use/__main__.py +1347 -0
  9. mcp_browser_use/actions/__init__.py +1 -0
  10. mcp_browser_use/actions/elements.py +173 -0
  11. mcp_browser_use/actions/extraction.py +864 -0
  12. mcp_browser_use/actions/keyboard.py +43 -0
  13. mcp_browser_use/actions/navigation.py +73 -0
  14. mcp_browser_use/actions/screenshots.py +85 -0
  15. mcp_browser_use/browser/__init__.py +1 -0
  16. mcp_browser_use/browser/chrome.py +150 -0
  17. mcp_browser_use/browser/chrome_executable.py +204 -0
  18. mcp_browser_use/browser/chrome_launcher.py +330 -0
  19. mcp_browser_use/browser/chrome_process.py +104 -0
  20. mcp_browser_use/browser/devtools.py +230 -0
  21. mcp_browser_use/browser/driver.py +322 -0
  22. mcp_browser_use/browser/process.py +133 -0
  23. mcp_browser_use/cleaners.py +530 -0
  24. mcp_browser_use/config/__init__.py +30 -0
  25. mcp_browser_use/config/environment.py +155 -0
  26. mcp_browser_use/config/paths.py +97 -0
  27. mcp_browser_use/constants.py +68 -0
  28. mcp_browser_use/context.py +150 -0
  29. mcp_browser_use/context_pack.py +85 -0
  30. mcp_browser_use/decorators/__init__.py +13 -0
  31. mcp_browser_use/decorators/ensure.py +84 -0
  32. mcp_browser_use/decorators/envelope.py +83 -0
  33. mcp_browser_use/decorators/locking.py +172 -0
  34. mcp_browser_use/helpers.py +173 -0
  35. mcp_browser_use/helpers_context.py +261 -0
  36. mcp_browser_use/locking/__init__.py +1 -0
  37. mcp_browser_use/locking/action_lock.py +190 -0
  38. mcp_browser_use/locking/file_mutex.py +139 -0
  39. mcp_browser_use/locking/window_registry.py +178 -0
  40. mcp_browser_use/tools/__init__.py +59 -0
  41. mcp_browser_use/tools/browser_management.py +260 -0
  42. mcp_browser_use/tools/debugging.py +195 -0
  43. mcp_browser_use/tools/extraction.py +58 -0
  44. mcp_browser_use/tools/interaction.py +323 -0
  45. mcp_browser_use/tools/navigation.py +84 -0
  46. mcp_browser_use/tools/screenshots.py +116 -0
  47. mcp_browser_use/utils/__init__.py +1 -0
  48. mcp_browser_use/utils/diagnostics.py +85 -0
  49. mcp_browser_use/utils/html_utils.py +118 -0
  50. mcp_browser_use/utils/retry.py +57 -0
@@ -0,0 +1,1347 @@
1
+ #region Overview
2
+ """
3
+ ## Known Limitation: Iframe Context
4
+
5
+ Multi-step iframe interactions require specifying iframe_selector for each action.
6
+ This is intentional design to prevent context state bugs.
7
+
8
+ ## Price, Stock Quantity, and Delivery Times
9
+
10
+ If you cannot see detailed prices, stock quantities, and delivery times and you suspect that data might be available behind a login, please ask Jan on Slack for help. He can probably log you in.
11
+
12
+ ## Performance Considerations
13
+
14
+ We do not mind additional overhead from validations. The most important thing is that the code is robust.
15
+
16
+ ## Tip for Debugging
17
+
18
+ Do you find any obvious errors in the code? Please do rubber duck
19
+ debugging. Imagine you are the first agent that establishes a
20
+ connection. You connect and want to navigate. You call the function
21
+ to go to a website, but probably receive an error, because you have
22
+ to open the browser first. Or do you not receive an error and the
23
+ MCP server automatically opens a browser? That would also be fine.
24
+ Then you open the browser, if not open yet. Then you click
25
+ around a bit. Then another agent
26
+ establishes a separate MCP server connection and does the same.
27
+ Then the first agent is done with his work and closes the connection.
28
+ The second continues working. In this rubber duck
29
+ journey, is there anything that does not work well?
30
+
31
+ The MCP should allow multiple browser windows to be opened. Each AI agent can call the "start_browser" tool. If the browser is not open at all, it is opened, using the specified persistent user profile. If a browser is already open, so if the second agent calls "start_browser", a new window is opened. Each agent only uses their own window. The windows are identified by tags.
32
+
33
+ When an agent performs an action, the browser should be briefly locked until 10 seconds are over or until the agent unlocks the browser. This can be done with a lock file.
34
+
35
+ The MCP returns a cleaned HTML version of the page after each action, so the agent can see what changed and what it can do to further interact with the page or find information from the page.
36
+
37
+ ## How Multiple Agents are Handled
38
+
39
+ We do not manage multiple sessions in one MCP connection.
40
+
41
+ While each agent will connect to this very same mcp_browser_use code,
42
+ they will still connect independently. They can start and stop their MCP server
43
+ connections at will without affecting the functioning of the browser. The
44
+ agents are agnostic to whether other agents are currently running.
45
+ The MCP for browser use that we develop here should abstract the browser
46
+ handling away from the agents.
47
+
48
+ When a second agent opens a browser, the agent gets its own browser window. IT MUST NOT USE THE SAME BROWSER WINDOW! The second agent WILL NOT open another browser session.
49
+
50
+ ## Feature Highlights
51
+
52
+ * **Content Pagination:** The MCP supports paginating through large HTML pages using `html_offset` (for HTML mode) and `text_offset` (for TEXT mode). When pages exceed token limits, agents can make multiple calls with increasing offsets to retrieve all content. The offset is applied to cleaned content (after removing scripts/styles/ads), enabling efficient pagination through content-rich pages. Check the `hard_capped` flag in responses to detect truncation.
53
+
54
+ * **HTML Truncation & Token Management:** The MCP allows you to configure truncation of HTML pages via `token_budget` parameter on all tools. Other scraping MCPs may overwhelm the AI with accessibility snapshots or HTML dumps that are larger than the context window. This MCP provides precise control over snapshot size through configurable token budgets and cleaning levels.
55
+
56
+ * **Multiple Browser Windows and Multiple Agents:** You can connect multiple agents to this MCP independently, without requiring coordination on behalf of the agents. Each agent can work with **the same** browser profile, which is helpful when logins should persist across agents. Each agent gets their own browser window, so they do not interfere with each other.
57
+
58
+ * **Flexible Snapshot Modes:** Every tool returns configurable snapshots in multiple formats: `outline` (headings), `text` (extracted text), `html` (cleaned HTML), `dompaths` (element paths), or `mixed` (combination). Choose the representation that best fits your use case.
59
+
60
+ * **Fine-Grained Element Extraction:** The `extract_elements` tool allows you to extract specific data from the page using CSS selectors or XPath expressions. Specify multiple selectors and choose whether to extract HTML or text content from each matched element. This enables precise data extraction without overwhelming context with full page snapshots. The tool supports iframe and shadow DOM contexts, and can also serve as a standalone "get current page state" tool when called without selectors.
61
+
62
+
63
+ """
64
+ #endregion
65
+
66
+ #region Required Tools
67
+ """
68
+ ```
69
+ start_browser
70
+ ```
71
+ > Starts a browser if no browser session is open yet for the given user profile.
72
+ Opens a new window if an existing browser session is already there.
73
+ Multiple agents can share one browser profile (user directory) by each opening a different browser window.
74
+ This has no impact on the individual agents. For them, they just open a browser
75
+ and they do not need to know if other agents are also working
76
+ alongside them. The browser handling is abstracted away by the MCP.
77
+
78
+ ```
79
+ navigate
80
+ ```
81
+ > Navigates the browser to a specified URL.
82
+ >
83
+ > Args:
84
+ > url (str): The URL to navigate to.
85
+ >
86
+ > Returns:
87
+ > str: A message indicating successful navigation, along with the page title and HTML.
88
+
89
+ ```
90
+ click_element
91
+ ```
92
+ > Clicks an element on the web page, with iframe and shadow root support.
93
+ >
94
+ > Note: For multi-step iframe interactions, specify iframe_selector in each call.
95
+ > Browser context resets after each action for reliability.
96
+ >
97
+ > Args:
98
+ > selector (str): The selector for the element to click.
99
+ > selector_type (str, optional): The type of selector. Defaults to 'css'.
100
+ > timeout (int, optional): Maximum wait time for the element to be clickable. Defaults to 10.
101
+ > force_js (bool, optional): If True, uses JavaScript to click the element. Defaults to False.
102
+ > iframe_selector (str, optional): Selector for the iframe. Defaults to None.
103
+ > iframe_selector_type (str, optional): Selector type for the iframe. Defaults to 'css'.
104
+ > shadow_root_selector (str, optional): Selector for the shadow root. Defaults to None.
105
+ > shadow_root_selector_type (str, optional): Selector type for the shadow root. Defaults to 'css'.
106
+ >
107
+ > Returns:
108
+ > str: A message indicating successful click, along with the current URL and page title.
109
+
110
+ ```
111
+ fill_text
112
+ ```
113
+ > Input text into an element.
114
+ >
115
+ > Note: For multi-step iframe interactions, specify iframe_selector in each call.
116
+ > Browser context resets after each action for reliability.
117
+ >
118
+ > Args:
119
+ > selector: CSS selector, XPath, or ID of the input field
120
+ > text: Text to enter into the field
121
+ > selector_type: Type of selector (css, xpath, id)
122
+ > clear_first: Whether to clear the field before entering text
123
+ > timeout: Maximum time to wait for the element in seconds
124
+ > iframe_selector: Selector for the iframe (if element is inside iframe)
125
+ > iframe_selector_type: Selector type for the iframe
126
+ > shadow_root_selector: Optional selector for shadow root containing the element
127
+ > shadow_root_selector_type: Selector type for the shadow root
128
+
129
+ ```
130
+ send_keys
131
+ ```
132
+ > Send keyboard keys to the browser.
133
+ >
134
+ > Args:
135
+ > key: Key to send (e.g., ENTER, TAB, etc.)
136
+ > selector: CSS selector, XPath, or ID of the element to send keys to (optional)
137
+ > selector_type: Type of selector (css, xpath, id)
138
+
139
+ ```
140
+ scroll
141
+ ```
142
+ > Scroll the page.
143
+ >
144
+ > Args:
145
+ > x: Horizontal scroll amount in pixels
146
+ > y: Vertical scroll amount in pixels
147
+
148
+ ```
149
+ take_screenshot
150
+ ```
151
+ > Take a screenshot of the current page.
152
+ >
153
+ > Args:
154
+ > screenshot_path: Optional path to save the full screenshot
155
+ > return_base64: Whether to return base64 encoded thumbnail (default: False)
156
+ > return_snapshot: Whether to return page HTML snapshot (default: False)
157
+ > thumbnail_width: Optional width in pixels for thumbnail (default: 200px if return_base64=True)
158
+ > Minimum: 50px. Only used when return_base64=True.
159
+ > Note: 200px accounts for MCP protocol overhead to stay under 25K token limit.
160
+
161
+
162
+ ```
163
+ close_browser
164
+ ```
165
+ > Close a browser session.
166
+ > This is a very destructive action and should not be done without explicit request from the user!
167
+ > Never use this tool unless you are explicitly being told to!
168
+
169
+
170
+ ```
171
+ wait_for_element
172
+ ```
173
+ > Wait for an element to be present, visible, or clickable.
174
+ >
175
+ > Args:
176
+ > selector: CSS selector, XPath, or ID of the element
177
+ > selector_type: Type of selector (css, xpath, id)
178
+ > timeout: Maximum time to wait in seconds
179
+ > condition: What to wait for - 'present', 'visible', or 'clickable'
180
+
181
+
182
+ ```
183
+ extract_elements
184
+ ```
185
+ > Extract content from specific elements on the current page using CSS selectors or XPath.
186
+ > This tool enables fine-grained data extraction without requiring full page snapshots.
187
+ >
188
+ > **MODE 1: Simple Extraction** - Extract individual elements
189
+ > Args:
190
+ > selectors: List of selector specifications with optional "name" field
191
+ > Examples:
192
+ > - [{"selector": "span.price", "type": "css", "format": "text", "name": "price"}]
193
+ > - Get current state: selectors=None (returns full page snapshot)
194
+ >
195
+ > **MODE 2: Structured Extraction** - Extract from multiple containers (ideal for product listings)
196
+ > Args:
197
+ > container_selector: CSS/XPath for containers (e.g., "article.product-item")
198
+ > fields: List of field extractors with field_name, selector, attribute, regex, fallback
199
+ > selector_type: "css" or "xpath" for container
200
+ > wait_for_visible: Wait for containers to be visible
201
+ > extraction_timeout: Timeout in seconds
202
+ >
203
+ > Example (scrape all products on page):
204
+ > container_selector = "article.product-item" # or "//article[@class='product-item']"
205
+ > fields = [
206
+ > {"field_name": "product_name", "selector": "h3.title"},
207
+ > {"field_name": "mpn", "selector": "span[data-mpn]", "attribute": "data-mpn"},
208
+ > {"field_name": "price_brutto", "selector": ".price", "regex": "[0-9,.]+"},
209
+ > {"field_name": "url", "selector": "a", "attribute": "href"}
210
+ > ]
211
+ > # selector_type is optional - auto-detects from container_selector syntax
212
+ >
213
+ > Returns:
214
+ > Structured array: [{"product_name": "...", "mpn": "...", "price_brutto": "..."}, ...]
215
+
216
+
217
+ ```
218
+ read_chromedriver_log
219
+ ```
220
+ > Fetch the first N lines of the Chromedriver log for debugging.
221
+ >
222
+ > Args:
223
+ > lines (int): Number of lines to return from the top of the log.
224
+
225
+
226
+ ```
227
+ get_debug_info
228
+ ```
229
+ > Return user-data dir, profile name, full profile path, Chrome binary path,
230
+ > browser/driver/Selenium versions -- everything we need for debugging.
231
+
232
+
233
+
234
+ ```
235
+ debug_element
236
+ ```
237
+ > Debug why an element might not be clickable or visible.
238
+ >
239
+ > Note: For iframe elements, specify iframe_selector to debug within iframe context.
240
+ >
241
+ > Args:
242
+ > selector: CSS selector, XPath, or ID of the element
243
+ > selector_type: Type of selector (css, xpath, id)
244
+ > iframe_selector: Selector for the iframe (if element is inside iframe)
245
+ > iframe_selector_type: Selector type for the iframe
246
+
247
+ ```
248
+ """
249
+ #endregion
250
+
251
+ #region Imports
252
+ import logging
253
+ from typing import Optional
254
+ from mcp.server.fastmcp import FastMCP
255
+ #endregion
256
+
257
+ #region Import from your package __init__.py
258
+ import mcp_browser_use as MBU
259
+ from mcp_browser_use.decorators import (
260
+ tool_envelope,
261
+ exclusive_browser_access,
262
+ ensure_driver_ready,
263
+ )
264
+ from mcp_browser_use.helpers_context import to_context_pack as _to_context_pack
265
+
266
+ # Import tools directly (not via helpers) to break circular dependency
267
+ from mcp_browser_use.tools import browser_management, navigation, interaction, screenshots, debugging, extraction
268
+ #endregion
269
+
270
#region Logger
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
#endregion
273
+
274
+ #region Helper Functions
275
async def _merge_extraction_results(
    action_result_json: str,
    extract_selectors: Optional[list] = None,
    extract_container: Optional[str] = None,
    extract_fields: Optional[list] = None,
    extract_selector_type: Optional[str] = None,
    extract_wait_visible: bool = False,
    extract_timeout: int = 10,
    extract_max_items: Optional[int] = None,
    extract_discover: bool = False,
    extract_wait_content: Optional[dict] = None,
) -> str:
    """
    Optionally run an extraction and attach its output to an action result.

    When neither ``extract_selectors`` nor ``extract_container`` is given, the
    action result is returned untouched and no extraction is performed.
    Otherwise ``extraction.extract_elements`` is awaited and a summary of its
    JSON result is stored under the ``"extraction"`` key of the action result.

    Merging is best-effort: if either JSON document cannot be parsed or the
    action result cannot take the extra key, the failure is logged and the
    original ``action_result_json`` is returned unchanged. Exceptions raised
    by the extraction call itself are not caught here.

    Args:
        action_result_json: JSON result from the action
        extract_selectors: Simple extraction selectors
        extract_container: Container selector for structured extraction
        extract_fields: Fields for structured extraction
        extract_selector_type: Selector type for container
        extract_wait_visible: Wait for elements to be visible
        extract_timeout: Timeout for extraction
        extract_max_items: Limit number of containers to extract
        extract_discover: Discovery mode flag
        extract_wait_content: Smart wait config for lazy-loaded content

    Returns:
        The action result JSON with an added ``"extraction"`` object holding
        ``mode``, ``extracted_elements``, ``items`` and ``count``, or the
        original ``action_result_json`` if extraction was not requested or
        the merge failed.
    """
    import json as _json

    # Extraction is opt-in: without selectors or a container there is nothing to do.
    # NOTE(review): extract_fields / extract_discover on their own do not trigger
    # an extraction here -- confirm that this is intended.
    if not extract_selectors and not extract_container:
        return action_result_json

    extraction_result_json = await extraction.extract_elements(
        selectors=extract_selectors,
        container_selector=extract_container,
        fields=extract_fields,
        selector_type=extract_selector_type,
        wait_for_visible=extract_wait_visible,
        timeout=extract_timeout,
        max_items=extract_max_items,
        discover_containers=extract_discover,
        wait_for_content_loaded=extract_wait_content
    )

    try:
        action_result = _json.loads(action_result_json)
        extraction_result = _json.loads(extraction_result_json)

        # Copy only the summary fields; keys missing from the extraction
        # result come through as None.
        action_result['extraction'] = {
            'mode': extraction_result.get('mode'),
            'extracted_elements': extraction_result.get('extracted_elements'),
            'items': extraction_result.get('items'),
            'count': extraction_result.get('count')
        }

        return _json.dumps(action_result)
    except Exception:
        # Deliberately best-effort: the action itself already succeeded, so a
        # failed merge must not turn it into an error. Log the failure instead
        # of dropping the extraction data silently.
        logger.warning(
            "Could not merge extraction results into the action result; "
            "returning the action result without extraction data.",
            exc_info=True,
        )
        return action_result_json
345
+ #endregion
346
+
347
#region Logging
# Log, at WARNING level, the file the ``mcp_browser_use`` package was imported
# from; falls back to "<namespace>" when the package has no ``__file__``.
logger.warning(f"mcp_browser_use from: {getattr(MBU, '__file__', '<namespace>')}")
#endregion

#region FastMCP Initialization
# FastMCP server instance; the ``@mcp.tool()`` decorators in this module
# register their functions on it.
mcp = FastMCP("mcp_browser_use")
#endregion
353
+
354
+ #region Tools -- Navigation
355
@mcp.tool()
@tool_envelope
@exclusive_browser_access
async def mcp_browser_use__start_browser(
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 5_000,
) -> str:
    """
    MCP tool: Start a browser session, or open a new window in an existing session.

    **Performance Recommendation**: Start with token_budget=1000 and cleaning_level=3
    (aggressive cleaning) unless you need more content. This reduces token usage
    significantly while preserving essential information.

    Args:
        return_mode: Snapshot content type passed on to the ContextPack builder.
        cleaning_level: Cleaning intensity passed on to the ContextPack builder.
        token_budget: Approximate token cap passed on to the ContextPack builder.

    Returns:
        ContextPack JSON
    """
    snapshot_options = {
        "return_mode": return_mode,
        "cleaning_level": cleaning_level,
        "token_budget": token_budget,
    }
    start_result_json = await browser_management.start_browser()
    return await _to_context_pack(result_json=start_result_json, **snapshot_options)
380
+
381
@mcp.tool()
@tool_envelope
@exclusive_browser_access
@ensure_driver_ready
async def mcp_browser_use__navigate_to_url(
    url: str,
    wait_for: str = "load",
    timeout_sec: int = 20,
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 5_000,
    text_offset: Optional[int] = None,
    html_offset: Optional[int] = None,
    # Optional extraction parameters
    extract_selectors: Optional[list] = None,
    extract_container: Optional[str] = None,
    extract_fields: Optional[list] = None,
    extract_selector_type: Optional[str] = None,
    extract_wait_visible: bool = False,
    extract_timeout: int = 10,
    extract_max_items: Optional[int] = None,
    extract_discover: bool = False,
) -> str:
    """
    MCP tool: Navigate the current tab to the given URL and return a ContextPack snapshot.

    Loads the specified URL in the active window/tab and waits for the main document
    to be ready before capturing the snapshot.

    **Performance Recommendation**: Use token_budget=1000-2000 and cleaning_level=3
    (aggressive) by default. Only increase token_budget or decrease cleaning_level
    if you're missing critical information. Most pages work well with 1000 tokens
    and aggressive cleaning, which removes ads, scripts, and non-content elements.

    Args:
        url: Absolute URL to navigate to (e.g., "https://example.com").
        wait_for: Wait condition - "load" (default) or "complete".
        timeout_sec: Maximum time (seconds) to wait for navigation readiness. Waits for
            asynchronous data to load. Some websites are very slow and require a longer
            timeout of about 90 seconds.
        return_mode: Controls the content type in the ContextPack snapshot. One of
            {"outline", "text", "html", "dompaths", "mixed"}.
            **Recommendation**: Use "outline" for navigation, "text" for content extraction.
        cleaning_level: Structural/content cleaning intensity for snapshot rendering.
            0 = none, 1 = light, 2 = default, 3 = aggressive.
            **Recommendation**: Start with 3 (aggressive) to minimize tokens.
        token_budget: Approximate token cap for the returned snapshot. Should usually be
            5_000 or lower.
            **Recommendation**: Start with 1000-2000, only increase if needed.
        text_offset: Optional character offset to start text extraction (for pagination).
            Only applies when return_mode="text".
            Example: Use text_offset=10000 to skip the first 10,000 characters.
        html_offset: Optional character offset to start HTML extraction (for pagination).
            Only applies when return_mode="html".
            Example: Use html_offset=50000 to skip the first 50,000 characters of cleaned HTML.
            Note: Offset is applied AFTER cleaning_level processing but BEFORE token_budget
            truncation.

        extract_selectors: [OPTIONAL EXTRACTION] Simple extraction selectors (MODE 1).
            See extract_elements tool for format.
        extract_container: [OPTIONAL EXTRACTION] Container selector for structured extraction (MODE 2).
        extract_fields: [OPTIONAL EXTRACTION] Field extractors for structured extraction (MODE 2).
        extract_selector_type: [OPTIONAL EXTRACTION] Selector type for container (auto-detects if None).
        extract_wait_visible: [OPTIONAL EXTRACTION] Wait for containers to be visible.
        extract_timeout: [OPTIONAL EXTRACTION] Timeout for extraction in seconds.
        extract_max_items: [OPTIONAL EXTRACTION] Limit number of containers to extract.
        extract_discover: [OPTIONAL EXTRACTION] Discovery mode flag.

    Returns:
        str: JSON-serialized ContextPack with post-navigation snapshot.

    Raises:
        TimeoutError: If the page fails to load within `timeout_sec`.
        ValueError: If `url` is invalid or `return_mode` is invalid.
        RuntimeError: If the browser/driver is not ready.

    Notes:
        - The snapshot reflects the DOM after the initial load. If the site performs
          heavy client-side hydration, consider waiting for a specific element with
          `wait_for_element` before subsequent actions.
        - **Pagination Strategy for Large Pages:**
          When dealing with pages that exceed token limits, use offset parameters to paginate:

          1. First call: Set return_mode="html", token_budget=50000, no offset
             - Check response for `hard_capped=true` to detect truncation

          2. Subsequent calls: Use html_offset to continue from where you left off
             - Example: html_offset=200000 (50000 tokens * 4 chars/token)
             - Continue until you receive less content than token_budget

          3. For TEXT mode pagination, use text_offset with return_mode="text"

        - **Important:** The offset is applied to the cleaned HTML (after removing scripts,
          styles, and noise), not the raw HTML. This means you're paginating through
          content-rich HTML only.

        - **Token Budget Interaction:**
          - Cleaning happens first (scripts/styles/noise removed)
          - Then html_offset is applied (skip first N chars)
          - Finally token_budget truncates the remaining content

        - **Use Cases:**
          - Product catalogs with 1000+ items
          - Long documentation pages
          - Search results with many pages loaded via infinite scroll
          - Large data tables with 10,000+ rows

        - **Extraction Integration:**
          You can optionally extract data immediately after navigation by providing
          extraction parameters. This combines navigation + extraction in a single call:

          Example:
              navigate_to_url(
                  url="https://example.com/products",
                  extract_container="article.product-item",
                  extract_fields=[
                      {"field_name": "product_name", "selector": "h3.title"},
                      {"field_name": "price", "selector": ".price", "regex": "[0-9,.]+"}
                  ]
              )

          This is more efficient than calling navigate followed by extract_elements separately.
    """
    page_json = await navigation.navigate_to_url(
        url=url,
        wait_for=wait_for,
        timeout_sec=timeout_sec,
    )

    # Attach extraction output to the navigation result when extraction was requested.
    extraction_options = dict(
        extract_selectors=extract_selectors,
        extract_container=extract_container,
        extract_fields=extract_fields,
        extract_selector_type=extract_selector_type,
        extract_wait_visible=extract_wait_visible,
        extract_timeout=extract_timeout,
        extract_max_items=extract_max_items,
        extract_discover=extract_discover,
    )
    page_json = await _merge_extraction_results(
        action_result_json=page_json,
        **extraction_options,
    )

    # Render the (possibly merged) result as a ContextPack snapshot.
    return await _to_context_pack(
        result_json=page_json,
        return_mode=return_mode,
        cleaning_level=cleaning_level,
        token_budget=token_budget,
        text_offset=text_offset,
        html_offset=html_offset,
    )
522
+
523
@mcp.tool()
@tool_envelope
@exclusive_browser_access
@ensure_driver_ready
async def mcp_browser_use__fill_text(
    selector: str,
    text: str,
    selector_type: str = "css",
    clear_first: bool = True,
    timeout: float = 10.0,
    iframe_selector: Optional[str] = None,
    iframe_selector_type: str = "css",
    shadow_root_selector: Optional[str] = None,
    shadow_root_selector_type: str = "css",
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 5_000,
    text_offset: Optional[int] = None,
    html_offset: Optional[int] = None,
    # Optional extraction parameters
    extract_selectors: Optional[list] = None,
    extract_container: Optional[str] = None,
    extract_fields: Optional[list] = None,
    extract_selector_type: Optional[str] = None,
    extract_wait_visible: bool = False,
    extract_timeout: int = 10,
    extract_max_items: Optional[int] = None,
    extract_discover: bool = False,
) -> str:
    """
    MCP tool: Set the value of an input/textarea and return a ContextPack snapshot.

    Focuses the target element, optionally clears existing content, and inserts `text`.

    **Performance Recommendation**: Use token_budget=1000 and cleaning_level=3
    for most form fills. This is sufficient to verify the action succeeded.

    Args:
        selector: Element locator (CSS or XPath).
        text: The exact text to set.
        selector_type: One of {"css", "xpath"}.
        clear_first: If True, clear any existing value before typing.
        timeout: Maximum time (seconds) to locate and interact with the element.
        iframe_selector: Optional iframe locator containing the element.
        iframe_selector_type: One of {"css", "xpath"}.
        shadow_root_selector: Optional shadow root host locator.
        shadow_root_selector_type: One of {"css", "xpath"}.
        return_mode: Snapshot content type {"outline","text","html","dompaths","mixed"}.
        cleaning_level: Structural/content cleaning intensity (0–3).
        token_budget: Approximate token cap for the returned snapshot. Should usually be
            5_000 or lower.
        text_offset: Optional character offset for paginating the snapshot when
            return_mode="text".
        html_offset: Optional character offset for paginating the snapshot when
            return_mode="html".

        extract_selectors: [OPTIONAL EXTRACTION] Simple extraction selectors (MODE 1).
        extract_container: [OPTIONAL EXTRACTION] Container selector for structured extraction (MODE 2).
        extract_fields: [OPTIONAL EXTRACTION] Field extractors for structured extraction (MODE 2).
        extract_selector_type: [OPTIONAL EXTRACTION] Selector type for container (auto-detects if None).
        extract_wait_visible: [OPTIONAL EXTRACTION] Wait for containers to be visible.
        extract_timeout: [OPTIONAL EXTRACTION] Timeout for extraction in seconds.
        extract_max_items: [OPTIONAL EXTRACTION] Limit number of containers to extract.
        extract_discover: [OPTIONAL EXTRACTION] Discovery mode flag.

    Returns:
        str: JSON-serialized ContextPack with post-input snapshot.

    Raises:
        TimeoutError: If the element is not ready within `timeout`.
        LookupError: If the selector cannot be resolved.
        ValueError: If `selector_type` or `return_mode` is invalid.
        RuntimeError: If the browser/driver is not ready.

    Notes:
        - Use `send_keys` for complex sequences or special keys.
        - For masked inputs or JS-only fields, consider `force_js` variants if available.
        - **Extraction Integration**: Useful for extracting search results after submitting
          a search query (e.g., fill search box + extract results).
    """
    # Where the element lives and how to locate it.
    target = dict(
        selector=selector,
        selector_type=selector_type,
        iframe_selector=iframe_selector,
        iframe_selector_type=iframe_selector_type,
        shadow_root_selector=shadow_root_selector,
        shadow_root_selector_type=shadow_root_selector_type,
    )
    fill_json = await interaction.fill_text(
        text=text,
        clear_first=clear_first,
        timeout=timeout,
        **target,
    )

    # Attach extraction output to the fill result when extraction was requested.
    extraction_options = dict(
        extract_selectors=extract_selectors,
        extract_container=extract_container,
        extract_fields=extract_fields,
        extract_selector_type=extract_selector_type,
        extract_wait_visible=extract_wait_visible,
        extract_timeout=extract_timeout,
        extract_max_items=extract_max_items,
        extract_discover=extract_discover,
    )
    fill_json = await _merge_extraction_results(
        action_result_json=fill_json,
        **extraction_options,
    )

    # Render the (possibly merged) result as a ContextPack snapshot.
    return await _to_context_pack(
        result_json=fill_json,
        return_mode=return_mode,
        cleaning_level=cleaning_level,
        token_budget=token_budget,
        text_offset=text_offset,
        html_offset=html_offset,
    )
630
+
631
@mcp.tool()
@tool_envelope
@exclusive_browser_access
@ensure_driver_ready
async def mcp_browser_use__click_element(
    selector: str,
    selector_type: str = "css",
    timeout: float = 10.0,
    force_js: bool = False,
    iframe_selector: Optional[str] = None,
    iframe_selector_type: str = "css",
    shadow_root_selector: Optional[str] = None,
    shadow_root_selector_type: str = "css",
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 5_000,
    text_offset: Optional[int] = None,
    html_offset: Optional[int] = None,
    # Optional extraction parameters
    extract_selectors: Optional[list] = None,
    extract_container: Optional[str] = None,
    extract_fields: Optional[list] = None,
    extract_selector_type: Optional[str] = None,
    extract_wait_visible: bool = False,
    extract_timeout: int = 10,
    extract_max_items: Optional[int] = None,
    extract_discover: bool = False,
) -> str:
    """
    MCP tool: Click an element (optionally inside an iframe or shadow root) and return a snapshot.

    A native WebDriver click is attempted by default; a JavaScript-based click is used
    when `force_js` is True or when a native click is not possible.

    **Performance Recommendation**: Use token_budget=1000 and cleaning_level=3.
    After a click you typically only need to verify that the action succeeded.

    Args:
        selector: Element locator (CSS or XPath).
        selector_type: How to interpret `selector`. One of {"css", "xpath"}.
        timeout: Maximum time (seconds) to locate a clickable element.
        force_js: If True, use a JavaScript-based click instead of a native click.
        iframe_selector: Optional locator of an iframe that contains the target element.
        iframe_selector_type: One of {"css", "xpath"}; applies to `iframe_selector`.
        shadow_root_selector: Optional locator whose shadowRoot contains the target element.
        shadow_root_selector_type: One of {"css", "xpath"}; applies to `shadow_root_selector`.
        return_mode: Controls the content type in the ContextPack snapshot.
            {"outline", "text", "html", "dompaths", "mixed"}.
        cleaning_level: Structural/content cleaning intensity (0–3).
        token_budget: Approximate token cap for the returned snapshot. Should usually be 5_000 or lower.
        text_offset: Optional character offset for text mode pagination.
        html_offset: Optional character offset for html mode pagination.

        extract_selectors: [OPTIONAL EXTRACTION] Simple extraction selectors (MODE 1).
        extract_container: [OPTIONAL EXTRACTION] Container selector for structured extraction (MODE 2).
        extract_fields: [OPTIONAL EXTRACTION] Field extractors for structured extraction (MODE 2).
        extract_selector_type: [OPTIONAL EXTRACTION] Selector type for container (auto-detects if None).
        extract_wait_visible: [OPTIONAL EXTRACTION] Wait for containers to be visible.
        extract_timeout: [OPTIONAL EXTRACTION] Timeout for extraction in seconds.
        extract_max_items: [OPTIONAL EXTRACTION] Forwarded to the extraction step; presumably
            limits how many containers are extracted (None = no limit).
        extract_discover: [OPTIONAL EXTRACTION] Forwarded to the extraction step; presumably
            enables container discovery mode instead of full extraction.

    Returns:
        str: JSON-serialized ContextPack with the snapshot after the click.

    Raises:
        TimeoutError: If the element is not clickable within `timeout`.
        LookupError: If the selector cannot be resolved.
        ValueError: If any selector_type is invalid or `return_mode` is invalid.
        RuntimeError: If the browser/driver is not ready.

    Notes:
        - If both `iframe_selector` and `shadow_root_selector` are provided, the iframe
          context is resolved first, then the shadow root context.
        - Some sites block native clicks; `force_js=True` can bypass those cases, but
          it may not trigger all browser-level side effects (e.g., focus).
        - **Extraction Integration**: Useful for extracting data after clicking (e.g.,
          clicking "Show More" and extracting newly loaded products).
    """
    # Step 1: perform the click itself.
    click_json = await interaction.click_element(
        selector=selector,
        selector_type=selector_type,
        timeout=timeout,
        force_js=force_js,
        iframe_selector=iframe_selector,
        iframe_selector_type=iframe_selector_type,
        shadow_root_selector=shadow_root_selector,
        shadow_root_selector_type=shadow_root_selector_type,
    )

    # Step 2: hand the click result plus every extract_* argument to the merge helper,
    # which combines extraction output with the action result.
    extraction_options = {
        "extract_selectors": extract_selectors,
        "extract_container": extract_container,
        "extract_fields": extract_fields,
        "extract_selector_type": extract_selector_type,
        "extract_wait_visible": extract_wait_visible,
        "extract_timeout": extract_timeout,
        "extract_max_items": extract_max_items,
        "extract_discover": extract_discover,
    }
    merged_json = await _merge_extraction_results(
        action_result_json=click_json,
        **extraction_options,
    )

    # Step 3: render the merged result as a ContextPack using the caller's snapshot settings.
    snapshot_options = {
        "return_mode": return_mode,
        "cleaning_level": cleaning_level,
        "token_budget": token_budget,
        "text_offset": text_offset,
        "html_offset": html_offset,
    }
    return await _to_context_pack(result_json=merged_json, **snapshot_options)
738
+
739
@mcp.tool()
@tool_envelope
@exclusive_browser_access
@ensure_driver_ready
async def mcp_browser_use__take_screenshot(
    screenshot_path: Optional[str] = None,
    return_base64: bool = False,
    return_snapshot: bool = False,
    thumbnail_width: Optional[int] = None,
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 5_000,
    text_offset: Optional[int] = None,
    html_offset: Optional[int] = None,
) -> str:
    """
    MCP tool: Take a screenshot via `screenshots.take_screenshot` and return a ContextPack.

    The first four arguments are forwarded unchanged to the screenshot action; the
    remaining arguments only shape the ContextPack built from its result.

    Args:
        screenshot_path: Optional path forwarded to the screenshot action
            (presumably where the image is saved; confirm in `screenshots.take_screenshot`).
        return_base64: Forwarded flag (presumably includes the image as base64 in the result).
        return_snapshot: Forwarded flag (presumably includes a page snapshot in the result).
        thumbnail_width: Optional width forwarded to the screenshot action
            (presumably the width of a downscaled thumbnail; confirm).
        return_mode: Snapshot content type {"outline","text","html","dompaths","mixed"}.
        cleaning_level: Structural/content cleaning intensity (0–3).
        token_budget: Approximate token cap for the returned snapshot.
        text_offset: Optional character offset for text mode pagination.
        html_offset: Optional character offset for html mode pagination.

    Returns:
        str: The value produced by `_to_context_pack` for the screenshot result.
    """
    shot_json = await screenshots.take_screenshot(
        screenshot_path=screenshot_path,
        return_base64=return_base64,
        return_snapshot=return_snapshot,
        thumbnail_width=thumbnail_width,
    )

    snapshot_options = {
        "return_mode": return_mode,
        "cleaning_level": cleaning_level,
        "token_budget": token_budget,
        "text_offset": text_offset,
        "html_offset": html_offset,
    }
    return await _to_context_pack(result_json=shot_json, **snapshot_options)
768
+ #endregion
769
+
770
+ #region Tools -- Debugging
771
@mcp.tool()
@tool_envelope
async def mcp_browser_use__get_debug_diagnostics_info(
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 5_000,
    text_offset: Optional[int] = None,
    html_offset: Optional[int] = None,
) -> str:
    """
    MCP tool: Collect driver/browser diagnostics and return a ContextPack.

    Gathers diagnostics such as driver session info, user agent, window size, active
    targets, and other implementation-specific debug fields. The diagnostics end up
    in the ContextPack's auxiliary section (e.g., `mixed.diagnostics`).

    **Performance Recommendation**: Use token_budget=500 and cleaning_level=3.
    Diagnostic info is typically metadata, not content.

    Args:
        return_mode: Snapshot content type {"outline","text","html","dompaths","mixed"}.
        cleaning_level: Structural/content cleaning intensity (0–3).
        token_budget: Approximate token cap for the returned snapshot. Should usually be 5_000 or lower.
        text_offset: Optional character offset for text mode pagination.
        html_offset: Optional character offset for html mode pagination.

    Returns:
        str: JSON-serialized ContextPack including diagnostics in `mixed`.

    Raises:
        RuntimeError: If diagnostics cannot be collected.
        ValueError: If `return_mode` is invalid.

    Notes:
        - Useful for troubleshooting issues such as stale sessions, blocked popups,
          or failed navigation. Avoid exposing sensitive values in logs.
    """
    snapshot_options = {
        "return_mode": return_mode,
        "cleaning_level": cleaning_level,
        "token_budget": token_budget,
        "text_offset": text_offset,
        "html_offset": html_offset,
    }
    diag_json = await debugging.get_debug_diagnostics_info()
    return await _to_context_pack(result_json=diag_json, **snapshot_options)
815
+
816
@mcp.tool()
@tool_envelope
@exclusive_browser_access
@ensure_driver_ready
async def mcp_browser_use__debug_element(
    selector: str,
    selector_type: str = "css",
    timeout: float = 10.0,
    iframe_selector: Optional[str] = None,
    iframe_selector_type: str = "css",
    shadow_root_selector: Optional[str] = None,
    shadow_root_selector_type: str = "css",
    max_html_length: int = 5000,
    include_html: bool = True,
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 5_000,
    text_offset: Optional[int] = None,
    html_offset: Optional[int] = None,
) -> str:
    """
    MCP tool: Run `debugging.debug_element` for one element and return a ContextPack.

    The locator arguments and the HTML options are forwarded unchanged to the
    debugging layer; the remaining arguments only shape the ContextPack built
    from its result.

    Args:
        selector: Element locator (CSS or XPath).
        selector_type: One of {"css", "xpath"}.
        timeout: Timeout in seconds forwarded to the debugging action.
        iframe_selector: Optional iframe locator containing the element.
        iframe_selector_type: One of {"css", "xpath"}; applies to `iframe_selector`.
        shadow_root_selector: Optional shadow root host locator.
        shadow_root_selector_type: One of {"css", "xpath"}; applies to `shadow_root_selector`.
        max_html_length: Forwarded limit (presumably the maximum number of HTML
            characters reported for the element; confirm in `debugging.debug_element`).
        include_html: Forwarded flag (presumably controls whether element HTML is included).
        return_mode: Snapshot content type {"outline","text","html","dompaths","mixed"}.
        cleaning_level: Structural/content cleaning intensity (0–3).
        token_budget: Approximate token cap for the returned snapshot.
        text_offset: Optional character offset for text mode pagination.
        html_offset: Optional character offset for html mode pagination.

    Returns:
        str: The value produced by `_to_context_pack` for the debug result.
    """
    element_report = await debugging.debug_element(
        selector=selector,
        selector_type=selector_type,
        timeout=timeout,
        iframe_selector=iframe_selector,
        iframe_selector_type=iframe_selector_type,
        shadow_root_selector=shadow_root_selector,
        shadow_root_selector_type=shadow_root_selector_type,
        max_html_length=max_html_length,
        include_html=include_html,
    )

    snapshot_options = {
        "return_mode": return_mode,
        "cleaning_level": cleaning_level,
        "token_budget": token_budget,
        "text_offset": text_offset,
        "html_offset": html_offset,
    }
    return await _to_context_pack(result_json=element_report, **snapshot_options)
855
+ #endregion
856
+
857
+ #region Tools -- Session management
858
@mcp.tool()
@tool_envelope
@exclusive_browser_access
async def mcp_browser_use__unlock_browser() -> str:
    """
    MCP tool: Call `browser_management.unlock_browser()` and return its result unchanged.

    Returns:
        str: Whatever the browser-management layer reports for the unlock operation.
    """
    return await browser_management.unlock_browser()
864
+
865
@mcp.tool()
@tool_envelope
@exclusive_browser_access
async def mcp_browser_use__close_browser() -> str:
    """
    MCP tool: Close the browser via `browser_management.close_browser()`.

    This is a very destructive action and must not be performed without an explicit
    request from the user! Simply do not use this on your own initiative.

    Returns:
        str: Whatever the browser-management layer reports for the close operation.
    """
    return await browser_management.close_browser()
876
+
877
@mcp.tool()
@tool_envelope
async def mcp_browser_use__force_close_all_chrome() -> str:
    """
    Force close all Chrome processes and clean up all state.

    This is a very destructive action and must not be performed without an explicit
    request from the user! Do not use this on your own initiative.

    All other agents working with this MCP will be affected by this action.

    When the user explicitly asks for it, this is the way to recover from stuck
    Chrome instances or from a failing normal close_browser. It will:
    - Quit the Selenium driver
    - Kill all Chrome processes using the MCP profile
    - Clean up lock files and global state

    Returns:
        str: JSON with status, killed process IDs, and any errors encountered
    """
    cleanup_report = await browser_management.force_close_all_chrome()
    return cleanup_report
899
+ #endregion
900
+
901
+ #region Tools -- Page interaction
902
@mcp.tool()
@tool_envelope
@exclusive_browser_access
@ensure_driver_ready
async def mcp_browser_use__scroll(
    x: int = 0,
    y: int = 0,
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 100,
    text_offset: Optional[int] = None,
    html_offset: Optional[int] = None,
    # Optional extraction parameters
    extract_selectors: Optional[list] = None,
    extract_container: Optional[str] = None,
    extract_fields: Optional[list] = None,
    extract_selector_type: Optional[str] = None,
    extract_wait_visible: bool = False,
    extract_timeout: int = 10,
    extract_max_items: Optional[int] = None,
    extract_discover: bool = False,
) -> str:
    """
    MCP tool: Scroll the page by pixel amounts, then return a snapshot.

    The viewport is scrolled using the pixel values `x` and `y`, which are passed
    unchanged to `navigation.scroll`. This tool does not accept an element selector;
    only pixel-based scrolling is available here.

    **Performance Recommendation**: Keep the token budget low and use cleaning_level=3.
    Scrolling typically reveals limited new content that needs minimal tokens. The
    default token_budget is 100; raise it (e.g., 500-1000) only if you need to read
    the newly revealed content from the snapshot.

    Args:
        x: Horizontal pixels to scroll (+right / -left). Defaults to 0.
        y: Vertical pixels to scroll (+down / -up). Defaults to 0.
        return_mode: Snapshot content type {"outline","text","html","dompaths","mixed"}.
        cleaning_level: Structural/content cleaning intensity (0–3).
        token_budget: Approximate token cap for the returned snapshot (default 100).
            It is generally advisable to set a very low token budget when scrolling.
        text_offset: Optional character offset for text mode pagination.
        html_offset: Optional character offset for html mode pagination.

        extract_selectors: [OPTIONAL EXTRACTION] Simple extraction selectors (MODE 1).
        extract_container: [OPTIONAL EXTRACTION] Container selector for structured extraction (MODE 2).
        extract_fields: [OPTIONAL EXTRACTION] Field extractors for structured extraction (MODE 2).
        extract_selector_type: [OPTIONAL EXTRACTION] Selector type for container (auto-detects if None).
        extract_wait_visible: [OPTIONAL EXTRACTION] Wait for containers to be visible.
        extract_timeout: [OPTIONAL EXTRACTION] Timeout for extraction in seconds.
        extract_max_items: [OPTIONAL EXTRACTION] Forwarded to the extraction step; presumably
            limits how many containers are extracted (None = no limit).
        extract_discover: [OPTIONAL EXTRACTION] Forwarded to the extraction step; presumably
            enables container discovery mode instead of full extraction.

    Returns:
        str: JSON-serialized ContextPack with post-scroll snapshot.

    Raises:
        ValueError: If `return_mode` is invalid.
        RuntimeError: If the browser/driver is not ready.

    Notes:
        - **Extraction Integration**: Useful for infinite scroll pages where you scroll
          and extract newly loaded items. Example:
          scroll(y=1000, extract_container="article.product", extract_fields=[...])
    """
    result = await navigation.scroll(x=x, y=y)

    # Pass the scroll result and all extract_* arguments to the merge helper,
    # which combines extraction output with the action result.
    result = await _merge_extraction_results(
        action_result_json=result,
        extract_selectors=extract_selectors,
        extract_container=extract_container,
        extract_fields=extract_fields,
        extract_selector_type=extract_selector_type,
        extract_wait_visible=extract_wait_visible,
        extract_timeout=extract_timeout,
        extract_max_items=extract_max_items,
        extract_discover=extract_discover,
    )

    return await _to_context_pack(
        result_json=result,
        return_mode=return_mode,
        cleaning_level=cleaning_level,
        token_budget=token_budget,
        text_offset=text_offset,
        html_offset=html_offset,
    )
989
+
990
@mcp.tool()
@tool_envelope
@exclusive_browser_access
@ensure_driver_ready
async def mcp_browser_use__send_keys(
    key: str,
    selector: Optional[str] = None,
    selector_type: str = "css",
    timeout: float = 10.0,
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 1_000,
    text_offset: Optional[int] = None,
    html_offset: Optional[int] = None,
) -> str:
    """
    MCP tool: Send a key to the page (optionally to a specific element) and return a ContextPack snapshot.

    Useful for submitting forms (e.g., Enter) or sending special keys (e.g., Tab, Escape).

    Args:
        key: The key to send. Special keys can be supported by name
            (e.g., "ENTER", "TAB", "ESCAPE") depending on implementation.
        selector: Optional element locator (CSS or XPath). When omitted, no target
            element is passed to the interaction layer (presumably the key then goes
            to the currently focused element; confirm in `interaction.send_keys`).
        selector_type: One of {"css", "xpath"}; applies to `selector`.
        timeout: Maximum time (seconds) to locate and focus the element.
        return_mode: Snapshot content type {"outline","text","html","dompaths","mixed"}.
        cleaning_level: Structural/content cleaning intensity (0–3).
        token_budget: Approximate token cap for the returned snapshot (default 1_000).
            Should usually be 5_000 or lower.
        text_offset: Optional character offset for text mode pagination.
        html_offset: Optional character offset for html mode pagination.

    Returns:
        str: JSON-serialized ContextPack with snapshot after key events.

    Raises:
        TimeoutError: If the element is not ready within `timeout`.
        LookupError: If the selector cannot be resolved.
        ValueError: If `selector_type` or `return_mode` is invalid.
        RuntimeError: If the browser/driver is not ready.

    Notes:
        - This tool has no iframe or shadow-root parameters.
        - Combine with `wait_for_element` to ensure predictable post-typing state.
    """
    result = await interaction.send_keys(
        key=key,
        selector=selector,
        selector_type=selector_type,
        timeout=timeout,
    )
    return await _to_context_pack(
        result_json=result,
        return_mode=return_mode,
        cleaning_level=cleaning_level,
        token_budget=token_budget,
        text_offset=text_offset,
        html_offset=html_offset,
    )
1050
+
1051
@mcp.tool()
@tool_envelope
@exclusive_browser_access
@ensure_driver_ready
async def mcp_browser_use__wait_for_element(
    selector: str,
    selector_type: str = "css",
    timeout: float = 10.0,
    condition: str = "visible",
    iframe_selector: Optional[str] = None,
    iframe_selector_type: str = "css",
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 1_000,
    text_offset: Optional[int] = None,
    html_offset: Optional[int] = None,
) -> str:
    """
    MCP tool: Wait until an element satisfies `condition` and return a snapshot.

    Waits up to `timeout` seconds for the element to meet the requested condition.
    By default (`condition="visible"`) the element must be visible, not merely present.

    Args:
        selector: Element locator (CSS or XPath).
        selector_type: One of {"css", "xpath"}.
        timeout: Maximum time (seconds) to wait.
        condition: Name of the wait condition; defaults to "visible". It is passed
            unchanged to `interaction.wait_for_element`, which defines the accepted
            values (not visible from this wrapper).
        iframe_selector: Optional iframe locator containing the element.
        iframe_selector_type: One of {"css", "xpath"}.
        return_mode: Snapshot content type {"outline","text","html","dompaths","mixed"}.
        cleaning_level: Structural/content cleaning intensity (0–3).
        token_budget: Approximate token cap for the returned snapshot (default 1_000).
            Should usually be 5_000 or lower.
        text_offset: Optional character offset for text mode pagination.
        html_offset: Optional character offset for html mode pagination.

    Returns:
        str: JSON-serialized ContextPack capturing the page after the wait condition.

    Raises:
        TimeoutError: If the condition is not met within `timeout`.
        LookupError: If the selector context cannot be resolved.
        ValueError: If `selector_type` or `return_mode` is invalid.
        RuntimeError: If the browser/driver is not ready.

    Notes:
        - This tool has no shadow-root parameters.
    """
    result = await interaction.wait_for_element(
        selector=selector,
        selector_type=selector_type,
        timeout=timeout,
        condition=condition,
        iframe_selector=iframe_selector,
        iframe_selector_type=iframe_selector_type,
    )
    return await _to_context_pack(
        result_json=result,
        return_mode=return_mode,
        cleaning_level=cleaning_level,
        token_budget=token_budget,
        text_offset=text_offset,
        html_offset=html_offset,
    )
1111
+
1112
@mcp.tool()
@tool_envelope
@exclusive_browser_access
@ensure_driver_ready
async def mcp_browser_use__extract_elements(
    selectors: Optional[list] = None,
    container_selector: Optional[str] = None,
    fields: Optional[list] = None,
    selector_type: Optional[str] = None,
    wait_for_visible: bool = False,
    extraction_timeout: int = 10,
    max_items: Optional[int] = None,
    discover_containers: bool = False,
    wait_for_content_loaded: Optional[dict] = None,
    return_mode: str = "outline",
    cleaning_level: int = 2,
    token_budget: int = 5_000,
    text_offset: Optional[int] = None,
    html_offset: Optional[int] = None,
) -> str:
    """
    MCP tool: Extract content from specific elements on the current page or get page snapshot.

    Two extraction modes are available:

    MODE 1: Simple Extraction (using the 'selectors' parameter)
    - Extract individual elements with CSS/XPath
    - Returns a list of extracted elements with optional field names

    MODE 2: Structured Extraction (using 'container_selector' + 'fields' parameters)
    - Find multiple containers (e.g., all product items on a page)
    - Extract named fields from each container
    - Support attribute extraction (href, data-*, etc.)
    - Apply regex patterns to clean extracted values
    - Returns an array of structured objects (perfect for product listings)

    Args:
        selectors: [MODE 1] Optional list of selector specifications. Each dict contains:
            {
                "selector": str,              # CSS selector or XPath expression (required)
                "type": str,                  # "css" or "xpath" (default: "css")
                "format": str,                # "html" or "text" (default: "html")
                "name": str,                  # Optional: field name for the result
                "timeout": int,               # Timeout in seconds (default: 10)
                "iframe_selector": str,       # Optional: selector for parent iframe
                "iframe_type": str,           # Optional: "css" or "xpath" for iframe
                "shadow_root_selector": str,  # Optional: selector for shadow root host
                "shadow_root_type": str,      # Optional: "css" or "xpath" for shadow root
            }

        container_selector: [MODE 2] CSS or XPath selector for container elements
            Example: "article.product-item" or "//div[@class='product']"

        fields: [MODE 2] List of field extractors, each dict contains:
            {
                "field_name": str,      # Output field name (e.g., "price_net", "mpn")
                "selector": str,        # CSS/XPath relative to container
                "selector_type": str,   # "css" or "xpath" (default: "css")
                "attribute": str,       # Optional: extract attribute (e.g., "href", "data-id")
                "regex": str,           # Optional: regex to extract/clean value
                "fallback": str         # Optional: fallback value if extraction fails
            }

        selector_type: [MODE 2] Optional selector type for container_selector.
            If None, auto-detects from syntax:
            - Starts with "//" or "/" → xpath
            - Otherwise → css
            Note: Each field in 'fields' has its own selector_type inside the dict.

        wait_for_visible: [MODE 2] Wait for containers to be visible before extracting
        extraction_timeout: [MODE 2] Timeout in seconds (default: 10s, capped at 5s for discovery)

        max_items: [MODE 2] Limit number of containers to extract (None = all).
            Prevents token explosions and enables testing.
            Recommended values:
            - 10 for testing selectors
            - 50-100 for production extraction
            Example: max_items=10 extracts only first 10 products

        discover_containers: [MODE 2] Discovery mode flag (default: False).
            When True, returns container analysis instead of full extraction:
            - Fast (~5s timeout)
            - Lightweight (~1K tokens)
            - Returns: count, sample_html, sample_text, common_child_selectors
            Use this to explore page structure before committing to full extraction

        wait_for_content_loaded: [MODE 2] Smart wait for lazy-loaded content (e.g., async prices).
            Essential for modern JavaScript-heavy sites (Vue.js/React/Angular) that load data
            asynchronously after initial page render. Dict with keys:
            {
                "selector": str,          # CSS/XPath to check for loaded content (e.g., ".price")
                "min_percentage": int,    # % of containers that must have content (default: 80)
                "timeout": int,           # Max wait time in seconds (default: 60)
                "check_interval": int,    # Seconds between checks (default: 5)
                "check_attribute": str,   # Optional: attribute to check instead of text
                "min_length": int         # Minimum text/attribute length to consider loaded (default: 1)
            }
            Polls periodically until min_percentage of containers have the specified content loaded.
            Results include _wait_metadata with timing and loading statistics.

        return_mode: Snapshot content type {"outline","text","html","dompaths","mixed"}
        cleaning_level: Structural/content cleaning intensity (0–3)
        token_budget: Approximate token cap for the returned snapshot. Should usually be 5_000 or lower.
        text_offset: Optional character offset for text mode pagination
        html_offset: Optional character offset for html mode pagination

    Returns:
        str: JSON-serialized ContextPack with extraction results in 'mixed' field:

        MODE 1 Response:
        {
            "mixed": {
                "mode": "simple",
                "extracted_elements": [
                    {"selector": "...", "found": true, "content": "...", "name": "price"},
                    ...
                ]
            },
            "snapshot": {...},
            ...
        }

        MODE 2 Response:
        {
            "mixed": {
                "mode": "structured",
                "items": [
                    {"product_name": "Widget A", "mpn": "12345", "price_brutto": "99.99", ...},
                    {"product_name": "Widget B", "mpn": "67890", "price_brutto": "149.99", ...}
                ],
                "count": 2
            },
            "snapshot": {...},
            ...
        }

    Examples:
        # MODE 1: Simple extraction with named fields
        selectors = [
            {"selector": "span.price", "type": "css", "format": "text", "name": "price"},
            {"selector": "div.stock", "type": "css", "format": "text", "name": "stock_status"}
        ]

        # MODE 2A: Discovery mode - Find containers first
        container_selector = "article.product-item"
        discover_containers = True
        # Fast response with: count, sample_html, common_child_selectors

        # MODE 2B: Test extraction - Small batch
        container_selector = "article.product-item"
        fields = [
            {"field_name": "product_name", "selector": "h3.product-title"},
            {"field_name": "price", "selector": ".price", "regex": r"[0-9,.]+"}
        ]
        max_items = 10  # Test on first 10 items only

        # MODE 2C: Full extraction - With safety limit
        container_selector = "article.product-item"
        fields = [
            {"field_name": "product_name", "selector": "h3.product-title", "selector_type": "css"},
            {"field_name": "mpn", "selector": "span[data-mpn]", "attribute": "data-mpn"},
            {"field_name": "price_brutto", "selector": ".price-brutto", "regex": r"[0-9,.]+"},
            {"field_name": "availability", "selector": ".availability-status"},
            {"field_name": "url", "selector": "a.product-link", "attribute": "href"}
        ]
        max_items = 100  # Safety limit to prevent token explosion
        wait_for_visible = True
        extraction_timeout = 10

        # Get current page state without extraction
        selectors = None, container_selector = None  # Returns full page snapshot

    Raises:
        ValueError: If selector specification is invalid
        RuntimeError: If the browser/driver is not ready
        TimeoutError: If containers not found within extraction_timeout

    Notes:
        MODE 1:
        - Each selector processed independently; if one fails, others continue
        - Failed extractions return found=False with error message
        - Use "name" field to label extractions
        - Supports iframes and shadow DOM

        MODE 2:
        - Ideal for scraping product listings, tables, search results
        - Extracts all matching containers (e.g., all products on page)
        - Fields extracted relative to each container
        - Regex patterns applied after extraction for cleaning
        - Attribute extraction for links, data-* attributes, etc.
        - Fallback values prevent missing fields
        - Returns structured array of objects ready for database insertion

        **RECOMMENDED WORKFLOW:**
        1. **Discover**: Use discover_containers=True to find correct selector (~5s, ~1K tokens)
        2. **Test**: Use max_items=10 to validate extraction (~10s, ~5K tokens)
        3. **Extract**: Use max_items=50-100 for production (~20s, controlled tokens)

        This 3-step workflow prevents:
        - Wasting time on wrong selectors (45s timeouts)
        - Token explosions (41K+ tokens from bad selectors)
        - Context clogging from oversized responses

        - Use return_mode="mixed" to see extraction results alongside page content
        - All extractions performed on current page state (no navigation)
    """
    # The extraction layer names its timeout parameter `timeout`; this tool exposes it
    # as `extraction_timeout`, so the mapping is made explicit here.
    extraction_request = {
        "selectors": selectors,
        "container_selector": container_selector,
        "fields": fields,
        "selector_type": selector_type,
        "wait_for_visible": wait_for_visible,
        "timeout": extraction_timeout,
        "max_items": max_items,
        "discover_containers": discover_containers,
        "wait_for_content_loaded": wait_for_content_loaded,
    }
    extracted_json = await extraction.extract_elements(**extraction_request)

    snapshot_options = {
        "return_mode": return_mode,
        "cleaning_level": cleaning_level,
        "token_budget": token_budget,
        "text_offset": text_offset,
        "html_offset": html_offset,
    }
    return await _to_context_pack(result_json=extracted_json, **snapshot_options)
1337
+ #endregion
1338
+
1339
+
1340
+
1341
+
1342
def main() -> None:
    """Entry point: start the MCP server by running the module-level `mcp` instance."""
    mcp.run()


# Run the server only when this module is executed as a script.
if __name__ == "__main__":
    main()