PyPI - browser-mcp-server - Versions diffs - 0.2.0__tar.gz - Mend

browser-mcp-server 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

browser_mcp_server-0.2.0/LICENSE +21 -0
browser_mcp_server-0.2.0/PKG-INFO +185 -0
browser_mcp_server-0.2.0/README.md +159 -0
browser_mcp_server-0.2.0/pyproject.toml +40 -0
browser_mcp_server-0.2.0/src/mcp_browser_use/__init__.py +6 -0
browser_mcp_server-0.2.0/src/mcp_browser_use/server.py +363 -0
browser_mcp_server-0.2.0/src/mcp_browser_use/tools.py +130 -0

browser_mcp_server-0.2.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Your Name
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

browser_mcp_server-0.2.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,185 @@
+Metadata-Version: 2.4
+Name: browser-mcp-server
+Version: 0.2.0
+Summary: MCP server that gives any LLM agent full browser control via browser-use. Auto-state after every action.
+Project-URL: Homepage, https://github.com/your-username/browser-mcp-server
+Project-URL: Repository, https://github.com/your-username/browser-mcp-server
+Project-URL: Issues, https://github.com/your-username/browser-mcp-server/issues
+Author-email: Your Name <you@example.com>
+License: MIT
+License-File: LICENSE
+Keywords: agent,automation,browser,browser-use,llm,mcp,playwright
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.10
+Requires-Dist: browser-use>=0.12.0
+Requires-Dist: mcp>=1.0.0
+Description-Content-Type: text/markdown
+# browser-mcp-server
+> Full browser control for any LLM agent via the [Model Context Protocol (MCP)](https://modelcontextprotocol.io).
+Powered by [browser-use](https://github.com/browser-use/browser-use) — a persistent, Playwright-backed browser session that your agent controls step-by-step.
+---
+## ✨ Key Feature: Zero State Calls
+Every action that changes the page (`open`, `click`, `input`, `type`, `keys`, `scroll`, `back`) **automatically returns the updated page state** in the same response. Your agent never wastes a round-trip calling `state` after an action — it's already there.
+---
+## Installation
+```bash
+pip install browser-mcp-server
+```
+> **Chromium is installed automatically on first run.** The server detects whether
+> the browser binary is present and downloads it if not (~170 MB, one-time).
+> You never need to run `playwright install chromium` manually.
+---
+## Quick Start
+### Claude Desktop
+Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
+```json
+{
+  "mcpServers": {
+    "browser-use": {
+      "command": "browser-mcp-server"
+    }
+  }
+}
+```
+### Cursor / Zed / Windsurf
+Add to your MCP settings:
+```json
+{
+  "mcpServers": {
+    "browser-use": {
+      "command": "browser-mcp-server"
+    }
+  }
+}
+```
+### nixagent (`mcp.json`)
+```json
+{
+  "mcpServers": {
+    "browser-use": {
+      "command": "browser-mcp-server",
+      "active": true
+    }
+  }
+}
+```
+```python
+from nixagent import Agent
+agent = Agent(
+    name="BrowserAgent",
+    system_prompt="You are a web browsing assistant.",
+    mcp_config_path="mcp.json"
+)
+agent.run("Go to stripe.com and tell me the pricing for the Starter plan.")
+```
+### HTTP / Remote Agents
+```bash
+# Modern streaming (recommended)
+browser-mcp-server --transport streamable-http --port 8080
+# Legacy SSE
+browser-mcp-server --transport sse --port 8080
+```
+---
+## Available Tools
+| Tool                       | Description                                               |
+| -------------------------- | --------------------------------------------------------- |
+| `browser_use_tool`       | Generic dispatcher — any browser-use command             |
+| `browser_open_tool`      | Open URL → **state auto-returned**                        |
+| `browser_click_tool`     | Click by index → **state auto-returned**                  |
+| `browser_input_tool`     | Click + type (preferred) → **state auto-returned**        |
+| `browser_type_tool`      | Type into focused element → **state auto-returned**       |
+| `browser_keys_tool`      | Send keyboard key → **state auto-returned**               |
+| `browser_scroll_tool`    | Scroll up/down → **state auto-returned**                  |
+| `browser_back_tool`      | Navigate back → **state auto-returned**                   |
+| `browser_state_tool`     | Explicit state fetch *(rarely needed)*                    |
+| `browser_get_text_tool`  | Extract element text                                      |
+| `browser_get_html_tool`  | Full/scoped page HTML                                     |
+| `browser_get_title_tool` | Page title                                                |
+| `browser_close_tool`     | Close all sessions *(call when done)*                     |
+---
+## How the Auto-State Works
+Traditional browser agents need an extra `state` call after every action to refresh the element indexes:
+```
+1. agent calls: state         → get element indexes
+2. agent calls: click 3       → click
+3. agent calls: state         → get updated indexes  ← wasted call
+4. agent calls: input 7 "..." → type
+```
+With `browser-mcp-server`, every mutating action returns fresh state in its own response:
+```
+1. agent calls: open https://...  → navigated + state returned ✓
+2. agent calls: click 3           → clicked + updated state returned ✓
+3. agent calls: input 7 "..."     → typed + updated state returned ✓
+```
+---
+## CLI Reference
+```
+usage: browser-mcp-server [-h] [--transport {stdio,sse,streamable-http}]
+                        [--host HOST] [--port PORT]
+options:
+  --transport   stdio | sse | streamable-http  (default: stdio)
+  --host        host for HTTP transports       (default: 127.0.0.1)
+  --port        port for HTTP transports       (default: 8080)
+```
+---
+## Requirements
+- Python 3.10+
+- `browser-use >= 0.12.0`
+- `mcp >= 1.0.0`
+- Chromium (downloaded automatically on first run; `playwright install chromium` also works if you prefer to install it manually)
+---
+## License
+MIT

browser_mcp_server-0.2.0/README.md ADDED Viewed

@@ -0,0 +1,159 @@
+# browser-mcp-server
+> Full browser control for any LLM agent via the [Model Context Protocol (MCP)](https://modelcontextprotocol.io).
+Powered by [browser-use](https://github.com/browser-use/browser-use) — a persistent, Playwright-backed browser session that your agent controls step-by-step.
+---
+## ✨ Key Feature: Zero State Calls
+Every action that changes the page (`open`, `click`, `input`, `type`, `keys`, `scroll`, `back`) **automatically returns the updated page state** in the same response. Your agent never wastes a round-trip calling `state` after an action — it's already there.
+---
+## Installation
+```bash
+pip install browser-mcp-server
+```
+> **Chromium is installed automatically on first run.** The server detects whether
+> the browser binary is present and downloads it if not (~170 MB, one-time).
+> You never need to run `playwright install chromium` manually.
+---
+## Quick Start
+### Claude Desktop
+Add to `~/Library/Application Support/Claude/claude_desktop_config.json`:
+```json
+{
+  "mcpServers": {
+    "browser-use": {
+      "command": "browser-mcp-server"
+    }
+  }
+}
+```
+### Cursor / Zed / Windsurf
+Add to your MCP settings:
+```json
+{
+  "mcpServers": {
+    "browser-use": {
+      "command": "browser-mcp-server"
+    }
+  }
+}
+```
+### nixagent (`mcp.json`)
+```json
+{
+  "mcpServers": {
+    "browser-use": {
+      "command": "browser-mcp-server",
+      "active": true
+    }
+  }
+}
+```
+```python
+from nixagent import Agent
+agent = Agent(
+    name="BrowserAgent",
+    system_prompt="You are a web browsing assistant.",
+    mcp_config_path="mcp.json"
+)
+agent.run("Go to stripe.com and tell me the pricing for the Starter plan.")
+```
+### HTTP / Remote Agents
+```bash
+# Modern streaming (recommended)
+browser-mcp-server --transport streamable-http --port 8080
+# Legacy SSE
+browser-mcp-server --transport sse --port 8080
+```
+---
+## Available Tools
+| Tool                       | Description                                               |
+| -------------------------- | --------------------------------------------------------- |
+| `browser_use_tool`       | Generic dispatcher — any browser-use command             |
+| `browser_open_tool`      | Open URL → **state auto-returned**                        |
+| `browser_click_tool`     | Click by index → **state auto-returned**                  |
+| `browser_input_tool`     | Click + type (preferred) → **state auto-returned**        |
+| `browser_type_tool`      | Type into focused element → **state auto-returned**       |
+| `browser_keys_tool`      | Send keyboard key → **state auto-returned**               |
+| `browser_scroll_tool`    | Scroll up/down → **state auto-returned**                  |
+| `browser_back_tool`      | Navigate back → **state auto-returned**                   |
+| `browser_state_tool`     | Explicit state fetch *(rarely needed)*                    |
+| `browser_get_text_tool`  | Extract element text                                      |
+| `browser_get_html_tool`  | Full/scoped page HTML                                     |
+| `browser_get_title_tool` | Page title                                                |
+| `browser_close_tool`     | Close all sessions *(call when done)*                     |
+---
+## How the Auto-State Works
+Traditional browser agents need an extra `state` call after every action to refresh the element indexes:
+```
+1. agent calls: state         → get element indexes
+2. agent calls: click 3       → click
+3. agent calls: state         → get updated indexes  ← wasted call
+4. agent calls: input 7 "..." → type
+```
+With `browser-mcp-server`, every mutating action returns fresh state in its own response:
+```
+1. agent calls: open https://...  → navigated + state returned ✓
+2. agent calls: click 3           → clicked + updated state returned ✓
+3. agent calls: input 7 "..."     → typed + updated state returned ✓
+```
+---
+## CLI Reference
+```
+usage: browser-mcp-server [-h] [--transport {stdio,sse,streamable-http}]
+                        [--host HOST] [--port PORT]
+options:
+  --transport   stdio | sse | streamable-http  (default: stdio)
+  --host        host for HTTP transports       (default: 127.0.0.1)
+  --port        port for HTTP transports       (default: 8080)
+```
+---
+## Requirements
+- Python 3.10+
+- `browser-use >= 0.12.0`
+- `mcp >= 1.0.0`
+- Chromium (downloaded automatically on first run; `playwright install chromium` also works if you prefer to install it manually)
+---
+## License
+MIT

browser_mcp_server-0.2.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,40 @@
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "browser-mcp-server"
+version = "0.2.0"
+description = "MCP server that gives any LLM agent full browser control via browser-use. Auto-state after every action."
+readme = "README.md"
+license = { text = "MIT" }
+authors = [{ name = "Your Name", email = "you@example.com" }]
+keywords = ["mcp", "browser", "browser-use", "llm", "agent", "automation", "playwright"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: MIT License",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Software Development :: Libraries :: Python Modules",
+    "Topic :: Internet :: WWW/HTTP :: Browsers",
+]
+requires-python = ">=3.10"
+dependencies = [
+    "mcp>=1.0.0",
+    "browser-use>=0.12.0",
+]
+[project.urls]
+Homepage = "https://github.com/your-username/browser-mcp-server"
+Repository = "https://github.com/your-username/browser-mcp-server"
+Issues = "https://github.com/your-username/browser-mcp-server/issues"
+[project.scripts]
+browser-mcp-server = "mcp_browser_use.server:main"
+[tool.hatch.build.targets.wheel]
+packages = ["src/mcp_browser_use"]

browser_mcp_server-0.2.0/src/mcp_browser_use/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""mcp_browser_use — Browser automation MCP server powered by browser-use."""
+__version__ = "0.2.0"
+__all__ = ["create_server"]
+from .server import create_server

browser_mcp_server-0.2.0/src/mcp_browser_use/server.py ADDED Viewed

@@ -0,0 +1,363 @@
+"""
+mcp_browser_use/server.py
+──────────────────────────
+FastMCP server exposing the browser-use CLI as MCP tools.
+Transport options
+-----------------
+  stdio            — Claude Desktop, nixagent, Cursor, Zed, etc. (default)
+  streamable-http  — Remote / networked agents (recommended for HTTP)
+  sse              — Legacy HTTP server-sent events
+Usage
+-----
+  # stdio (most common)
+  browser-mcp-server
+  # HTTP
+  browser-mcp-server --transport streamable-http --port 8080
+Chromium auto-install
+---------------------
+On first run the server checks whether the Chromium binary is present.
+If it is missing it runs `playwright install chromium` automatically.
+Subsequent starts skip the download entirely (binary already on disk).
+"""
+import argparse
+import os
+import subprocess
+import sys
+from mcp.server.fastmcp import FastMCP
+from .tools import (
+    browser_use,
+    browser_open,
+    browser_state,
+    browser_click,
+    browser_input,
+    browser_type,
+    browser_keys,
+    browser_scroll,
+    browser_back,
+    browser_get_text,
+    browser_get_html,
+    browser_get_title,
+    browser_close,
+)
+# ── Chromium auto-install ─────────────────────────────────────────────────────
+def _ensure_chromium() -> None:
+    """
+    Check if the Playwright Chromium binary is present.
+    If it is missing, download it automatically via `playwright install chromium`.
+    This runs at most once — subsequent calls return immediately because
+    the binary is already on disk.
+    """
+    try:
+        from playwright.sync_api import sync_playwright  # noqa: PLC0415
+        with sync_playwright() as p:
+            executable = p.chromium.executable_path
+            if os.path.exists(executable):
+                return  # Already installed — nothing to do
+    except Exception:
+        pass  # playwright not importable or path check failed → fall through
+    # Binary missing — download it now
+    print(
+        "[mcp-browser-use] Chromium not found. Installing automatically "
+        "(one-time download, ~170 MB)...",
+        file=sys.stderr,
+    )
+    subprocess.run(
+        [sys.executable, "-m", "playwright", "install", "chromium"],
+        check=True,
+    )
+    print("[mcp-browser-use] Chromium installed successfully.", file=sys.stderr)
+def create_server() -> FastMCP:
+    """
+    Build and return the configured FastMCP server instance.
+    Creates a FastMCP server named "browser-use" and registers thirteen
+    tools on it with `@mcp.tool`. Each tool is a thin adapter that forwards
+    its arguments to the same-named function imported from `.tools` and
+    returns that function's string result. The server is only constructed
+    here, not started — the caller decides which transport to run.
+    Exposed so the server can be embedded or tested programmatically.
+    Returns:
+        The FastMCP instance with all browser tools registered.
+    """
+    mcp = FastMCP(
+        name="browser-use",
+        instructions=(
+            "Controls a persistent browser session via the browser-use CLI. "
+            "After any mutating command (open, click, input, type, keys, "
+            "scroll, back) the updated page state is returned automatically — "
+            "never call browser_state after one of these. "
+            "Always finish a task with browser_close."
+        ),
+    )
+    # NOTE: the `description=` strings below are what the tool decorator is
+    # given explicitly; treat them as part of the tool's public contract.
+    # ── tool: browser_use (generic dispatcher) ────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Execute any browser-use CLI command.\n\n"
+            "Mutating commands automatically return the updated page state — "
+            "no need to call browser_state after them.\n\n"
+            "Commands:\n"
+            "  'open https://example.com'   → navigate, state auto-returned\n"
+            "  'click 3'                    → click element, state auto-returned\n"
+            "  'input 5 \"query\"'            → click+type, state auto-returned\n"
+            "  'type \"hello\"'               → type into focused element\n"
+            "  'keys \"Enter\"'               → send keyboard key\n"
+            "  'scroll down' / 'scroll up'  → scroll page\n"
+            "  'back'                       → navigate back\n"
+            "  'get text 2'                 → extract element text\n"
+            "  'get html'                   → full page HTML\n"
+            "  'get html --selector \"h1\"'   → scoped HTML\n"
+            "  'get title'                  → page title\n"
+            "  'close --all'                → close all sessions"
+        )
+    )
+    def browser_use_tool(command: str, headed: bool = False) -> str:
+        """
+        Run a browser-use CLI command.
+        Args:
+            command: Sub-command string (e.g. 'open https://example.com').
+            headed:  Show the browser window. Only applies to 'open'. Default false.
+        """
+        return browser_use(command, headed=headed)
+    # ── tool: browser_open ────────────────────────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Open a URL in the browser. "
+            "The current page state (URL, title, clickable elements) "
+            "is returned automatically."
+        )
+    )
+    def browser_open_tool(url: str, headed: bool = False) -> str:
+        """
+        Navigate to a URL and return the page state.
+        Args:
+            url:    Full URL to open (e.g. 'https://news.ycombinator.com').
+            headed: Show the browser window. Default false.
+        """
+        return browser_open(url, headed=headed)
+    # ── tool: browser_state ───────────────────────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Get the current browser page state: URL, title, and a numbered list "
+            "of all interactive elements with their indexes. "
+            "Rarely needed — mutating commands already return state automatically."
+        )
+    )
+    def browser_state_tool() -> str:
+        """Explicitly fetch the current page state."""
+        return browser_state()
+    # ── tool: browser_click ───────────────────────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Click an element by its index (from the page state list). "
+            "The updated page state is returned automatically after clicking."
+        )
+    )
+    def browser_click_tool(index: int) -> str:
+        """
+        Click the element at the given index.
+        Args:
+            index: Element index from the page state list.
+        """
+        return browser_click(index)
+    # ── tool: browser_input ───────────────────────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Click an element and type text into it in one step. "
+            "Preferred over separate click + type. "
+            "The updated page state is returned automatically."
+        )
+    )
+    def browser_input_tool(index: int, text: str) -> str:
+        """
+        Click element at index, then type text.
+        Args:
+            index: Element index from the page state list.
+            text:  Text to type.
+        """
+        return browser_input(index, text)
+    # ── tool: browser_type ────────────────────────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Type text into the currently focused element. "
+            "The updated page state is returned automatically."
+        )
+    )
+    def browser_type_tool(text: str) -> str:
+        """
+        Type into the focused element.
+        Args:
+            text: The text to type.
+        """
+        return browser_type(text)
+    # ── tool: browser_keys ────────────────────────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Send a keyboard key to the browser "
+            "(e.g. 'Enter', 'Tab', 'Escape', 'ArrowDown'). "
+            "The updated page state is returned automatically."
+        )
+    )
+    def browser_keys_tool(key: str) -> str:
+        """
+        Press a keyboard key.
+        Args:
+            key: Key name (e.g. 'Enter', 'Tab', 'Escape', 'ArrowDown').
+        """
+        return browser_keys(key)
+    # ── tool: browser_scroll ──────────────────────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Scroll the current page up or down. "
+            "The updated page state is returned automatically."
+        )
+    )
+    def browser_scroll_tool(direction: str = "down") -> str:
+        """
+        Scroll the page.
+        Args:
+            direction: 'down' or 'up'. Defaults to 'down'.
+        """
+        # An invalid direction is reported back as an error string instead of
+        # being raised, and browser_scroll is never called in that case.
+        if direction not in ("up", "down"):
+            return "Error: direction must be 'up' or 'down'."
+        return browser_scroll(direction)
+    # ── tool: browser_back ────────────────────────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Navigate back in browser history. "
+            "The updated page state is returned automatically."
+        )
+    )
+    def browser_back_tool() -> str:
+        """Go back one page."""
+        return browser_back()
+    # ── tool: browser_get_text ────────────────────────────────────────────────
+    @mcp.tool(
+        description="Get the text content of a specific element by its index number."
+    )
+    def browser_get_text_tool(index: int) -> str:
+        """
+        Extract the visible text of an element.
+        Args:
+            index: Element index from the page state list.
+        """
+        return browser_get_text(index)
+    # ── tool: browser_get_html ────────────────────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Get the HTML of the current page, or of a specific element "
+            "using a CSS selector. Useful for scraping structured data."
+        )
+    )
+    def browser_get_html_tool(selector: str = "") -> str:
+        """
+        Retrieve page HTML.
+        Args:
+            selector: Optional CSS selector (e.g. 'h1', '.price').
+                      Leave empty for full page HTML.
+        """
+        # The tool signature uses "" for "no selector"; browser_get_html
+        # expects None for that case, so translate here.
+        return browser_get_html(selector if selector else None)
+    # ── tool: browser_get_title ───────────────────────────────────────────────
+    @mcp.tool(description="Get the title of the current browser page.")
+    def browser_get_title_tool() -> str:
+        """Return the current page title."""
+        return browser_get_title()
+    # ── tool: browser_close ───────────────────────────────────────────────────
+    @mcp.tool(
+        description=(
+            "Close all browser sessions. "
+            "ALWAYS call this when the task is fully complete."
+        )
+    )
+    def browser_close_tool() -> str:
+        """Close all browser sessions and end the task."""
+        return browser_close()
+    return mcp
+# ── CLI entry point ───────────────────────────────────────────────────────────
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        prog="browser-mcp-server",
+        description="browser-use MCP server — expose browser automation to any LLM agent",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Transport modes:
+  stdio            Local agents: Claude Desktop, Cursor, nixagent, Zed. (default)
+  streamable-http  Modern HTTP streaming — recommended for remote agents.
+  sse              Legacy HTTP server-sent events.
+Examples:
+  browser-mcp-server
+  browser-mcp-server --transport streamable-http --port 8080
+  browser-mcp-server --transport sse --host 0.0.0.0 --port 9000
+        """,
+    )
+    parser.add_argument(
+        "--transport",
+        choices=["stdio", "sse", "streamable-http"],
+        default="stdio",
+        help="MCP transport (default: stdio)",
+    )
+    parser.add_argument(
+        "--host",
+        default="127.0.0.1",
+        help="Host for HTTP transports (default: 127.0.0.1)",
+    )
+    parser.add_argument(
+        "--port",
+        type=int,
+        default=8080,
+        help="Port for HTTP transports (default: 8080)",
+    )
+    args = parser.parse_args()
+    # Ensure Chromium is present before the server starts accepting requests.
+    # No-op if already installed; downloads automatically on first run.
+    _ensure_chromium()
+    server = create_server()
+    if args.transport == "stdio":
+        server.run(transport="stdio")
+    else:
+        server.host = args.host
+        server.port = args.port
+        print(
+            f"[mcp-browser-use] Starting '{args.transport}' server on "
+            f"http://{args.host}:{args.port}",
+            file=sys.stderr,
+        )
+        server.run(transport=args.transport)
+if __name__ == "__main__":
+    main()

browser_mcp_server-0.2.0/src/mcp_browser_use/tools.py ADDED Viewed

@@ -0,0 +1,130 @@
+"""
+mcp_browser_use/tools.py
+─────────────────────────
+Thin wrapper around the `browser-use` CLI.
+Auto-state behaviour
+────────────────────
+Every command that mutates the page (open, click, input, type, keys, scroll,
+back) automatically fetches and appends `browser-use state` to its output.
+The LLM therefore NEVER needs to call `state` explicitly — it always has the
+current element list in the response of the previous action.
+"""
+import subprocess
+from typing import Optional
+# Commands that mutate the page → state is auto-appended after each of these.
+_MUTATING_CMDS = {"open", "click", "input", "type", "keys", "scroll", "back"}
+# ── Low-level runner ──────────────────────────────────────────────────────────
+def _run(args: list[str], timeout: int = 30) -> str:
+    """Run `browser-use <args>` and return stdout+stderr. Raises on failure."""
+    cmd = ["browser-use"] + args
+    try:
+        result = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=timeout
+        )
+        output = (result.stdout + result.stderr).strip()
+        if result.returncode != 0:
+            raise RuntimeError(
+                f"browser-use failed (exit {result.returncode}):\n{output}"
+            )
+        return output
+    except FileNotFoundError:
+        raise RuntimeError(
+            "browser-use CLI not found. "
+            "Install it with: pip install browser-use && playwright install"
+        )
+    except subprocess.TimeoutExpired:
+        raise RuntimeError(
+            f"browser-use timed out after {timeout}s: {' '.join(cmd)}"
+        )
+# ── Primary dispatcher ────────────────────────────────────────────────────────
+def browser_use(command: str, headed: bool = False) -> str:
+    """
+    Run any browser-use CLI command.
+    Mutating commands (open, click, input, type, keys, scroll, back)
+    automatically append the updated page state to their output so the LLM
+    never needs an explicit `state` call after an action.
+    """
+    parts = command.strip().split(maxsplit=1)
+    sub_cmd = parts[0].lower() if parts else ""
+    args: list[str] = []
+    if headed and sub_cmd == "open":
+        args.append("--headed")
+    args += command.strip().split()
+    output = _run(args)
+    if sub_cmd in _MUTATING_CMDS:
+        label = f"browser-use: after '{sub_cmd}' — current state (auto-fetched)"
+        try:
+            state_output = _run(["state"])
+            output = f"{output}\n\n[{label}]\n{state_output}"
+        except RuntimeError as exc:
+            output += f"\n\n[browser-use: state fetch failed — {exc}]"
+    return output
+# ── Convenience wrappers ──────────────────────────────────────────────────────
+def browser_open(url: str, headed: bool = False) -> str:
+    """Open a URL. Page state is returned automatically."""
+    return browser_use(f"open {url}", headed=headed)
+def browser_state() -> str:
+    """Get the current page state (URL, title, element list)."""
+    return _run(["state"])
+def browser_click(index: int) -> str:
+    """Click the element at index. Page state auto-returned."""
+    return browser_use(f"click {index}")
+def browser_input(index: int, text: str) -> str:
+    """Click element at index then type text. Page state auto-returned."""
+    return browser_use(f'input {index} "{text}"')
+def browser_type(text: str) -> str:
+    """Type into the currently focused element. Page state auto-returned."""
+    return browser_use(f'type "{text}"')
+def browser_keys(key: str) -> str:
+    """Send a keyboard key (e.g. 'Enter'). Page state auto-returned."""
+    return browser_use(f'keys "{key}"')
+def browser_scroll(direction: str = "down") -> str:
+    """Scroll up or down. Page state auto-returned."""
+    assert direction in ("up", "down"), "direction must be 'up' or 'down'"
+    return browser_use(f"scroll {direction}")
+def browser_back() -> str:
+    """Navigate back. Page state auto-returned."""
+    return browser_use("back")
+def browser_get_text(index: int) -> str:
+    """Get the text content of the element at index."""
+    return browser_use(f"get text {index}")
+def browser_get_html(selector: Optional[str] = None) -> str:
+    """Get full page HTML, or HTML of a specific CSS selector."""
+    if selector:
+        return browser_use(f'get html --selector "{selector}"')
+    return browser_use("get html")
+def browser_get_title() -> str:
+    """Get the current page title."""
+    return browser_use("get title")
+def browser_close() -> str:
+    """Close all browser sessions. Call when the task is done."""
+    return browser_use("close --all")