PyPI - browsercontrol - Versions diffs - 0.1.3__tar.gz → 0.1.4__tar.gz - Mend

browsercontrol 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

browsercontrol-0.1.4/.claude/settings.local.json ADDED Viewed

@@ -0,0 +1,10 @@
+{
+  "permissions": {
+    "allow": [
+      "Bash(uv run *)",
+      "Bash(xargs cat -n)",
+      "Bash(git show-ref *)"
+    ]
+  },
+  "prefersReducedMotion": false
+}

browsercontrol-0.1.4/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,135 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: "latest"
+      - name: Install dependencies
+        run: uv sync --all-extras
+      - name: Run pre-commit hooks
+        run: uv run pre-commit run --all-files
+      - name: Run ruff check
+        run: uv run ruff check .
+      - name: Run ruff format check
+        run: uv run ruff format --check .
+  test:
+    runs-on: ubuntu-latest
+    needs: lint
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: "latest"
+      - name: Install dependencies
+        run: uv sync --all-extras
+      - name: Install Playwright browsers
+        run: uv run playwright install chromium --with-deps
+      - name: Run tests
+        run: uv run pytest -v
+  # Test on Windows and macOS as well
+  test-windows:
+    runs-on: windows-latest
+    needs: lint
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: "latest"
+      - name: Install dependencies
+        run: uv sync --all-extras
+      - name: Install Playwright browsers
+        run: uv run playwright install chromium
+      - name: Run tests
+        run: uv run pytest -v
+  test-macos:
+    runs-on: macos-latest
+    needs: lint
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: "latest"
+      - name: Install dependencies
+        run: uv sync --all-extras
+      - name: Install Playwright browsers
+        run: uv run playwright install chromium
+      - name: Run tests
+        run: uv run pytest -v
+  security:
+    runs-on: ubuntu-latest
+    needs: lint
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: "latest"
+      - name: Install dependencies
+        run: uv sync --all-extras
+      - name: Run bandit security checks
+        run: uv run bandit -c pyproject.toml -r . --exclude ./tests,./.venv
+  type-check:
+    runs-on: ubuntu-latest
+    needs: lint
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Install uv
+        uses: astral-sh/setup-uv@v3
+        with:
+          version: "latest"
+      - name: Install dependencies
+        run: uv sync --all-extras
+      - name: Run mypy (if type hints are used)
+        run: uv run mypy browsercontrol/ || echo "No mypy configuration found"

browsercontrol-0.1.4/.github/workflows/dependency-review.yml ADDED Viewed

@@ -0,0 +1,39 @@
+# Dependency Review Action
+#
+# This Action will scan dependency manifest files that change as part of a Pull Request,
+# surfacing known-vulnerable versions of the packages declared or updated in the PR.
+# Once installed, if the workflow run is marked as required, PRs introducing known-vulnerable
+# packages will be blocked from merging.
+#
+# Source repository: https://github.com/actions/dependency-review-action
+# Public documentation: https://docs.github.com/en/code-security/supply-chain-security/understanding-your-software-supply-chain/about-dependency-review#dependency-review-enforcement
+name: "Dependency review"
+on:
+  pull_request:
+    branches: ["main"]
+# If using a dependency submission action in this workflow this permission will need to be set to:
+#
+# permissions:
+#   contents: write
+#
+# https://docs.github.com/en/enterprise-cloud@latest/code-security/supply-chain-security/understanding-your-software-supply-chain/using-the-dependency-submission-api
+permissions:
+  contents: read
+  # Write permissions for pull-requests are required for using the `comment-summary-in-pr` option, comment out if you aren't using this option
+  pull-requests: write
+jobs:
+  dependency-review:
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Checkout repository"
+        uses: actions/checkout@v4
+      - name: "Dependency Review"
+        uses: actions/dependency-review-action@v4
+        # Commonly enabled options, see https://github.com/actions/dependency-review-action#configuration-options for all available options.
+        with:
+          comment-summary-in-pr: always
+        #   fail-on-severity: moderate
+        #   deny-licenses: GPL-1.0-or-later, LGPL-2.0-or-later
+        #   retry-on-snapshot-warnings: true

{browsercontrol-0.1.3 → browsercontrol-0.1.4}/.gitignore RENAMED Viewed

@@ -11,3 +11,5 @@ dist/
 # Virtual environments
 .venv/
+.ruff_cache/
+.pytest_cache/

{browsercontrol-0.1.3 → browsercontrol-0.1.4}/.pre-commit-config.yaml RENAMED Viewed

@@ -25,14 +25,6 @@ repos:
       - id: mixed-line-ending
         args: [--fix=lf]
-  # Markdown formatting
-  - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v4.0.0-alpha.8
-    hooks:
-      - id: prettier
-        types_or: [markdown, yaml, json]
-        exclude: ^(uv\.lock|\.vscode/)
   # Python security checks
   - repo: https://github.com/PyCQA/bandit
     rev: 1.8.0

browsercontrol-0.1.4/CLAUDE.md ADDED Viewed

@@ -0,0 +1,114 @@
+# CLAUDE.md
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+## Overview
+BrowserControl is an MCP (Model Context Protocol) server that gives AI agents
+vision-first browser automation via Playwright. Its defining idea is **Set of
+Marks (SoM)**: every action returns an annotated screenshot where interactive
+elements are overlaid with numbered red boxes, so the agent acts by referring to
+an element's number (`click(7)`) instead of CSS selectors or XPath.
+## Commands
+This project uses `uv` for all dependency and task management.
+```bash
+uv sync                                    # Install deps (use --all-extras in CI to include dev group)
+uv run playwright install chromium         # One-time: install the browser engine
+uv run pytest                              # Run all tests
+uv run pytest tests/test_navigation.py     # Run a single test file
+uv run pytest tests/test_navigation.py::TestScroll::test_scroll_down_medium   # Single test
+uv run pytest --cov=browsercontrol         # With coverage
+uv run ruff check .                        # Lint
+uv run ruff check . --fix                  # Lint + autofix
+uv run ruff format .                       # Format (line length 100, double quotes)
+uv run mypy browsercontrol/                # Type-check (strict mode)
+uv run bandit -c pyproject.toml -r . --exclude ./tests,./.venv   # Security scan
+uv run pre-commit run --all-files          # Run every hook (mirrors the CI lint job)
+uv run fastmcp dev browsercontrol/server.py   # Run the server in dev mode with inspector
+browsercontrol                                # Run the installed server (also: python -m browsercontrol)
+```
+CI (`.github/workflows/ci.yml`) gates on the lint job first, then runs tests on
+Linux/Windows/macOS plus bandit and mypy. Run `ruff check`, `ruff format`, and
+`pytest` locally before pushing.
+## Architecture
+The flow is: agent calls an MCP tool → tool drives the Playwright page → a fresh
+annotated screenshot + element map is captured → both are returned to the agent.
+**`browser.py` — the heart of the system.** Exposes a single module-global
+`browser = BrowserManager()` instance and a module-global `element_map` dict.
+Key behaviors to understand before touching anything:
+- `screenshot_with_som()` is called at the end of nearly every tool. It takes a
+  screenshot, runs `get_interactive_elements()` (injected JS that collects
+  visible, on-screen interactive elements and their bounding boxes), draws the
+  numbered boxes, and **overwrites the global `element_map`** with 1-indexed IDs.
+- **Element IDs are ephemeral.** They are only valid against the most recent
+  screenshot. Any navigation, click, scroll, or screenshot regenerates the map
+  and renumbers everything. Tools look up the target via `get_element_map()` and
+  return an error listing valid IDs when the ID is missing.
+- The browser uses `launch_persistent_context` against a profile directory
+  (`~/.browsercontrol/user_data` by default), so cookies, localStorage, and login
+  state persist across server restarts. Launch args include localhost proxy
+  bypass to avoid `ERR_CONNECTION_REFUSED`.
+- DevTools data (console logs, network requests, page errors) is captured by
+  event listeners attached in `_setup_page_listeners`, auto-wired to every new
+  page/popup, and stored in capped in-memory ring buffers on the manager.
+**`server.py` — composition root.** Creates the `FastMCP` instance, attaches a
+`lifespan` context manager that starts/stops the browser, and calls each
+`register_*_tools(mcp)`. The server-level `instructions` string is the prompt the
+agent sees describing available tools — keep it in sync when adding tools.
+**`tools/` — one module per category** (navigation, interaction, forms, content,
+devtools, recording, tabs). Each exports a single `register_<category>_tools(mcp)`
+function that defines inner `async` functions decorated with `@mcp.tool()`.
+Conventions every tool follows:
+- Signature returns `tuple[str, Image]`: a human-readable status string plus the
+  annotated screenshot.
+- Starts with `await browser.ensure_started()` (lazily boots the browser if the
+  lifespan hasn't, e.g. in some test paths).
+- Ends by building the result via a local `_get_screenshot_with_summary()`
+  helper. **This helper is duplicated verbatim in several tool modules** — if you
+  change the summary format, update every copy.
+**`config.py`** — a `Config` dataclass loaded once at import via
+`Config.from_env()` into a global `config`. All settings come from `BROWSER_*`
+and `LOG_LEVEL` env vars (see README for the full table). Because it is read at
+import time, env changes require a server restart.
+## Testing conventions
+Tests **do not launch a real browser** — everything is mocked. The pattern:
+- `tests/conftest.py` provides `mock_browser_manager` (a fully stubbed
+  `BrowserManager`), `mock_page`, `mock_context`, and `sample_element_map`.
+- A test registers the tools onto a throwaway `FastMCP("test")`, then patches the
+  module-level `browser` symbol **in that specific tool module** (e.g.
+  `patch("browsercontrol.tools.navigation.browser", mock_browser_manager)`) —
+  because each tool module does `from browsercontrol.browser import browser`, it
+  holds its own reference that must be patched independently.
+- The tool callable is reached through FastMCP internals:
+  `mcp_server._tool_manager._tools["navigate_to"].fn(...)`.
+- `asyncio_mode = "auto"` is set, so `async def test_*` runs without an explicit
+  marker (though existing tests also add `@pytest.mark.asyncio`).
+When adding a tool, add a test covering both the happy path and graceful error
+handling, following the existing per-module test files.
+## Conventions
+- Python 3.11+. Type hints are required and checked by mypy in **strict** mode
+  (`disallow_untyped_defs`, etc.) — annotate fully.
+- Commits follow Conventional Commits (`feat:`, `fix:`, `docs:`, `chore:`).
+- Pre-commit hooks (ruff, bandit) run on commit;
+  `git commit --no-verify` bypasses them but CI will still enforce them.

{browsercontrol-0.1.3 → browsercontrol-0.1.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: browsercontrol
-Version: 0.1.3
+Version: 0.1.4
 Summary: MCP server for browser automation with Set of Marks (SoM) - AI agents can see and interact with web pages using numbered element IDs
 Project-URL: Homepage, https://github.com/adityasasidhar/browsercontrol
 Project-URL: Repository, https://github.com/adityasasidhar/browsercontrol
@@ -25,7 +25,7 @@ Requires-Dist: playwright>=1.49.0
 Description-Content-Type: text/markdown
 <p align="center">
-  <img src="https://raw.githubusercontent.com/adityasasidhar/browsercontrol/main/assets/logo.png" alt="BrowserControl" width="140">
+  <img src="assets/logo-main.png" alt="BrowserControl" width="140">
 </p>
 <h1 align="center">BrowserControl</h1>
@@ -145,12 +145,18 @@ python -m browsercontrol
 fastmcp run browsercontrol.server:mcp
 ```
-### Connect to Claude Desktop
+### Connect to Your AI Agent
-Add to your Claude configuration file:
+BrowserControl works with any MCP-compatible AI agent or IDE. Choose your platform:
 <details>
-<summary><b>📁 macOS</b> — <code>~/Library/Application Support/Claude/claude_desktop_config.json</code></summary>
+<summary><b>Claude Desktop</b></summary>
+Add to your Claude configuration file:
+**macOS:** `~/Library/Application Support/Claude/claude_desktop_config.json`
+**Linux:** `~/.config/Claude/claude_desktop_config.json`
+**Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
 ```json
 {
@@ -162,17 +168,75 @@ Add to your Claude configuration file:
 }
 ```
+Restart Claude Desktop, then ask:
+> _"Go to GitHub and star the browsercontrol repo"_
 </details>
 <details>
-<summary><b>📁 Linux</b> — <code>~/.config/Claude/claude_desktop_config.json</code></summary>
+<summary><b>Gemini CLI / Google AI Studio</b></summary>
+If using the Gemini CLI or Google AI Studio with MCP support:
+```bash
+# Set up MCP configuration
+export MCP_SERVERS='{"browsercontrol": {"command": "browsercontrol"}}'
+# Or add to your Gemini config file
+```
+For Google AI Studio, configure in the MCP settings panel.
+</details>
+<details>
+<summary><b>🔧 Cline (VS Code Extension)</b></summary>
+1. Install the [Cline extension](https://marketplace.visualstudio.com/items?itemName=saoudrizwan.claude-dev)
+2. Open Cline settings (gear icon)
+3. Navigate to "MCP Servers"
+4. Add a new server:
 ```json
 {
-  "mcpServers": {
-    "browsercontrol": {
+  "browsercontrol": {
+    "command": "browsercontrol"
+  }
+}
+```
+</details>
+<details>
+<summary><b>🤖 Continue.dev (VS Code/JetBrains)</b></summary>
+Add to your Continue configuration (`~/.continue/config.json`):
+```json
+{
+  "mcpServers": [
+    {
+      "name": "browsercontrol",
       "command": "browsercontrol"
     }
+  ]
+}
+```
+</details>
+<details>
+<summary><b>🎯 Cursor IDE</b></summary>
+1. Open Cursor Settings
+2. Navigate to "Features" → "Model Context Protocol"
+3. Add server configuration:
+```json
+{
+  "browsercontrol": {
+    "command": "browsercontrol"
   }
 }
 ```
@@ -180,13 +244,79 @@ Add to your Claude configuration file:
 </details>
 <details>
-<summary><b>📁 Windows</b> — <code>%APPDATA%\Claude\claude_desktop_config.json</code></summary>
+<summary><b>🔌 Zed Editor</b></summary>
+Add to your Zed settings (`~/.config/zed/settings.json`):
+```json
+{
+  "context_servers": {
+    "browsercontrol": {
+      "command": {
+        "path": "browsercontrol"
+      }
+    }
+  }
+}
+```
+</details>
+<details>
+<summary><b>🐍 Custom Python Integration</b></summary>
+Use the MCP Python SDK to integrate BrowserControl into your own agent:
+```python
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+# Connect to BrowserControl
+server_params = StdioServerParameters(
+    command="browsercontrol",
+    args=[],
+)
+async with stdio_client(server_params) as (read, write):
+    async with ClientSession(read, write) as session:
+        # Initialize
+        await session.initialize()
+        # List available tools
+        tools = await session.list_tools()
+        # Call a tool
+        result = await session.call_tool("navigate_to", {
+            "url": "https://github.com"
+        })
+```
+</details>
+<details>
+<summary><b>🚀 Using with uv or pipx</b></summary>
+If you use `uv` or `pipx`, launch the server through that tool's runner instead of calling `browsercontrol` directly:
 ```json
 {
   "mcpServers": {
     "browsercontrol": {
-      "command": "browsercontrol"
+      "command": "uvx",
+      "args": ["browsercontrol"]
+    }
+  }
+}
+```
+Or with pipx:
+```json
+{
+  "mcpServers": {
+    "browsercontrol": {
+      "command": "pipx",
+      "args": ["run", "browsercontrol"]
     }
   }
 }
@@ -194,11 +324,30 @@ Add to your Claude configuration file:
 </details>
-Then ask Claude:
+<details>
+<summary><b>🔧 Advanced Configuration</b></summary>
-> _"Go to GitHub and star the browsercontrol repo"_
+You can pass environment variables to customize BrowserControl:
-Claude will navigate, find the star button, and click it—showing you screenshots along the way!
+```json
+{
+  "mcpServers": {
+    "browsercontrol": {
+      "command": "browsercontrol",
+      "env": {
+        "BROWSER_HEADLESS": "false",
+        "BROWSER_VIEWPORT_WIDTH": "1920",
+        "BROWSER_VIEWPORT_HEIGHT": "1080",
+        "LOG_LEVEL": "DEBUG"
+      }
+    }
+  }
+}
+```
+See [Configuration](#-configuration) for all available options.
+</details>
 <br>
@@ -307,7 +456,6 @@ Test responsive designs or emulate mobile screens on the fly:
 | `press_key(key)`                | Press keyboard key (Enter, Tab, etc.) |
 | `hover(element_id)`             | Hover over element                    |
 | `scroll_to_element(element_id)` | Scroll element into view              |
-| `upload_file(element_id, path)` | Upload a file to an input             |
 | `wait(seconds)`                 | Wait for page loading                 |
 ### Tab Management

browsercontrol 0.1.3__tar.gz → 0.1.4__tar.gz

browsercontrol 0.1.3__tar.gz → 0.1.4__tar.gz