npm - @mindstone-engineering/mcp-server-browser-automation - Versions diffs - 0.1.0 - Mend

@mindstone-engineering/mcp-server-browser-automation 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/LICENSE +97 -0
package/README.md +95 -0
package/dist/bridge.d.ts +16 -0
package/dist/bridge.js +40 -0
package/dist/browser-client.d.ts +21 -0
package/dist/browser-client.js +82 -0
package/dist/index.d.ts +16 -0
package/dist/index.js +27 -0
package/dist/server.d.ts +3 -0
package/dist/server.js +15 -0
package/dist/tools/index.d.ts +5 -0
package/dist/tools/index.js +5 -0
package/dist/tools/interaction.d.ts +3 -0
package/dist/tools/interaction.js +135 -0
package/dist/tools/navigation.d.ts +3 -0
package/dist/tools/navigation.js +72 -0
package/dist/tools/observation.d.ts +3 -0
package/dist/tools/observation.js +81 -0
package/dist/tools/session.d.ts +3 -0
package/dist/tools/session.js +63 -0
package/dist/types.d.ts +12 -0
package/dist/types.js +17 -0
package/dist/utils.d.ts +18 -0
package/dist/utils.js +75 -0
package/package.json +48 -0

package/LICENSE ADDED Viewed

@@ -0,0 +1,97 @@
+# Functional Source License, Version 1.1, MIT Future License
+## Abbreviation
+FSL-1.1-MIT
+## Notice
+Copyright 2026 Mindstone Learning Limited
+## Terms and Conditions
+### Licensor ("We")
+The party offering the Software under these Terms and Conditions.
+**Licensor**: Mindstone Learning Limited
+### The Software
+The "Software" is each version of the software that we make available under
+these Terms and Conditions, as indicated by our inclusion of these Terms and
+Conditions with the Software.
+**Software**: Browser Automation MCP Server
+### License Grant
+Subject to your compliance with this License Grant and the Patents,
+Redistribution and Trademark clauses below, we hereby grant you the right to
+use, copy, modify, create derivative works, publicly perform, publicly display
+and redistribute the Software for any Permitted Purpose identified below.
+### Permitted Purpose
+A Permitted Purpose is any purpose other than a Competing Use. A "Competing
+Use" means making the Software available to third parties as a commercial
+hosted service that directly competes with any product or service provided by
+the Licensor.
+### Patents
+To the extent your use for a Permitted Purpose would necessarily infringe our
+patents, the license grant above includes a license under our patents. If you
+make a claim against any party that the Software infringes or contributes to
+the infringement of any patent, then your patent license to the Software ends
+immediately.
+### Redistribution
+The Terms and Conditions apply to all copies, modifications and derivatives of
+the Software.
+If you redistribute any copies, modifications or derivatives of the Software,
+you must include a copy of or a link to these Terms and Conditions and not
+remove any copyright notices provided in or with the Software.
+### Disclaimer
+THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR
+PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT.
+IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE
+SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, OF
+ANY CHARACTER INCLUDING DAMAGES FOR LOSS OF GOODWILL, LOST PROFITS, LOST SALES
+OR BUSINESS, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, LOST CONTENT,
+DATA OR DATA USE, BREACH OF DUTY OF GOOD FAITH, OR ANY AND ALL OTHER DAMAGES
+OR LOSSES OF ANY KIND OR NATURE WHATSOEVER (WHETHER DIRECT, INDIRECT, SPECIAL,
+COLLATERAL, INCIDENTAL, CONSEQUENTIAL OR OTHERWISE) ARISING OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THIS LICENSE, EVEN IF SUCH PARTY SHALL HAVE
+BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES.
+### Trademark
+Except for displaying the License Details and identifying us as the origin of
+the Software, you have no right under these Terms and Conditions to use our
+trademarks, trade names, service marks or product names.
+## Change Date
+Four years from the date the Software is made available under these Terms and
+Conditions: **2030-04-08**
+## Change License
+MIT License
+## License Details
+| Parameter | Value |
+|---|---|
+| Licensor | Mindstone Learning Limited |
+| Software | Browser Automation MCP Server |
+| Use Limitation | Competing Use |
+| Change Date | 2030-04-08 |
+| Change License | MIT |

package/README.md ADDED Viewed

@@ -0,0 +1,95 @@
+# Browser Automation MCP Server
+Headless browser control via accessibility snapshots — navigate pages, fill forms, click elements, take screenshots, and manage tabs using the [agent-browser](https://www.npmjs.com/package/agent-browser) CLI.
+## Installation
+```bash
+npx -y @mindstone-engineering/mcp-server-browser-automation
+```
+Or install globally:
+```bash
+npm install -g @mindstone-engineering/mcp-server-browser-automation
+mcp-server-browser-automation
+```
+## Requirements
+This server requires the `agent-browser` CLI binary to control the browser.
+### Binary Resolution
+1. **PATH lookup** (preferred): If `agent-browser` is on your PATH, it is used directly.
+2. **npx fallback**: If the binary is not found, the server automatically falls back to `npx -y agent-browser@0.17`.
+### Installing agent-browser
+```bash
+npm install -g agent-browser
+```
+Or let the npx fallback handle it automatically (slower on first use due to download).
+## Configuration
+No API keys or credentials are required. The server communicates with the browser via the agent-browser CLI.
+| Variable | Required | Description |
+|---|---|---|
+| `AGENT_BROWSER_SESSION_NAME` | No | Session name for browser persistence (default: `mcp`) |
+### MCP Host Configuration
+```json
+{
+  "mcpServers": {
+    "browser-automation": {
+      "command": "npx",
+      "args": ["-y", "@mindstone-engineering/mcp-server-browser-automation"]
+    }
+  }
+}
+```
+## Available Tools (18)
+### Navigation
+- **browser_navigate** — Navigate to a URL
+- **browser_back** — Navigate back in browser history
+- **browser_forward** — Navigate forward in browser history
+- **browser_wait** — Wait for an element to appear or a specified time
+### Observation
+- **browser_snapshot** — Get the page accessibility tree with interactive element references
+- **browser_screenshot** — Take a screenshot of the current page
+- **browser_get_page_info** — Get the current page URL and title
+### Interaction
+- **browser_click** — Click an element using @ref or CSS selector
+- **browser_fill** — Clear a field and fill it with text
+- **browser_type** — Type text character by character (real keystrokes)
+- **browser_press_key** — Press a keyboard key
+- **browser_scroll** — Scroll the page in a direction
+- **browser_select** — Select an option from a dropdown
+- **browser_hover** — Hover over an element
+- **browser_evaluate** — Execute JavaScript in the page context
+### Session Management
+- **browser_tabs** — List open tabs, open or close a tab, or switch to a tab by number
+- **browser_close** — Close the browser session
+- **browser_authenticate** — Open a visible browser for manual login
+## Workflow
+The typical workflow uses accessibility snapshots for reliable element targeting:
+1. `browser_navigate` → open a page
+2. `browser_snapshot` → see interactive elements with @ref IDs
+3. `browser_click` / `browser_fill` → interact using @ref references
+4. `browser_screenshot` → visual verification
+## License
+FSL-1.1-MIT

package/dist/bridge.d.ts ADDED Viewed

@@ -0,0 +1,16 @@
+/**
+ * Path to the bridge state file. Taken from MCP_HOST_BRIDGE_STATE, falling back
+ * to the legacy MINDSTONE_REBEL_BRIDGE_STATE; an empty string when neither is set.
+ */
+export declare const BRIDGE_STATE_PATH: string;
+/**
+ * Send a request to the host app bridge.
+ *
+ * The bridge is an HTTP server running inside the host application
+ * that handles credential management and other cross-process operations.
+ *
+ * @param urlPath - Path appended to the bridge's local base URL.
+ * @param body - Payload serialised as the JSON request body.
+ * @returns The bridge's JSON reply. When no bridge state can be loaded the
+ *   result is `{ success: false, error: 'Bridge not available' }`.
+ */
+export declare const bridgeRequest: (urlPath: string, body: Record<string, unknown>) => Promise<{
+    success: boolean;
+    warning?: string;
+    error?: string;
+}>;
+//# sourceMappingURL=bridge.d.ts.map

package/dist/bridge.js ADDED Viewed

@@ -0,0 +1,40 @@
+import * as fs from 'fs';
+import { REQUEST_TIMEOUT_MS } from './types.js';
+/**
+ * Path to bridge state file, supporting both current and legacy env vars.
+ */
+export const BRIDGE_STATE_PATH = process.env.MCP_HOST_BRIDGE_STATE || process.env.MINDSTONE_REBEL_BRIDGE_STATE || '';
+const loadBridgeState = () => {
+    if (!BRIDGE_STATE_PATH)
+        return null;
+    try {
+        const raw = fs.readFileSync(BRIDGE_STATE_PATH, 'utf8');
+        return JSON.parse(raw);
+    }
+    catch {
+        return null;
+    }
+};
+/**
+ * Send a request to the host app bridge.
+ *
+ * The bridge is an HTTP server running inside the host app (e.g. the host application)
+ * that handles credential management and other cross-process operations.
+ */
+export const bridgeRequest = async (urlPath, body) => {
+    const bridge = loadBridgeState();
+    if (!bridge) {
+        return { success: false, error: 'Bridge not available' };
+    }
+    const response = await fetch(`http://127.0.0.1:${bridge.port}${urlPath}`, {
+        method: 'POST',
+        signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
+        headers: {
+            'Content-Type': 'application/json',
+            Authorization: `Bearer ${bridge.token}`,
+        },
+        body: JSON.stringify(body),
+    });
+    return response.json();
+};
+//# sourceMappingURL=bridge.js.map

package/dist/browser-client.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+/** Captured output streams of one agent-browser CLI invocation. */
+export interface ExecResult {
+    /** Text the command wrote to standard output. */
+    stdout: string;
+    /** Text the command wrote to standard error; an empty string when none was captured. */
+    stderr: string;
+}
+/** Optional settings accepted by execAgentBrowser. */
+export interface ExecOptions {
+    /** Child-process timeout in milliseconds; DEFAULT_TIMEOUT_MS applies when omitted. */
+    timeoutMs?: number;
+    /** When true the CLI is invoked with `--headed`; otherwise `--headless` is passed. */
+    headed?: boolean;
+}
+/**
+ * Execute an agent-browser CLI command.
+ *
+ * Falls back to `npx -y agent-browser@0.17` if the binary is not on PATH.
+ * Uses execFile (no shell) to prevent command injection.
+ *
+ * @param args - CLI arguments; a `--headed` or `--headless` flag is prepended.
+ * @param options - Optional timeout and headed-mode settings.
+ * @returns The command's captured stdout and stderr.
+ * @throws ConnectorError when the binary cannot be run, the command times out,
+ *   or the CLI reports a failure.
+ */
+export declare function execAgentBrowser(args: string[], options?: ExecOptions): Promise<ExecResult>;
+/**
+ * Reset the resolved binary cache so the next call resolves the binary name again.
+ * Primarily used for testing to reset state between test runs.
+ */
+export declare function resetBinaryCache(): void;
+//# sourceMappingURL=browser-client.d.ts.map

package/dist/browser-client.js ADDED Viewed

@@ -0,0 +1,82 @@
+import { execFile } from 'node:child_process';
+import { promisify } from 'node:util';
+import { ConnectorError, DEFAULT_TIMEOUT_MS, SESSION_NAME } from './types.js';
+const execFileAsync = promisify(execFile);
+let resolvedBinary = null;
+function resolveAgentBrowser() {
+    if (resolvedBinary)
+        return resolvedBinary;
+    // Default to the binary name — execFile will search PATH.
+    // If not found (ENOENT), the caller falls back to npx.
+    resolvedBinary = 'agent-browser';
+    return resolvedBinary;
+}
+function buildEnv() {
+    const env = { ...process.env };
+    // Always use session persistence
+    if (!env.AGENT_BROWSER_SESSION_NAME) {
+        env.AGENT_BROWSER_SESSION_NAME = SESSION_NAME;
+    }
+    return env;
+}
+/**
+ * Execute an agent-browser CLI command.
+ *
+ * Falls back to `npx -y agent-browser@0.17` if the binary is not on PATH.
+ * Uses execFile (no shell) to prevent command injection.
+ */
+export async function execAgentBrowser(args, options) {
+    const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
+    const env = buildEnv();
+    if (options?.headed) {
+        args = ['--headed', ...args];
+    }
+    else {
+        args = ['--headless', ...args];
+    }
+    const binary = resolveAgentBrowser();
+    try {
+        // execFile is safe against command injection (no shell interpretation)
+        const result = await execFileAsync(binary, args, {
+            env,
+            timeout: timeoutMs,
+            maxBuffer: 10 * 1024 * 1024, // 10MB for large snapshots
+        });
+        return { stdout: result.stdout, stderr: result.stderr ?? '' };
+    }
+    catch (error) {
+        const err = error;
+        // Binary not found — try npx fallback
+        if (err.code === 'ENOENT') {
+            try {
+                const npxResult = await execFileAsync('npx', ['-y', 'agent-browser@0.17', ...args], {
+                    env,
+                    timeout: timeoutMs + 15_000, // extra time for npx install
+                    maxBuffer: 10 * 1024 * 1024,
+                });
+                return { stdout: npxResult.stdout, stderr: npxResult.stderr ?? '' };
+            }
+            catch (npxError) {
+                const npxErr = npxError;
+                throw new ConnectorError(`agent-browser not found and npx fallback failed: ${npxErr.message ?? String(npxErr)}`, 'BINARY_NOT_FOUND', 'Install agent-browser: npm install -g agent-browser\n' +
+                    'Or ensure npx is available on PATH.');
+            }
+        }
+        // Timeout
+        if (err.code === 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER' || err.killed) {
+            throw new ConnectorError(`Command timed out after ${timeoutMs}ms: agent-browser ${args.join(' ')}`, 'TIMEOUT', 'The browser operation took too long. Try a simpler action or increase the timeout.');
+        }
+        // Other errors — include stderr for diagnostics
+        const stderr = err.stderr?.trim() ?? '';
+        const stdout = err.stdout?.trim() ?? '';
+        throw new ConnectorError(stderr || stdout || err.message || String(error), 'CLI_ERROR', 'The agent-browser CLI command failed. Check that agent-browser is installed and the browser session is active.');
+    }
+}
+/**
+ * Reset the resolved binary cache.
+ * Primarily used for testing to reset state between test runs.
+ */
+export function resetBinaryCache() {
+    resolvedBinary = null;
+}
+//# sourceMappingURL=browser-client.js.map

package/dist/index.d.ts ADDED Viewed

@@ -0,0 +1,16 @@
+#!/usr/bin/env node
+/**
+ * Browser Automation MCP Server
+ *
+ * Provides headless browser automation via the agent-browser CLI.
+ * Uses accessibility snapshots (@ref pointers) instead of fragile CSS selectors.
+ * Sessions persist automatically between invocations.
+ *
+ * Requirements:
+ * - agent-browser CLI binary on PATH, or npx available for fallback
+ *
+ * Environment variables:
+ * - AGENT_BROWSER_SESSION_NAME: Session name for persistence (default: "mcp")
+ */
+export {};
+//# sourceMappingURL=index.d.ts.map

package/dist/index.js ADDED Viewed

@@ -0,0 +1,27 @@
+#!/usr/bin/env node
+/**
+ * Browser Automation MCP Server
+ *
+ * Provides headless browser automation via the agent-browser CLI.
+ * Uses accessibility snapshots (@ref pointers) instead of fragile CSS selectors.
+ * Sessions persist automatically between invocations.
+ *
+ * Requirements:
+ * - agent-browser CLI binary on PATH, or npx available for fallback
+ *
+ * Environment variables:
+ * - AGENT_BROWSER_SESSION_NAME: Session name for persistence (default: "mcp")
+ */
+import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+import { createServer } from './server.js';
+async function main() {
+    const server = createServer();
+    const transport = new StdioServerTransport();
+    await server.connect(transport);
+    console.error('Browser Automation MCP server running on stdio');
+}
+main().catch((error) => {
+    console.error('Fatal error:', error);
+    process.exit(1);
+});
+//# sourceMappingURL=index.js.map

package/dist/server.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+/**
+ * Create an McpServer with the navigation, interaction, observation and
+ * session tool groups registered on it.
+ */
+export declare function createServer(): McpServer;
+//# sourceMappingURL=server.d.ts.map

package/dist/server.js ADDED Viewed

@@ -0,0 +1,15 @@
+import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { SERVER_NAME, SERVER_VERSION } from './types.js';
+import { registerNavigationTools, registerInteractionTools, registerObservationTools, registerSessionTools, } from './tools/index.js';
+export function createServer() {
+    const server = new McpServer({
+        name: SERVER_NAME,
+        version: SERVER_VERSION,
+    });
+    registerNavigationTools(server);
+    registerInteractionTools(server);
+    registerObservationTools(server);
+    registerSessionTools(server);
+    return server;
+}
+//# sourceMappingURL=server.js.map

package/dist/tools/index.d.ts ADDED Viewed

@@ -0,0 +1,5 @@
+export { registerNavigationTools } from './navigation.js';
+export { registerInteractionTools } from './interaction.js';
+export { registerObservationTools } from './observation.js';
+export { registerSessionTools } from './session.js';
+//# sourceMappingURL=index.d.ts.map

package/dist/tools/index.js ADDED Viewed

@@ -0,0 +1,5 @@
+export { registerNavigationTools } from './navigation.js';
+export { registerInteractionTools } from './interaction.js';
+export { registerObservationTools } from './observation.js';
+export { registerSessionTools } from './session.js';
+//# sourceMappingURL=index.js.map

package/dist/tools/interaction.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+/**
+ * Register the interaction tools (browser_click, browser_fill, browser_type,
+ * browser_press_key, browser_scroll, browser_select, browser_hover,
+ * browser_evaluate) on the given server.
+ */
+export declare function registerInteractionTools(server: McpServer): void;
+//# sourceMappingURL=interaction.d.ts.map

package/dist/tools/interaction.js ADDED Viewed

@@ -0,0 +1,135 @@
+import { z } from 'zod';
+import { execAgentBrowser } from '../browser-client.js';
+import { withErrorHandling } from '../utils.js';
+export function registerInteractionTools(server) {
+    server.registerTool('browser_click', {
+        description: `Click an element. Use @ref from browser_snapshot (preferred) or a CSS selector.
+WORKFLOW: browser_snapshot → find @ref → browser_click @ref`,
+        inputSchema: {
+            ref: z.string().describe('Element ref from snapshot (e.g., "@e2") or CSS selector'),
+        },
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        await execAgentBrowser(['click', args.ref]);
+        return JSON.stringify({ ok: true, message: `Clicked: ${args.ref}` });
+    }));
+    server.registerTool('browser_fill', {
+        description: `Clear a field and fill it with text. Use @ref from browser_snapshot.
+WORKFLOW: browser_snapshot → find input @ref → browser_fill`,
+        inputSchema: {
+            ref: z.string().describe('Element ref (e.g., "@e3") or CSS selector'),
+            value: z.string().describe('Text to fill'),
+        },
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        await execAgentBrowser(['fill', args.ref, args.value]);
+        return JSON.stringify({ ok: true, message: `Filled ${args.ref} with ${args.value.length} characters` });
+    }));
+    server.registerTool('browser_type', {
+        description: 'Type text character by character (simulates real keystrokes). Useful for search boxes and autocompletes that respond to individual key events.',
+        inputSchema: {
+            ref: z.string().describe('Element ref or CSS selector'),
+            text: z.string().describe('Text to type'),
+        },
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        await execAgentBrowser(['type', args.ref, args.text]);
+        return JSON.stringify({ ok: true, message: `Typed ${args.text.length} characters into ${args.ref}` });
+    }));
+    server.registerTool('browser_press_key', {
+        description: 'Press a keyboard key. Common keys: Enter, Tab, Escape, Backspace, ArrowDown, ArrowUp.',
+        inputSchema: {
+            key: z.string().describe('Key to press (e.g., "Enter", "Tab", "Escape")'),
+        },
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        await execAgentBrowser(['press', args.key]);
+        return JSON.stringify({ ok: true, message: `Pressed key: ${args.key}` });
+    }));
+    server.registerTool('browser_scroll', {
+        description: 'Scroll the page in a direction.',
+        inputSchema: {
+            direction: z.enum(['up', 'down', 'left', 'right']).describe('Scroll direction'),
+            amount: z.number().optional().default(500).describe('Pixels to scroll (default: 500)'),
+        },
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        const px = args.amount ?? 500;
+        await execAgentBrowser(['scroll', args.direction, String(px)]);
+        return JSON.stringify({ ok: true, message: `Scrolled ${args.direction} ${px}px` });
+    }));
+    server.registerTool('browser_select', {
+        description: 'Select an option from a dropdown.',
+        inputSchema: {
+            ref: z.string().describe('Element ref or CSS selector for the <select>'),
+            value: z.string().describe('Option value or visible text to select'),
+        },
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        await execAgentBrowser(['select', args.ref, args.value]);
+        return JSON.stringify({ ok: true, message: `Selected "${args.value}" in ${args.ref}` });
+    }));
+    server.registerTool('browser_hover', {
+        description: 'Hover over an element (triggers hover menus/tooltips).',
+        inputSchema: {
+            ref: z.string().describe('Element ref or CSS selector'),
+        },
+        annotations: {
+            readOnlyHint: true,
+            destructiveHint: false,
+            idempotentHint: true,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        await execAgentBrowser(['hover', args.ref]);
+        return JSON.stringify({ ok: true, message: `Hovering over ${args.ref}` });
+    }));
+    server.registerTool('browser_evaluate', {
+        description: 'Execute JavaScript in the page context and return the result.',
+        inputSchema: {
+            script: z.string().describe('JavaScript code to execute'),
+        },
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        const result = await execAgentBrowser(['eval', args.script]);
+        return JSON.stringify({ ok: true, result: result.stdout.trim() });
+    }));
+}
+//# sourceMappingURL=interaction.js.map

package/dist/tools/navigation.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+/**
+ * Register the navigation tools (browser_navigate, browser_back,
+ * browser_forward, browser_wait) on the given server.
+ */
+export declare function registerNavigationTools(server: McpServer): void;
+//# sourceMappingURL=navigation.d.ts.map

package/dist/tools/navigation.js ADDED Viewed

@@ -0,0 +1,72 @@
+import { z } from 'zod';
+import { execAgentBrowser } from '../browser-client.js';
+import { withErrorHandling } from '../utils.js';
+export function registerNavigationTools(server) {
+    server.registerTool('browser_navigate', {
+        description: `Navigate to a URL. Opens the browser if not already running.
+IMPORTANT: After navigating, call browser_snapshot to see the page content before interacting.`,
+        inputSchema: {
+            url: z.string().describe('URL to navigate to'),
+        },
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        await execAgentBrowser(['open', args.url]);
+        const titleResult = await execAgentBrowser(['get', 'title']).catch(() => ({ stdout: '', stderr: '' }));
+        return JSON.stringify({
+            ok: true,
+            message: `Navigated to ${args.url}`,
+            title: titleResult.stdout.trim(),
+            hint: 'Call browser_snapshot to see page elements before interacting.',
+        });
+    }));
+    server.registerTool('browser_back', {
+        description: 'Navigate back in browser history.',
+        inputSchema: {},
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async () => {
+        await execAgentBrowser(['back']);
+        return JSON.stringify({ ok: true, message: 'Navigated back' });
+    }));
+    server.registerTool('browser_forward', {
+        description: 'Navigate forward in browser history.',
+        inputSchema: {},
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async () => {
+        await execAgentBrowser(['forward']);
+        return JSON.stringify({ ok: true, message: 'Navigated forward' });
+    }));
+    server.registerTool('browser_wait', {
+        description: 'Wait for an element to appear or for a specified time.',
+        inputSchema: {
+            selector: z.string().describe('CSS selector to wait for, or milliseconds (e.g., "2000")'),
+            timeout: z.number().optional().default(10000).describe('Max wait time in ms (default: 10000)'),
+        },
+        annotations: {
+            readOnlyHint: true,
+            destructiveHint: false,
+            idempotentHint: true,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        const timeoutMs = args.timeout ?? 10_000;
+        await execAgentBrowser(['wait', args.selector], { timeoutMs: timeoutMs + 2000 });
+        return JSON.stringify({ ok: true, message: `Wait completed for: ${args.selector}` });
+    }));
+}
+//# sourceMappingURL=navigation.js.map

package/dist/tools/observation.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+/**
+ * Register the observation tools (browser_snapshot, browser_screenshot,
+ * browser_get_page_info) on the given server.
+ */
+export declare function registerObservationTools(server: McpServer): void;
+//# sourceMappingURL=observation.d.ts.map

package/dist/tools/observation.js ADDED Viewed

@@ -0,0 +1,81 @@
+import { z } from 'zod';
+import { execAgentBrowser } from '../browser-client.js';
+import { withErrorHandling, withErrorHandlingRaw } from '../utils.js';
+import { SNAPSHOT_TIMEOUT_MS, SCREENSHOT_TIMEOUT_MS } from '../types.js';
+export function registerObservationTools(server) {
+    server.registerTool('browser_snapshot', {
+        description: `Get the page accessibility tree with interactive element references.
+THIS IS YOUR PRIMARY DISCOVERY TOOL. Always call this before clicking, filling, or interacting with the page.
+Returns element refs like @e1, @e2 that you use with browser_click, browser_fill, etc.
+Use the -i flag (default) to see only interactive elements, keeping output focused.`,
+        inputSchema: {
+            full: z.boolean().optional().default(false).describe('If true, show all elements (not just interactive). Default: false.'),
+        },
+        annotations: {
+            readOnlyHint: true,
+            destructiveHint: false,
+            idempotentHint: true,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        const cliArgs = args.full ? ['snapshot'] : ['snapshot', '-i'];
+        const result = await execAgentBrowser(cliArgs, { timeoutMs: SNAPSHOT_TIMEOUT_MS });
+        return JSON.stringify({ ok: true, snapshot: result.stdout });
+    }));
+    server.registerTool('browser_screenshot', {
+        description: 'Take a screenshot of the current page. Returns an image.',
+        inputSchema: {
+            full_page: z.boolean().optional().default(false).describe('Capture full scrollable page'),
+            annotate: z.boolean().optional().default(false).describe('Add numbered element labels to the screenshot'),
+        },
+        annotations: {
+            readOnlyHint: true,
+            destructiveHint: false,
+            idempotentHint: true,
+            openWorldHint: true,
+        },
+    }, withErrorHandlingRaw(async (args) => {
+        const cliArgs = ['screenshot'];
+        if (args.full_page)
+            cliArgs.push('--full');
+        if (args.annotate)
+            cliArgs.push('--annotate');
+        cliArgs.push('-'); // output to stdout
+        const result = await execAgentBrowser(cliArgs, { timeoutMs: SCREENSHOT_TIMEOUT_MS });
+        const data = result.stdout.trim();
+        // agent-browser outputs base64 PNG when piped to stdout
+        if (data.length > 100) {
+            return {
+                content: [{
+                        type: 'image',
+                        data,
+                        mimeType: 'image/png',
+                    }],
+            };
+        }
+        return {
+            content: [{ type: 'text', text: JSON.stringify({ ok: true, message: 'Screenshot taken', note: data }) }],
+        };
+    }));
+    server.registerTool('browser_get_page_info', {
+        description: 'Get the current page URL and title.',
+        inputSchema: {},
+        annotations: {
+            readOnlyHint: true,
+            destructiveHint: false,
+            idempotentHint: true,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async () => {
+        const urlResult = await execAgentBrowser(['get', 'url']);
+        const titleResult = await execAgentBrowser(['get', 'title']);
+        return JSON.stringify({
+            ok: true,
+            url: urlResult.stdout.trim(),
+            title: titleResult.stdout.trim(),
+        });
+    }));
+}
+//# sourceMappingURL=observation.js.map

package/dist/tools/session.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+/**
+ * Register the session tools (including browser_tabs, browser_close and
+ * browser_authenticate) on the given server.
+ */
+export declare function registerSessionTools(server: McpServer): void;
+//# sourceMappingURL=session.d.ts.map

package/dist/tools/session.js ADDED Viewed

@@ -0,0 +1,63 @@
+import { z } from 'zod';
+import { execAgentBrowser } from '../browser-client.js';
+import { withErrorHandling } from '../utils.js';
+export function registerSessionTools(server) {
+    server.registerTool('browser_tabs', {
+        description: 'List open tabs or switch to a tab by number.',
+        inputSchema: {
+            action: z.enum(['list', 'new', 'close']).optional().describe('Tab action. Omit to list tabs.'),
+            tab_number: z.number().optional().describe('Tab number to switch to (from tab list)'),
+        },
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        if (args.tab_number !== undefined) {
+            await execAgentBrowser(['tab', String(args.tab_number)]);
+            return JSON.stringify({ ok: true, message: `Switched to tab ${args.tab_number}` });
+        }
+        const cliAction = args.action ?? 'list';
+        const result = await execAgentBrowser(['tab', cliAction]);
+        return JSON.stringify({ ok: true, tabs: result.stdout.trim() });
+    }));
+    server.registerTool('browser_close', {
+        description: 'Close the browser session. Sessions are saved automatically.',
+        inputSchema: {},
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: true,
+            idempotentHint: true,
+            openWorldHint: false,
+        },
+    }, withErrorHandling(async () => {
+        await execAgentBrowser(['close']);
+        return JSON.stringify({ ok: true, message: 'Browser session closed. Sessions are saved automatically.' });
+    }));
+    server.registerTool('browser_authenticate', {
+        description: `Open a visible browser window so the user can log in manually. The session is saved automatically.
+WHEN TO USE: "I need to access LinkedIn", "Log me into WhatsApp", etc.
+Tell the user to close the browser when done logging in, or call browser_close.`,
+        inputSchema: {
+            url: z.string().describe('Website URL to open for login'),
+        },
+        annotations: {
+            readOnlyHint: false,
+            destructiveHint: false,
+            idempotentHint: false,
+            openWorldHint: true,
+        },
+    }, withErrorHandling(async (args) => {
+        await execAgentBrowser(['open', args.url], { headed: true });
+        return JSON.stringify({
+            ok: true,
+            url: args.url,
+            message: `Browser opened to ${args.url} in visible mode. The user should log in manually. Their session will be saved automatically when the browser is closed.`,
+            next_step: 'Tell the user to log in and close the browser when done, or call browser_close.',
+        });
+    }));
+}
+//# sourceMappingURL=session.js.map

package/dist/types.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+/** Server name string. NOTE(review): presumably the name advertised to MCP clients — confirm at the usage site. */
+export declare const SERVER_NAME = "browser-automation-mcp-server";
+/** Server version string; same value as the package version (0.1.0). */
+export declare const SERVER_VERSION = "0.1.0";
+/** Timeout of 30000 ms. NOTE(review): presumably the default for browser commands — confirm in browser-client. */
+export declare const DEFAULT_TIMEOUT_MS = 30000;
+/** Timeout of 15000 ms. NOTE(review): presumably applied to snapshot commands — confirm at the call site. */
+export declare const SNAPSHOT_TIMEOUT_MS = 15000;
+/** Timeout of 15000 ms, passed as `timeoutMs` when the screenshot tool invokes agent-browser. */
+export declare const SCREENSHOT_TIMEOUT_MS = 15000;
+/** NOTE(review): presumably the agent-browser session name — confirm in browser-client. */
+export declare const SESSION_NAME = "mcp";
+/**
+ * Error type that carries a `code` and a `resolution` string in addition to
+ * the message. The tool error wrappers serialize all three fields into the
+ * JSON error payload they return.
+ */
+export declare class ConnectorError extends Error {
+    /** Error code string supplied by the thrower. */
+    readonly code: string;
+    /** Resolution text supplied by the thrower. */
+    readonly resolution: string;
+    constructor(message: string, code: string, resolution: string);
+}
+//# sourceMappingURL=types.d.ts.map

package/dist/types.js ADDED Viewed

@@ -0,0 +1,17 @@
+export const SERVER_NAME = 'browser-automation-mcp-server';
+export const SERVER_VERSION = '0.1.0';
+export const DEFAULT_TIMEOUT_MS = 30_000;
+export const SNAPSHOT_TIMEOUT_MS = 15_000;
+export const SCREENSHOT_TIMEOUT_MS = 15_000;
+export const SESSION_NAME = 'mcp';
+/**
+ * Error that carries a `code` and a `resolution` string alongside the message.
+ * Own enumerable properties are created in the order code, resolution, name.
+ */
+export class ConnectorError extends Error {
+    code;
+    resolution;
+    // Declared last so `name` stays the final own property.
+    name = 'ConnectorError';
+    constructor(message, code, resolution) {
+        super(message);
+        Object.assign(this, { code, resolution });
+    }
+}
+//# sourceMappingURL=types.js.map

package/dist/utils.d.ts ADDED Viewed

@@ -0,0 +1,18 @@
+import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
+/** Handler shape returned by the wrappers below: takes the tool arguments plus an opaque `extra` value and resolves to a CallToolResult. */
+type ToolHandler<T> = (args: T, extra: unknown) => Promise<CallToolResult>;
+/**
+ * Wraps a tool handler with standard error handling.
+ *
+ * - On success: returns the string result as a text content block.
+ * - On ConnectorError: returns a structured JSON error with code and resolution.
+ * - On any other thrown value: returns `{ ok: false, error }`, where `error` is
+ *   the thrown Error's message or the stringified value.
+ *
+ * NOTE(review): error messages are forwarded verbatim, so handlers must not
+ * throw errors whose messages contain secrets.
+ */
+export declare function withErrorHandling<T>(fn: (args: T, extra: unknown) => Promise<string>): ToolHandler<T>;
+/**
+ * Wraps a tool handler that returns a CallToolResult directly (e.g. for image responses).
+ * Thrown errors are converted to the same error results as `withErrorHandling`.
+ */
+export declare function withErrorHandlingRaw<T>(fn: (args: T, extra: unknown) => Promise<CallToolResult>): ToolHandler<T>;
+export {};
+//# sourceMappingURL=utils.d.ts.map

package/dist/utils.js ADDED Viewed

@@ -0,0 +1,75 @@
+import { ConnectorError } from './types.js';
+/**
+ * Wraps a tool handler with standard error handling.
+ *
+ * - On success: returns the string result as a text content block.
+ * - On ConnectorError: returns a structured JSON error with code and resolution.
+ * - On unknown error: returns a generic error message.
+ *
+ * Secrets are never exposed in error messages.
+ */
+export function withErrorHandling(fn) {
+    return async (args, extra) => {
+        try {
+            const result = await fn(args, extra);
+            return { content: [{ type: 'text', text: result }] };
+        }
+        catch (error) {
+            if (error instanceof ConnectorError) {
+                return {
+                    content: [
+                        {
+                            type: 'text',
+                            text: JSON.stringify({
+                                ok: false,
+                                error: error.message,
+                                code: error.code,
+                                resolution: error.resolution,
+                            }),
+                        },
+                    ],
+                    isError: true,
+                };
+            }
+            const errorMessage = error instanceof Error ? error.message : String(error);
+            return {
+                content: [{ type: 'text', text: JSON.stringify({ ok: false, error: errorMessage }) }],
+                isError: true,
+            };
+        }
+    };
+}
+/**
+ * Wraps a tool handler that returns a CallToolResult directly (e.g. for image responses).
+ */
+export function withErrorHandlingRaw(fn) {
+    return async (args, extra) => {
+        try {
+            return await fn(args, extra);
+        }
+        catch (error) {
+            if (error instanceof ConnectorError) {
+                return {
+                    content: [
+                        {
+                            type: 'text',
+                            text: JSON.stringify({
+                                ok: false,
+                                error: error.message,
+                                code: error.code,
+                                resolution: error.resolution,
+                            }),
+                        },
+                    ],
+                    isError: true,
+                };
+            }
+            const errorMessage = error instanceof Error ? error.message : String(error);
+            return {
+                content: [{ type: 'text', text: JSON.stringify({ ok: false, error: errorMessage }) }],
+                isError: true,
+            };
+        }
+    };
+}
+//# sourceMappingURL=utils.js.map

package/package.json ADDED Viewed

@@ -0,0 +1,48 @@
+{
+  "name": "@mindstone-engineering/mcp-server-browser-automation",
+  "version": "0.1.0",
+  "description": "Browser automation MCP server — headless browser control via accessibility snapshots, navigation, form filling, screenshots, and tab management",
+  "license": "FSL-1.1-MIT",
+  "type": "module",
+  "bin": {
+    "mcp-server-browser-automation": "dist/index.js"
+  },
+  "files": [
+    "dist",
+    "!dist/**/*.map"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/nspr-io/mcp-servers.git",
+    "directory": "connectors/browser-automation"
+  },
+  "homepage": "https://github.com/nspr-io/mcp-servers/tree/main/connectors/browser-automation",
+  "publishConfig": {
+    "access": "public"
+  },
+  "scripts": {
+    "build": "tsc && shx chmod +x dist/index.js",
+    "prepare": "npm run build",
+    "watch": "tsc --watch",
+    "start": "node dist/index.js",
+    "test": "vitest run",
+    "test:watch": "vitest",
+    "test:coverage": "vitest run --coverage"
+  },
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.26.0",
+    "zod": "^3.23.0"
+  },
+  "devDependencies": {
+    "@mindstone-engineering/mcp-test-harness": "file:../../test-harness",
+    "@types/node": "^22",
+    "@vitest/coverage-v8": "^4.1.3",
+    "msw": "^2.13.2",
+    "shx": "^0.3.4",
+    "typescript": "^5.8.2",
+    "vitest": "^4.1.3"
+  },
+  "engines": {
+    "node": ">=20"
+  }
+}