@mindstone-engineering/mcp-server-browser-automation 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,97 @@
1
+ # Functional Source License, Version 1.1, MIT Future License
2
+
3
+ ## Abbreviation
4
+
5
+ FSL-1.1-MIT
6
+
7
+ ## Notice
8
+
9
+ Copyright 2026 Mindstone Learning Limited
10
+
11
+ ## Terms and Conditions
12
+
13
+ ### Licensor ("We")
14
+
15
+ The party offering the Software under these Terms and Conditions.
16
+
17
+ **Licensor**: Mindstone Learning Limited
18
+
19
+ ### The Software
20
+
21
+ The "Software" is each version of the software that we make available under
22
+ these Terms and Conditions, as indicated by our inclusion of these Terms and
23
+ Conditions with the Software.
24
+
25
+ **Software**: Browser Automation MCP Server
26
+
27
+ ### License Grant
28
+
29
+ Subject to your compliance with this License Grant and the Patents,
30
+ Redistribution and Trademark clauses below, we hereby grant you the right to
31
+ use, copy, modify, create derivative works, publicly perform, publicly display
32
+ and redistribute the Software for any Permitted Purpose identified below.
33
+
34
+ ### Permitted Purpose
35
+
36
+ A Permitted Purpose is any purpose other than a Competing Use. A "Competing
37
+ Use" means making the Software available to third parties as a commercial
38
+ hosted service that directly competes with any product or service provided by
39
+ the Licensor.
40
+
41
+ ### Patents
42
+
43
+ To the extent your use for a Permitted Purpose would necessarily infringe our
44
+ patents, the license grant above includes a license under our patents. If you
45
+ make a claim against any party that the Software infringes or contributes to
46
+ the infringement of any patent, then your patent license to the Software ends
47
+ immediately.
48
+
49
+ ### Redistribution
50
+
51
+ The Terms and Conditions apply to all copies, modifications and derivatives of
52
+ the Software.
53
+
54
+ If you redistribute any copies, modifications or derivatives of the Software,
55
+ you must include a copy of or a link to these Terms and Conditions and not
56
+ remove any copyright notices provided in or with the Software.
57
+
58
+ ### Disclaimer
59
+
60
+ THE SOFTWARE IS PROVIDED "AS IS" AND WITHOUT WARRANTIES OF ANY KIND, EXPRESS OR
61
+ IMPLIED, INCLUDING WITHOUT LIMITATION WARRANTIES OF FITNESS FOR A PARTICULAR
62
+ PURPOSE, MERCHANTABILITY, TITLE OR NON-INFRINGEMENT.
63
+
64
+ IN NO EVENT WILL WE HAVE ANY LIABILITY TO YOU ARISING OUT OF OR RELATED TO THE
65
+ SOFTWARE, INCLUDING INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES, OF
66
+ ANY CHARACTER INCLUDING DAMAGES FOR LOSS OF GOODWILL, LOST PROFITS, LOST SALES
67
+ OR BUSINESS, WORK STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, LOST CONTENT,
68
+ DATA OR DATA USE, BREACH OF DUTY OF GOOD FAITH, OR ANY AND ALL OTHER DAMAGES
69
+ OR LOSSES OF ANY KIND OR NATURE WHATSOEVER (WHETHER DIRECT, INDIRECT, SPECIAL,
70
+ COLLATERAL, INCIDENTAL, CONSEQUENTIAL OR OTHERWISE) ARISING OUT OF OR IN
71
+ CONNECTION WITH THE SOFTWARE OR THIS LICENSE, EVEN IF SUCH PARTY SHALL HAVE
72
+ BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES.
73
+
74
+ ### Trademark
75
+
76
+ Except for displaying the License Details and identifying us as the origin of
77
+ the Software, you have no right under these Terms and Conditions to use our
78
+ trademarks, trade names, service marks or product names.
79
+
80
+ ## Change Date
81
+
82
+ Four years from the date the Software is made available under these Terms and
83
+ Conditions: **2030-04-08**
84
+
85
+ ## Change License
86
+
87
+ MIT License
88
+
89
+ ## License Details
90
+
91
+ | Parameter | Value |
92
+ |---|---|
93
+ | Licensor | Mindstone Learning Limited |
94
+ | Software | Browser Automation MCP Server |
95
+ | Use Limitation | Competing Use |
96
+ | Change Date | 2030-04-08 |
97
+ | Change License | MIT |
package/README.md ADDED
@@ -0,0 +1,95 @@
1
+ # Browser Automation MCP Server
2
+
3
+ Headless browser control via accessibility snapshots — navigate pages, fill forms, click elements, take screenshots, and manage tabs using the [agent-browser](https://www.npmjs.com/package/agent-browser) CLI.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ npx -y @mindstone-engineering/mcp-server-browser-automation
9
+ ```
10
+
11
+ Or install globally:
12
+
13
+ ```bash
14
+ npm install -g @mindstone-engineering/mcp-server-browser-automation
15
+ mcp-server-browser-automation
16
+ ```
17
+
18
+ ## Requirements
19
+
20
+ This server requires the `agent-browser` CLI binary to control the browser.
21
+
22
+ ### Binary Resolution
23
+
24
+ 1. **PATH lookup** (preferred): If `agent-browser` is on your PATH, it is used directly.
25
+ 2. **npx fallback**: If the binary is not found, the server automatically falls back to `npx -y agent-browser@0.17`.
26
+
27
+ ### Installing agent-browser
28
+
29
+ ```bash
30
+ npm install -g agent-browser
31
+ ```
32
+
33
+ Or let the npx fallback handle it automatically (slower on first use due to download).
34
+
35
+ ## Configuration
36
+
37
+ No API keys or credentials are required. The server communicates with the browser via the agent-browser CLI.
38
+
39
+ | Variable | Required | Description |
40
+ |---|---|---|
41
+ | `AGENT_BROWSER_SESSION_NAME` | No | Session name for browser persistence (default: `mcp`) |
42
+
43
+ ### MCP Host Configuration
44
+
45
+ ```json
46
+ {
47
+ "mcpServers": {
48
+ "browser-automation": {
49
+ "command": "npx",
50
+ "args": ["-y", "@mindstone-engineering/mcp-server-browser-automation"]
51
+ }
52
+ }
53
+ }
54
+ ```
55
+
56
+ ## Available Tools (18)
57
+
58
+ ### Navigation
59
+ - **browser_navigate** — Navigate to a URL
60
+ - **browser_back** — Navigate back in browser history
61
+ - **browser_forward** — Navigate forward in browser history
62
+ - **browser_wait** — Wait for an element to appear or a specified time
63
+
64
+ ### Observation
65
+ - **browser_snapshot** — Get the page accessibility tree with interactive element references
66
+ - **browser_screenshot** — Take a screenshot of the current page
67
+ - **browser_get_page_info** — Get the current page URL and title
68
+
69
+ ### Interaction
70
+ - **browser_click** — Click an element using @ref or CSS selector
71
+ - **browser_fill** — Clear a field and fill it with text
72
+ - **browser_type** — Type text character by character (real keystrokes)
73
+ - **browser_press_key** — Press a keyboard key
74
+ - **browser_scroll** — Scroll the page in a direction
75
+ - **browser_select** — Select an option from a dropdown
76
+ - **browser_hover** — Hover over an element
77
+ - **browser_evaluate** — Execute JavaScript in the page context
78
+
79
+ ### Session Management
80
+ - **browser_tabs** — List, open, or close tabs, or switch to a tab by number
81
+ - **browser_close** — Close the browser session
82
+ - **browser_authenticate** — Open a visible browser for manual login
83
+
84
+ ## Workflow
85
+
86
+ The typical workflow uses accessibility snapshots for reliable element targeting:
87
+
88
+ 1. `browser_navigate` → open a page
89
+ 2. `browser_snapshot` → see interactive elements with @ref IDs
90
+ 3. `browser_click` / `browser_fill` → interact using @ref references
91
+ 4. `browser_screenshot` → visual verification
92
+
93
+ ## License
94
+
95
+ FSL-1.1-MIT
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Path to bridge state file, supporting both current and legacy env vars.
3
+ */
4
+ export declare const BRIDGE_STATE_PATH: string;
5
+ /**
6
+ * Send a request to the host app bridge.
7
+ *
8
+ * The bridge is an HTTP server running inside the host app
9
+ * that handles credential management and other cross-process operations.
10
+ */
11
+ export declare const bridgeRequest: (urlPath: string, body: Record<string, unknown>) => Promise<{
12
+ success: boolean;
13
+ warning?: string;
14
+ error?: string;
15
+ }>;
16
+ //# sourceMappingURL=bridge.d.ts.map
package/dist/bridge.js ADDED
@@ -0,0 +1,40 @@
1
import * as fs from 'fs';
// types.js exports no REQUEST_TIMEOUT_MS; importing a missing name makes this
// ES module fail to link, so the shared default timeout is used instead.
import { DEFAULT_TIMEOUT_MS } from './types.js';
/**
 * Path to bridge state file, supporting both current and legacy env vars.
 *
 * `||` (not `??`) is deliberate: an env var set to the empty string falls
 * through to the next candidate.
 */
export const BRIDGE_STATE_PATH = process.env.MCP_HOST_BRIDGE_STATE || process.env.MINDSTONE_REBEL_BRIDGE_STATE || '';
/**
 * Read and validate the bridge state file.
 *
 * @returns {{ port: number, token: string } | null} The parsed state, or
 *   `null` when no path is configured, the file cannot be read or parsed, or
 *   it does not contain a usable port and token.
 */
const loadBridgeState = () => {
    if (!BRIDGE_STATE_PATH) {
        return null;
    }
    let state;
    try {
        state = JSON.parse(fs.readFileSync(BRIDGE_STATE_PATH, 'utf8'));
    }
    catch {
        // Best effort: a missing or malformed file means "no bridge".
        return null;
    }
    if (state === null || typeof state !== 'object') {
        return null;
    }
    const port = Number(state.port);
    const hasValidPort = Number.isInteger(port) && port > 0 && port <= 65535;
    const hasToken = typeof state.token === 'string' && state.token !== '';
    // Without both values the request below would target port "undefined"
    // or send "Bearer undefined".
    return hasValidPort && hasToken ? state : null;
};
/**
 * Send a request to the host app bridge.
 *
 * The bridge is an HTTP server running inside the host app
 * that handles credential management and other cross-process operations.
 *
 * Never rejects: transport failures, timeouts and non-JSON responses are
 * reported through the `error` field of the returned object.
 *
 * @param {string} urlPath - Path (with leading slash) appended to the bridge origin.
 * @param {Record<string, unknown>} body - JSON-serialisable request payload.
 * @returns {Promise<{ success: boolean, warning?: string, error?: string }>}
 */
export const bridgeRequest = async (urlPath, body) => {
    const bridge = loadBridgeState();
    if (!bridge) {
        return { success: false, error: 'Bridge not available' };
    }
    try {
        const response = await fetch(`http://127.0.0.1:${bridge.port}${urlPath}`, {
            method: 'POST',
            signal: AbortSignal.timeout(DEFAULT_TIMEOUT_MS),
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${bridge.token}`,
            },
            body: JSON.stringify(body),
        });
        // JSON bodies are passed through regardless of HTTP status, as before.
        const payload = await response.json().catch(() => null);
        if (payload !== null && typeof payload === 'object') {
            return payload;
        }
        return { success: false, error: `Bridge returned an invalid response (HTTP ${response.status})` };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        return { success: false, error: `Bridge request failed: ${reason}` };
    }
};
40
+ //# sourceMappingURL=bridge.js.map
@@ -0,0 +1,21 @@
1
+ export interface ExecResult {
2
+ stdout: string;
3
+ stderr: string;
4
+ }
5
+ export interface ExecOptions {
6
+ timeoutMs?: number;
7
+ headed?: boolean;
8
+ }
9
+ /**
10
+ * Execute an agent-browser CLI command.
11
+ *
12
+ * Falls back to `npx -y agent-browser@0.17` if the binary is not on PATH.
13
+ * Uses execFile (no shell) to prevent command injection.
14
+ */
15
+ export declare function execAgentBrowser(args: string[], options?: ExecOptions): Promise<ExecResult>;
16
+ /**
17
+ * Reset the resolved binary cache.
18
+ * Primarily used for testing to reset state between test runs.
19
+ */
20
+ export declare function resetBinaryCache(): void;
21
+ //# sourceMappingURL=browser-client.d.ts.map
@@ -0,0 +1,82 @@
1
+ import { execFile } from 'node:child_process';
2
+ import { promisify } from 'node:util';
3
+ import { ConnectorError, DEFAULT_TIMEOUT_MS, SESSION_NAME } from './types.js';
4
+ const execFileAsync = promisify(execFile);
5
+ let resolvedBinary = null;
6
/**
 * Return the executable name used to launch agent-browser, memoised in the
 * module-level `resolvedBinary`.
 *
 * The bare name is used so that execFile performs the PATH search; when that
 * search fails with ENOENT the caller falls back to npx.
 */
function resolveAgentBrowser() {
    resolvedBinary = resolvedBinary || 'agent-browser';
    return resolvedBinary;
}
14
/**
 * Build the child-process environment: a copy of `process.env` in which
 * AGENT_BROWSER_SESSION_NAME is always set, defaulting to SESSION_NAME when
 * the variable is unset or empty.
 */
function buildEnv() {
    const sessionName = process.env.AGENT_BROWSER_SESSION_NAME || SESSION_NAME;
    return { ...process.env, AGENT_BROWSER_SESSION_NAME: sessionName };
}
22
/** Upper bound on captured stdout/stderr: 10MB for large snapshots. */
const MAX_OUTPUT_BYTES = 10 * 1024 * 1024;
/**
 * Translate a failed execFile call into a ConnectorError.
 *
 * @param {unknown} error - Rejection value from the promisified execFile.
 * @param {number} timeoutMs - Timeout that was applied to the failed call.
 * @param {string} subcommand - agent-browser subcommand, used for diagnostics.
 * @returns {ConnectorError}
 */
function toConnectorError(error, timeoutMs, subcommand) {
    const err = error;
    // Checked before `killed`: an output overflow is not a timeout, and
    // retrying with a longer timeout would not help.
    if (err?.code === 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER') {
        return new ConnectorError(`agent-browser ${subcommand} produced more than ${MAX_OUTPUT_BYTES} bytes of output`, 'OUTPUT_TOO_LARGE', 'Request less output, e.g. an interactive-only snapshot or a viewport-only screenshot.');
    }
    if (err?.killed) {
        // Only the subcommand is named: the remaining arguments can carry
        // text typed into form fields, which must not leak into error messages.
        return new ConnectorError(`Command timed out after ${timeoutMs}ms: agent-browser ${subcommand}`, 'TIMEOUT', 'The browser operation took too long. Try a simpler action or increase the timeout.');
    }
    // Other errors — include stderr for diagnostics
    const stderr = err?.stderr?.trim() ?? '';
    const stdout = err?.stdout?.trim() ?? '';
    return new ConnectorError(stderr || stdout || err?.message || String(error), 'CLI_ERROR', 'The agent-browser CLI command failed. Check that agent-browser is installed and the browser session is active.');
}
/**
 * Execute an agent-browser CLI command.
 *
 * Falls back to `npx -y agent-browser@0.17` if the binary is not on PATH.
 * Uses execFile (no shell) to prevent command injection.
 *
 * @param {string[]} args - Subcommand and its arguments; not modified.
 * @param {{ timeoutMs?: number, headed?: boolean }} [options] - `timeoutMs`
 *   defaults to DEFAULT_TIMEOUT_MS; `headed` selects `--headed` instead of
 *   `--headless`.
 * @returns {Promise<{ stdout: string, stderr: string }>}
 * @throws {ConnectorError} BINARY_NOT_FOUND when neither agent-browser nor npx
 *   can be spawned; TIMEOUT when the process is killed; OUTPUT_TOO_LARGE when
 *   output exceeds MAX_OUTPUT_BYTES; CLI_ERROR for any other failure.
 */
export async function execAgentBrowser(args, options) {
    const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
    const env = buildEnv();
    const cliArgs = [options?.headed ? '--headed' : '--headless', ...args];
    const subcommand = args[0] ?? '';
    const run = async (file, fileArgs, timeout) => {
        // execFile is safe against command injection (no shell interpretation)
        const { stdout, stderr } = await execFileAsync(file, fileArgs, {
            env,
            timeout,
            maxBuffer: MAX_OUTPUT_BYTES,
        });
        return { stdout, stderr: stderr ?? '' };
    };
    try {
        return await run(resolveAgentBrowser(), cliArgs, timeoutMs);
    }
    catch (error) {
        // Anything but "binary not found" is a genuine failure of the command.
        if (error?.code !== 'ENOENT') {
            throw toConnectorError(error, timeoutMs, subcommand);
        }
    }
    // Binary not found — try npx fallback, with extra time for the npx install.
    const npxTimeoutMs = timeoutMs + 15_000;
    try {
        return await run('npx', ['-y', 'agent-browser@0.17', ...cliArgs], npxTimeoutMs);
    }
    catch (npxError) {
        // Only a missing npx means the CLI is unavailable; if npx ran and the
        // command failed, report that failure for what it is.
        if (npxError?.code === 'ENOENT') {
            throw new ConnectorError(`agent-browser not found and npx fallback failed: ${npxError.message ?? String(npxError)}`, 'BINARY_NOT_FOUND', 'Install agent-browser: npm install -g agent-browser\n' +
                'Or ensure npx is available on PATH.');
        }
        throw toConnectorError(npxError, npxTimeoutMs, subcommand);
    }
}
75
/**
 * Forget the memoised binary name so the next resolveAgentBrowser() call
 * resolves it again. Intended mainly for tests that need a clean module state
 * between runs.
 */
export function resetBinaryCache() {
    resolvedBinary = null;
}
82
+ //# sourceMappingURL=browser-client.js.map
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Browser Automation MCP Server
4
+ *
5
+ * Provides headless browser automation via the agent-browser CLI.
6
+ * Uses accessibility snapshots (@ref pointers) instead of fragile CSS selectors.
7
+ * Sessions persist automatically between invocations.
8
+ *
9
+ * Requirements:
10
+ * - agent-browser CLI binary on PATH, or npx available for fallback
11
+ *
12
+ * Environment variables:
13
+ * - AGENT_BROWSER_SESSION_NAME: Session name for persistence (default: "mcp")
14
+ */
15
+ export {};
16
+ //# sourceMappingURL=index.d.ts.map
package/dist/index.js ADDED
@@ -0,0 +1,27 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Browser Automation MCP Server
4
+ *
5
+ * Provides headless browser automation via the agent-browser CLI.
6
+ * Uses accessibility snapshots (@ref pointers) instead of fragile CSS selectors.
7
+ * Sessions persist automatically between invocations.
8
+ *
9
+ * Requirements:
10
+ * - agent-browser CLI binary on PATH, or npx available for fallback
11
+ *
12
+ * Environment variables:
13
+ * - AGENT_BROWSER_SESSION_NAME: Session name for persistence (default: "mcp")
14
+ */
15
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
16
+ import { createServer } from './server.js';
17
/**
 * Create the MCP server and attach it to a stdio transport.
 * The status line goes to stderr; stdout is not written to here.
 */
async function main() {
    await createServer().connect(new StdioServerTransport());
    console.error('Browser Automation MCP server running on stdio');
}
/** Log a startup failure and terminate with a non-zero exit code. */
function exitOnFatal(error) {
    console.error('Fatal error:', error);
    process.exit(1);
}
main().catch(exitOnFatal);
27
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,3 @@
1
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ export declare function createServer(): McpServer;
3
+ //# sourceMappingURL=server.d.ts.map
package/dist/server.js ADDED
@@ -0,0 +1,15 @@
1
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import { SERVER_NAME, SERVER_VERSION } from './types.js';
3
+ import { registerNavigationTools, registerInteractionTools, registerObservationTools, registerSessionTools, } from './tools/index.js';
4
/**
 * Build the MCP server and register every tool group on it.
 *
 * @returns {McpServer} The configured server, not yet connected to a transport.
 */
export function createServer() {
    const server = new McpServer({ name: SERVER_NAME, version: SERVER_VERSION });
    // Registration order is kept: navigation, interaction, observation, session.
    const registrars = [
        registerNavigationTools,
        registerInteractionTools,
        registerObservationTools,
        registerSessionTools,
    ];
    for (const register of registrars) {
        register(server);
    }
    return server;
}
15
+ //# sourceMappingURL=server.js.map
@@ -0,0 +1,5 @@
1
+ export { registerNavigationTools } from './navigation.js';
2
+ export { registerInteractionTools } from './interaction.js';
3
+ export { registerObservationTools } from './observation.js';
4
+ export { registerSessionTools } from './session.js';
5
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,5 @@
1
+ export { registerNavigationTools } from './navigation.js';
2
+ export { registerInteractionTools } from './interaction.js';
3
+ export { registerObservationTools } from './observation.js';
4
+ export { registerSessionTools } from './session.js';
5
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1,3 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ export declare function registerInteractionTools(server: McpServer): void;
3
+ //# sourceMappingURL=interaction.d.ts.map
@@ -0,0 +1,135 @@
1
+ import { z } from 'zod';
2
+ import { execAgentBrowser } from '../browser-client.js';
3
+ import { withErrorHandling } from '../utils.js';
4
/**
 * Register the element-interaction tools (click, fill, type, press key,
 * scroll, select, hover, evaluate) on the given MCP server. Each handler
 * forwards its arguments to the agent-browser CLI and returns a JSON string.
 *
 * @param {McpServer} server - Server to register the tools on.
 */
export function registerInteractionTools(server) {
    // A fresh annotations object per tool, so no tool shares state with another.
    const pageChanging = () => ({
        readOnlyHint: false,
        destructiveHint: false,
        idempotentHint: false,
        openWorldHint: true,
    });
    // Serialise a success payload with `ok: true` as the first key.
    const success = (fields) => JSON.stringify({ ok: true, ...fields });

    server.registerTool('browser_click', {
        description: `Click an element. Use @ref from browser_snapshot (preferred) or a CSS selector.

WORKFLOW: browser_snapshot → find @ref → browser_click @ref`,
        inputSchema: {
            ref: z.string().describe('Element ref from snapshot (e.g., "@e2") or CSS selector'),
        },
        annotations: pageChanging(),
    }, withErrorHandling(async ({ ref }) => {
        await execAgentBrowser(['click', ref]);
        return success({ message: `Clicked: ${ref}` });
    }));

    server.registerTool('browser_fill', {
        description: `Clear a field and fill it with text. Use @ref from browser_snapshot.

WORKFLOW: browser_snapshot → find input @ref → browser_fill`,
        inputSchema: {
            ref: z.string().describe('Element ref (e.g., "@e3") or CSS selector'),
            value: z.string().describe('Text to fill'),
        },
        annotations: pageChanging(),
    }, withErrorHandling(async ({ ref, value }) => {
        await execAgentBrowser(['fill', ref, value]);
        // Only the length is reported, never the filled text itself.
        return success({ message: `Filled ${ref} with ${value.length} characters` });
    }));

    server.registerTool('browser_type', {
        description: 'Type text character by character (simulates real keystrokes). Useful for search boxes and autocompletes that respond to individual key events.',
        inputSchema: {
            ref: z.string().describe('Element ref or CSS selector'),
            text: z.string().describe('Text to type'),
        },
        annotations: pageChanging(),
    }, withErrorHandling(async ({ ref, text }) => {
        await execAgentBrowser(['type', ref, text]);
        return success({ message: `Typed ${text.length} characters into ${ref}` });
    }));

    server.registerTool('browser_press_key', {
        description: 'Press a keyboard key. Common keys: Enter, Tab, Escape, Backspace, ArrowDown, ArrowUp.',
        inputSchema: {
            key: z.string().describe('Key to press (e.g., "Enter", "Tab", "Escape")'),
        },
        annotations: pageChanging(),
    }, withErrorHandling(async ({ key }) => {
        await execAgentBrowser(['press', key]);
        return success({ message: `Pressed key: ${key}` });
    }));

    server.registerTool('browser_scroll', {
        description: 'Scroll the page in a direction.',
        inputSchema: {
            direction: z.enum(['up', 'down', 'left', 'right']).describe('Scroll direction'),
            amount: z.number().optional().default(500).describe('Pixels to scroll (default: 500)'),
        },
        annotations: pageChanging(),
    }, withErrorHandling(async ({ direction, amount }) => {
        const px = amount ?? 500;
        await execAgentBrowser(['scroll', direction, String(px)]);
        return success({ message: `Scrolled ${direction} ${px}px` });
    }));

    server.registerTool('browser_select', {
        description: 'Select an option from a dropdown.',
        inputSchema: {
            ref: z.string().describe('Element ref or CSS selector for the <select>'),
            value: z.string().describe('Option value or visible text to select'),
        },
        annotations: pageChanging(),
    }, withErrorHandling(async ({ ref, value }) => {
        await execAgentBrowser(['select', ref, value]);
        return success({ message: `Selected "${value}" in ${ref}` });
    }));

    server.registerTool('browser_hover', {
        description: 'Hover over an element (triggers hover menus/tooltips).',
        inputSchema: {
            ref: z.string().describe('Element ref or CSS selector'),
        },
        // Hover is the one interaction tool declared read-only and idempotent.
        annotations: {
            readOnlyHint: true,
            destructiveHint: false,
            idempotentHint: true,
            openWorldHint: true,
        },
    }, withErrorHandling(async ({ ref }) => {
        await execAgentBrowser(['hover', ref]);
        return success({ message: `Hovering over ${ref}` });
    }));

    server.registerTool('browser_evaluate', {
        description: 'Execute JavaScript in the page context and return the result.',
        inputSchema: {
            script: z.string().describe('JavaScript code to execute'),
        },
        annotations: pageChanging(),
    }, withErrorHandling(async ({ script }) => {
        const { stdout } = await execAgentBrowser(['eval', script]);
        return success({ result: stdout.trim() });
    }));
}
135
+ //# sourceMappingURL=interaction.js.map
@@ -0,0 +1,3 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ export declare function registerNavigationTools(server: McpServer): void;
3
+ //# sourceMappingURL=navigation.d.ts.map
@@ -0,0 +1,72 @@
1
+ import { z } from 'zod';
2
+ import { execAgentBrowser } from '../browser-client.js';
3
+ import { withErrorHandling } from '../utils.js';
4
/**
 * Register the navigation tools (navigate, back, forward, wait) on the given
 * MCP server. Each handler forwards to the agent-browser CLI and returns a
 * JSON string.
 *
 * @param {McpServer} server - Server to register the tools on.
 */
export function registerNavigationTools(server) {
    server.registerTool('browser_navigate', {
        description: `Navigate to a URL. Opens the browser if not already running.

IMPORTANT: After navigating, call browser_snapshot to see the page content before interacting.`,
        inputSchema: {
            url: z.string().describe('URL to navigate to'),
        },
        annotations: {
            readOnlyHint: false,
            destructiveHint: false,
            idempotentHint: false,
            openWorldHint: true,
        },
    }, withErrorHandling(async (args) => {
        await execAgentBrowser(['open', args.url]);
        // The title is a nicety: a failure to read it must not fail the navigation.
        const titleResult = await execAgentBrowser(['get', 'title']).catch(() => ({ stdout: '', stderr: '' }));
        return JSON.stringify({
            ok: true,
            message: `Navigated to ${args.url}`,
            title: titleResult.stdout.trim(),
            hint: 'Call browser_snapshot to see page elements before interacting.',
        });
    }));
    server.registerTool('browser_back', {
        description: 'Navigate back in browser history.',
        inputSchema: {},
        annotations: {
            readOnlyHint: false,
            destructiveHint: false,
            idempotentHint: false,
            openWorldHint: true,
        },
    }, withErrorHandling(async () => {
        await execAgentBrowser(['back']);
        return JSON.stringify({ ok: true, message: 'Navigated back' });
    }));
    server.registerTool('browser_forward', {
        description: 'Navigate forward in browser history.',
        inputSchema: {},
        annotations: {
            readOnlyHint: false,
            destructiveHint: false,
            idempotentHint: false,
            openWorldHint: true,
        },
    }, withErrorHandling(async () => {
        await execAgentBrowser(['forward']);
        return JSON.stringify({ ok: true, message: 'Navigated forward' });
    }));
    server.registerTool('browser_wait', {
        description: 'Wait for an element to appear or for a specified time.',
        inputSchema: {
            selector: z.string().describe('CSS selector to wait for, or milliseconds (e.g., "2000")'),
            timeout: z.number().optional().default(10000).describe('Max wait time in ms (default: 10000)'),
        },
        annotations: {
            readOnlyHint: true,
            destructiveHint: false,
            idempotentHint: true,
            openWorldHint: true,
        },
    }, withErrorHandling(async (args) => {
        const timeoutMs = args.timeout ?? 10_000;
        // When the selector is a plain millisecond count, the child process must
        // be allowed to live at least that long; otherwise a sleep longer than
        // `timeout` is killed and reported as a timeout.
        const target = args.selector.trim();
        const sleepMs = /^\d+$/.test(target) ? Number(target) : 0;
        // NOTE(review): `timeout` only bounds the child process; it is not
        // forwarded to the CLI. Confirm whether agent-browser needs its own flag.
        const budgetMs = Math.max(timeoutMs, sleepMs) + 2000;
        await execAgentBrowser(['wait', args.selector], { timeoutMs: budgetMs });
        return JSON.stringify({ ok: true, message: `Wait completed for: ${args.selector}` });
    }));
}
72
+ //# sourceMappingURL=navigation.js.map
@@ -0,0 +1,3 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ export declare function registerObservationTools(server: McpServer): void;
3
+ //# sourceMappingURL=observation.d.ts.map
@@ -0,0 +1,81 @@
1
+ import { z } from 'zod';
2
+ import { execAgentBrowser } from '../browser-client.js';
3
+ import { withErrorHandling, withErrorHandlingRaw } from '../utils.js';
4
+ import { SNAPSHOT_TIMEOUT_MS, SCREENSHOT_TIMEOUT_MS } from '../types.js';
5
/**
 * Register the observation tools (snapshot, screenshot, page info) on the
 * given MCP server.
 *
 * @param {McpServer} server - Server to register the tools on.
 */
export function registerObservationTools(server) {
    server.registerTool('browser_snapshot', {
        // The tool exposes a `full` switch, not a CLI flag, so the description
        // refers to that parameter rather than to `-i`.
        description: `Get the page accessibility tree with interactive element references.

THIS IS YOUR PRIMARY DISCOVERY TOOL. Always call this before clicking, filling, or interacting with the page.

Returns element refs like @e1, @e2 that you use with browser_click, browser_fill, etc.
Only interactive elements are returned by default, keeping output focused; set full=true to include every element.`,
        inputSchema: {
            full: z.boolean().optional().default(false).describe('If true, show all elements (not just interactive). Default: false.'),
        },
        annotations: {
            readOnlyHint: true,
            destructiveHint: false,
            idempotentHint: true,
            openWorldHint: true,
        },
    }, withErrorHandling(async (args) => {
        // `-i` restricts the CLI snapshot to interactive elements.
        const cliArgs = args.full ? ['snapshot'] : ['snapshot', '-i'];
        const result = await execAgentBrowser(cliArgs, { timeoutMs: SNAPSHOT_TIMEOUT_MS });
        return JSON.stringify({ ok: true, snapshot: result.stdout });
    }));
    server.registerTool('browser_screenshot', {
        description: 'Take a screenshot of the current page. Returns an image.',
        inputSchema: {
            full_page: z.boolean().optional().default(false).describe('Capture full scrollable page'),
            annotate: z.boolean().optional().default(false).describe('Add numbered element labels to the screenshot'),
        },
        annotations: {
            readOnlyHint: true,
            destructiveHint: false,
            idempotentHint: true,
            openWorldHint: true,
        },
    }, withErrorHandlingRaw(async (args) => {
        const cliArgs = ['screenshot'];
        if (args.full_page) {
            cliArgs.push('--full');
        }
        if (args.annotate) {
            cliArgs.push('--annotate');
        }
        cliArgs.push('-'); // output to stdout
        const result = await execAgentBrowser(cliArgs, { timeoutMs: SCREENSHOT_TIMEOUT_MS });
        const data = result.stdout.trim();
        // agent-browser outputs base64 PNG when piped to stdout. Output of more
        // than 100 characters is treated as image data; anything shorter is
        // passed back as a text note.
        if (data.length > 100) {
            return {
                content: [{
                        type: 'image',
                        data,
                        mimeType: 'image/png',
                    }],
            };
        }
        return {
            content: [{ type: 'text', text: JSON.stringify({ ok: true, message: 'Screenshot taken', note: data }) }],
        };
    }));
    server.registerTool('browser_get_page_info', {
        description: 'Get the current page URL and title.',
        inputSchema: {},
        annotations: {
            readOnlyHint: true,
            destructiveHint: false,
            idempotentHint: true,
            openWorldHint: true,
        },
    }, withErrorHandling(async () => {
        // Kept sequential: both commands go through the same browser session.
        const urlResult = await execAgentBrowser(['get', 'url']);
        const titleResult = await execAgentBrowser(['get', 'title']);
        return JSON.stringify({
            ok: true,
            url: urlResult.stdout.trim(),
            title: titleResult.stdout.trim(),
        });
    }));
}
81
+ //# sourceMappingURL=observation.js.map
@@ -0,0 +1,3 @@
1
+ import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ export declare function registerSessionTools(server: McpServer): void;
3
+ //# sourceMappingURL=session.d.ts.map
@@ -0,0 +1,63 @@
1
+ import { z } from 'zod';
2
+ import { execAgentBrowser } from '../browser-client.js';
3
+ import { withErrorHandling } from '../utils.js';
4
/**
 * Register the session tools (tabs, close, authenticate) on the given MCP
 * server.
 *
 * @param {McpServer} server - Server to register the tools on.
 */
export function registerSessionTools(server) {
    server.registerTool('browser_tabs', {
        description: 'List, open, or close tabs, or switch to a tab by number. When tab_number is given it takes precedence over action.',
        inputSchema: {
            action: z.enum(['list', 'new', 'close']).optional().describe('Tab action. Omit to list tabs.'),
            // Whole, non-negative numbers only: the value is stringified straight
            // into the CLI argument list, where "-1" would read as a flag.
            tab_number: z.number().int().nonnegative().optional().describe('Tab number to switch to (from tab list)'),
        },
        annotations: {
            readOnlyHint: false,
            destructiveHint: false,
            idempotentHint: false,
            openWorldHint: true,
        },
    }, withErrorHandling(async (args) => {
        if (args.tab_number !== undefined) {
            await execAgentBrowser(['tab', String(args.tab_number)]);
            return JSON.stringify({ ok: true, message: `Switched to tab ${args.tab_number}` });
        }
        const cliAction = args.action ?? 'list';
        const result = await execAgentBrowser(['tab', cliAction]);
        // The CLI output is returned under `tabs` for every action.
        return JSON.stringify({ ok: true, tabs: result.stdout.trim() });
    }));
    server.registerTool('browser_close', {
        description: 'Close the browser session. Sessions are saved automatically.',
        inputSchema: {},
        annotations: {
            readOnlyHint: false,
            destructiveHint: true,
            idempotentHint: true,
            openWorldHint: false,
        },
    }, withErrorHandling(async () => {
        await execAgentBrowser(['close']);
        return JSON.stringify({ ok: true, message: 'Browser session closed. Sessions are saved automatically.' });
    }));
    server.registerTool('browser_authenticate', {
        description: `Open a visible browser window so the user can log in manually. The session is saved automatically.

WHEN TO USE: "I need to access LinkedIn", "Log me into WhatsApp", etc.
Tell the user to close the browser when done logging in, or call browser_close.`,
        inputSchema: {
            url: z.string().describe('Website URL to open for login'),
        },
        annotations: {
            readOnlyHint: false,
            destructiveHint: false,
            idempotentHint: false,
            openWorldHint: true,
        },
    }, withErrorHandling(async (args) => {
        // `headed: true` makes execAgentBrowser pass --headed instead of --headless.
        await execAgentBrowser(['open', args.url], { headed: true });
        return JSON.stringify({
            ok: true,
            url: args.url,
            message: `Browser opened to ${args.url} in visible mode. The user should log in manually. Their session will be saved automatically when the browser is closed.`,
            next_step: 'Tell the user to log in and close the browser when done, or call browser_close.',
        });
    }));
}
63
+ //# sourceMappingURL=session.js.map
@@ -0,0 +1,12 @@
1
+ export declare const SERVER_NAME = "browser-automation-mcp-server";
2
+ export declare const SERVER_VERSION = "0.1.0";
3
+ export declare const DEFAULT_TIMEOUT_MS = 30000;
4
+ export declare const SNAPSHOT_TIMEOUT_MS = 15000;
5
+ export declare const SCREENSHOT_TIMEOUT_MS = 15000;
6
+ export declare const SESSION_NAME = "mcp";
7
+ export declare class ConnectorError extends Error {
8
+ readonly code: string;
9
+ readonly resolution: string;
10
+ constructor(message: string, code: string, resolution: string);
11
+ }
12
+ //# sourceMappingURL=types.d.ts.map
package/dist/types.js ADDED
@@ -0,0 +1,17 @@
/** Server identifier string exported for the rest of the package. */
export const SERVER_NAME = 'browser-automation-mcp-server';

/** Server version string; matches the "version" field in package.json. */
export const SERVER_VERSION = '0.1.0';

/** Default timeout value (presumably milliseconds, per the name; consumers live in other modules). */
export const DEFAULT_TIMEOUT_MS = 30_000;

/** Timeout value for snapshot operations (presumably milliseconds, per the name). */
export const SNAPSHOT_TIMEOUT_MS = 15_000;

/** Timeout value for screenshot operations (presumably milliseconds, per the name). */
export const SCREENSHOT_TIMEOUT_MS = 15_000;

/** Session name string exported for the rest of the package. */
export const SESSION_NAME = 'mcp';

/**
 * Error type that pairs a message with a `code` and a `resolution` string.
 *
 * Own enumerable properties are created in the order `code`, `resolution`,
 * `name`, so serialising an instance yields the keys in that order.
 */
export class ConnectorError extends Error {
    code;
    resolution;
    // Overrides the inherited "Error" name with an own property on each instance.
    name = 'ConnectorError';

    /**
     * @param {string} message - Text passed through to the Error constructor.
     * @param {string} code - Stored on the instance as `code`.
     * @param {string} resolution - Stored on the instance as `resolution`.
     */
    constructor(message, code, resolution) {
        super(message);
        Object.assign(this, { code, resolution });
    }
}
17
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1,18 @@
1
+ import type { CallToolResult } from '@modelcontextprotocol/sdk/types.js';
2
+ type ToolHandler<T> = (args: T, extra: unknown) => Promise<CallToolResult>; // Handler shape produced by both wrappers below.
3
+ /**
4
+ * Wraps a tool handler with standard error handling.
5
+ *
6
+ * - On success: returns the string result as a text content block.
7
+ * - On ConnectorError: returns a structured JSON error with code and resolution.
8
+ * - On any other error: returns a JSON error carrying the thrown Error's message, or String(value) for non-Error values.
9
+ *
10
+ * Messages are forwarded verbatim (no redaction is done here), so thrown errors must not contain secrets.
11
+ */
12
+ export declare function withErrorHandling<T>(fn: (args: T, extra: unknown) => Promise<string>): ToolHandler<T>;
13
+ /**
14
+ * Wraps a tool handler that returns a CallToolResult directly (e.g. for image responses); thrown errors are mapped exactly as in withErrorHandling.
15
+ */
16
+ export declare function withErrorHandlingRaw<T>(fn: (args: T, extra: unknown) => Promise<CallToolResult>): ToolHandler<T>;
17
+ export {};
18
+ //# sourceMappingURL=utils.d.ts.map
package/dist/utils.js ADDED
@@ -0,0 +1,75 @@
1
+ import { ConnectorError } from './types.js';
/**
 * Converts a value thrown by a tool handler into an MCP error result.
 *
 * - ConnectorError: the JSON payload carries `ok: false`, `error`, `code`
 *   and `resolution`.
 * - Anything else: the JSON payload carries `ok: false` and `error`, which is
 *   the Error's message or `String(value)` for non-Error throwables.
 *
 * The message is forwarded verbatim; no redaction happens here, so callers
 * must not throw errors whose messages contain secrets.
 *
 * @param {unknown} error - The value caught from the wrapped handler.
 * @returns {{ content: Array<{ type: 'text', text: string }>, isError: true }}
 *   A single text content block holding the JSON-encoded payload.
 */
function toErrorResult(error) {
    const payload = error instanceof ConnectorError
        ? { ok: false, error: error.message, code: error.code, resolution: error.resolution }
        : { ok: false, error: error instanceof Error ? error.message : String(error) };
    return {
        content: [{ type: 'text', text: JSON.stringify(payload) }],
        isError: true,
    };
}

/**
 * Wraps a tool handler that resolves to a string with standard error handling.
 *
 * - On success: returns the string result as a single text content block.
 * - On ConnectorError: returns a structured JSON error with code and resolution.
 * - On any other thrown value: returns a JSON error carrying its message.
 *
 * The returned handler never rejects for errors thrown by `fn`; failures are
 * reported through `isError: true` instead.
 *
 * @param {(args: any, extra: unknown) => Promise<string>} fn - Handler to wrap.
 * @returns {(args: any, extra: unknown) => Promise<object>} Handler resolving
 *   to a tool result object.
 */
export function withErrorHandling(fn) {
    return async (args, extra) => {
        try {
            const text = await fn(args, extra);
            return { content: [{ type: 'text', text }] };
        }
        catch (error) {
            return toErrorResult(error);
        }
    };
}

/**
 * Wraps a tool handler that returns a tool result object directly (e.g. for
 * image responses). The handler's result is passed through unchanged; thrown
 * values are mapped to error results exactly as in withErrorHandling.
 *
 * @param {(args: any, extra: unknown) => Promise<object>} fn - Handler to wrap.
 * @returns {(args: any, extra: unknown) => Promise<object>} Handler resolving
 *   to `fn`'s result, or to an error result when `fn` throws.
 */
export function withErrorHandlingRaw(fn) {
    return async (args, extra) => {
        try {
            return await fn(args, extra);
        }
        catch (error) {
            return toErrorResult(error);
        }
    };
}
75
+ //# sourceMappingURL=utils.js.map
package/package.json ADDED
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "@mindstone-engineering/mcp-server-browser-automation",
3
+ "version": "0.1.0",
4
+ "description": "Browser automation MCP server — headless browser control via accessibility snapshots, navigation, form filling, screenshots, and tab management",
5
+ "license": "FSL-1.1-MIT",
6
+ "type": "module",
7
+ "bin": {
8
+ "mcp-server-browser-automation": "dist/index.js"
9
+ },
10
+ "files": [
11
+ "dist",
12
+ "!dist/**/*.map"
13
+ ],
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "https://github.com/nspr-io/mcp-servers.git",
17
+ "directory": "connectors/browser-automation"
18
+ },
19
+ "homepage": "https://github.com/nspr-io/mcp-servers/tree/main/connectors/browser-automation",
20
+ "publishConfig": {
21
+ "access": "public"
22
+ },
23
+ "scripts": {
24
+ "build": "tsc && shx chmod +x dist/index.js",
25
+ "prepare": "npm run build",
26
+ "watch": "tsc --watch",
27
+ "start": "node dist/index.js",
28
+ "test": "vitest run",
29
+ "test:watch": "vitest",
30
+ "test:coverage": "vitest run --coverage"
31
+ },
32
+ "dependencies": {
33
+ "@modelcontextprotocol/sdk": "^1.26.0",
34
+ "zod": "^3.23.0"
35
+ },
36
+ "devDependencies": {
37
+ "@mindstone-engineering/mcp-test-harness": "file:../../test-harness",
38
+ "@types/node": "^22",
39
+ "@vitest/coverage-v8": "^4.1.3",
40
+ "msw": "^2.13.2",
41
+ "shx": "^0.3.4",
42
+ "typescript": "^5.8.2",
43
+ "vitest": "^4.1.3"
44
+ },
45
+ "engines": {
46
+ "node": ">=20"
47
+ }
48
+ }