@hypothesi/tauri-mcp-server 0.8.3 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@ A **Model Context Protocol (MCP) server** that enables AI assistants like Claude
11
11
 
12
12
  | Category | Capabilities |
13
13
  |----------|-------------|
14
- | 🎯 **UI Automation** | Screenshots, clicks, typing, scrolling, element finding |
14
+ | 🎯 **UI Automation** | Screenshots, clicks, typing, scrolling, element finding, visual element picker |
15
15
  | 🔍 **IPC Monitoring** | Capture and inspect Tauri IPC calls in real-time |
16
16
  | 📱 **Mobile Dev** | List Android emulators & iOS simulators |
17
17
  | 📋 **Logs** | Stream console, Android logcat, iOS, and system logs |
@@ -52,6 +52,18 @@ npx -y install-mcp @hypothesi/tauri-mcp-server --client claude-code
52
52
 
53
53
  Supported clients: `claude-code`, `cursor`, `windsurf`, `vscode`, `cline`, `roo-cline`, `claude`, `zed`, `goose`, `warp`, `codex`
54
54
 
55
+ ## Terminal CLI
56
+
57
+ If you want to call the same tools directly from a shell, install the companion CLI package:
58
+
59
+ ```bash
60
+ npm install -g @hypothesi/tauri-mcp-cli
61
+ tauri-mcp driver-session start --port 9223
62
+ tauri-mcp driver-session status --json
63
+ ```
64
+
65
+ The CLI keeps the underlying MCP server warm in the background so stateful commands such as `driver_session` work across separate invocations.
66
+
55
67
  ## Multi-App Support
56
68
 
57
69
  The MCP server supports connecting to multiple Tauri apps simultaneously. Each app runs on a unique port, and the most recently connected app becomes the "default" app.
@@ -86,7 +98,7 @@ await driver_session({ action: "stop", appIdentifier: 9223 })
86
98
  await driver_session({ action: "stop" })
87
99
  ```
88
100
 
89
- ## Available Tools (18 total)
101
+ ## Available Tools (20 total)
90
102
 
91
103
  ### Setup & Configuration
92
104
 
@@ -108,6 +120,8 @@ await driver_session({ action: "stop" })
108
120
  | `webview_get_styles` | Get computed CSS styles |
109
121
  | `webview_execute_js` | Execute JavaScript in webview |
110
122
  | `webview_dom_snapshot` | Get structured DOM snapshot (accessibility or structure) |
123
+ | `webview_select_element` | Visual element picker — user clicks an element, returns metadata + screenshot |
124
+ | `webview_get_pointed_element` | Get metadata for the element the user Alt+Shift+Clicked |
111
125
  | `manage_window` | List windows, get info, or resize |
112
126
 
113
127
  ### IPC & Plugin
package/dist/api.d.ts ADDED
@@ -0,0 +1,2 @@
1
+ export { TOOLS } from './tools-registry.js';
2
+ export { createMcpServer, getCliToolDefinitions, startStdioServer } from './server.js';
package/dist/api.js ADDED
@@ -0,0 +1,2 @@
1
+ export { TOOLS } from './tools-registry.js';
2
+ export { createMcpServer, getCliToolDefinitions, startStdioServer } from './server.js';
@@ -0,0 +1,35 @@
1
+ /**
2
+ * Configuration for the MCP Bridge connection.
3
+ *
4
+ * This module provides configuration options for connecting to Tauri apps,
5
+ * with support for environment variables and sensible defaults.
6
+ */
7
+ export interface BridgeConfig {
8
+ host: string;
9
+ port: number;
10
+ }
11
+ /**
12
+ * Gets the default host for MCP Bridge connections.
13
+ *
14
+ * Resolution priority:
15
+ * 1. MCP_BRIDGE_HOST environment variable
16
+ * 2. TAURI_DEV_HOST environment variable (set by Tauri CLI for mobile dev)
17
+ * 3. 'localhost' (default)
18
+ */
19
+ export declare function getDefaultHost(): string;
20
+ /**
21
+ * Gets the default port for MCP Bridge connections.
22
+ *
23
+ * Resolution priority:
24
+ * 1. MCP_BRIDGE_PORT environment variable
25
+ * 2. 9223 (default)
26
+ */
27
+ export declare function getDefaultPort(): number;
28
+ /**
29
+ * Gets the full bridge configuration from environment variables.
30
+ */
31
+ export declare function getConfig(): BridgeConfig;
32
+ /**
33
+ * Builds a WebSocket URL from host and port.
34
+ */
35
+ export declare function buildWebSocketURL(host: string, port: number): string;
@@ -0,0 +1,75 @@
1
+ /**
2
+ * App discovery and session management for multiple Tauri instances.
3
+ *
4
+ * This module handles discovering and connecting to multiple Tauri apps
5
+ * running with MCP Bridge on the same machine or remote devices using port scanning.
6
+ */
7
+ import { PluginClient } from './plugin-client.js';
8
+ export interface AppInstance {
9
+ host: string;
10
+ port: number;
11
+ available: boolean;
12
+ }
13
+ export interface SessionInfo {
14
+ appId: string;
15
+ name: string;
16
+ host: string;
17
+ port: number;
18
+ client?: PluginClient;
19
+ connected: boolean;
20
+ }
21
+ /**
22
+ * Manages discovery and connection to multiple Tauri app instances
23
+ */
24
+ export declare class AppDiscovery {
25
+ private _activeSessions;
26
+ private _host;
27
+ private _basePort;
28
+ private _maxPorts;
29
+ constructor(host?: string, basePort?: number);
30
+ /**
31
+ * Gets the configured host.
32
+ */
33
+ get host(): string;
34
+ /**
35
+ * Sets the host for discovery.
36
+ */
37
+ setHost(host: string): void;
38
+ /**
39
+ * Discovers available Tauri app instances by scanning ports
40
+ */
41
+ discoverApps(): Promise<AppInstance[]>;
42
+ /**
43
+ * Connects to a specific app on a host and port
44
+ */
45
+ connectToPort(port: number, appName?: string, host?: string): Promise<SessionInfo>;
46
+ /**
47
+ * Gets the first available app
48
+ */
49
+ getFirstAvailableApp(): Promise<AppInstance | null>;
50
+ /**
51
+ * Disconnects from a specific session
52
+ */
53
+ disconnectSession(sessionId: string): Promise<void>;
54
+ /**
55
+ * Disconnects from all apps
56
+ */
57
+ disconnectAll(): Promise<void>;
58
+ /**
59
+ * Gets the active session by ID
60
+ */
61
+ getSession(sessionId: string): SessionInfo | undefined;
62
+ /**
63
+ * Gets all active sessions
64
+ */
65
+ getAllSessions(): SessionInfo[];
66
+ /**
67
+ * Try to connect to the default port
68
+ */
69
+ connectToDefaultPort(): Promise<SessionInfo>;
70
+ /**
71
+ * Check if a port is in use (likely a Tauri app)
72
+ */
73
+ private _isPortInUse;
74
+ }
75
+ export declare const appDiscovery: AppDiscovery;
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Element picker module for MCP Server Tauri.
3
+ *
4
+ * Provides two tools:
5
+ * - selectElement: Agent-initiated picker overlay (user clicks element)
6
+ * - getPointedElement: Retrieve element user pointed at via Alt+Shift+Click
7
+ */
8
+ import { z } from 'zod';
9
+ import type { ToolContent } from '../tools-registry.js';
10
+ export declare const SelectElementSchema: z.ZodObject<{
11
+ windowId: z.ZodOptional<z.ZodString>;
12
+ appIdentifier: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
13
+ } & {
14
+ timeout: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
15
+ }, "strip", z.ZodTypeAny, {
16
+ timeout: number;
17
+ windowId?: string | undefined;
18
+ appIdentifier?: string | number | undefined;
19
+ }, {
20
+ windowId?: string | undefined;
21
+ appIdentifier?: string | number | undefined;
22
+ timeout?: number | undefined;
23
+ }>;
24
+ export declare const GetPointedElementSchema: z.ZodObject<{
25
+ windowId: z.ZodOptional<z.ZodString>;
26
+ appIdentifier: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
27
+ }, "strip", z.ZodTypeAny, {
28
+ windowId?: string | undefined;
29
+ appIdentifier?: string | number | undefined;
30
+ }, {
31
+ windowId?: string | undefined;
32
+ appIdentifier?: string | number | undefined;
33
+ }>;
34
+ export declare function selectElement(options: {
35
+ timeout?: number;
36
+ windowId?: string;
37
+ appIdentifier?: string | number;
38
+ }): Promise<ToolContent[]>;
39
+ export declare function getPointedElement(options: {
40
+ windowId?: string;
41
+ appIdentifier?: string | number;
42
+ }): Promise<ToolContent[]>;
@@ -0,0 +1,272 @@
1
+ /**
2
+ * Element picker module for MCP Server Tauri.
3
+ *
4
+ * Provides two tools:
5
+ * - selectElement: Agent-initiated picker overlay (user clicks element)
6
+ * - getPointedElement: Retrieve element user pointed at via Alt+Shift+Click
7
+ */
8
+ import { z } from 'zod';
9
+ import { executeInWebview, executeAsyncInWebview } from './webview-executor.js';
10
+ import { ensureSessionAndConnect } from './plugin-client.js';
11
+ import { SCRIPTS, buildScript } from './scripts/index.js';
12
+ import { WindowTargetSchema } from './webview-interactions.js';
13
+ import { getHtml2CanvasSource, HTML2CANVAS_SCRIPT_ID, } from './scripts/html2canvas-loader.js';
14
+ import { registerScript, isScriptRegistered } from './script-manager.js';
15
+ // ============================================================================
16
+ // Schemas
17
+ // ============================================================================
18
/** Limits, in milliseconds, on how long the picker waits for the user to click. */
const PICKER_TIMEOUT_MS = { min: 5000, max: 120000, fallback: 60000 };
/**
 * Input schema for `selectElement`: the shared window-target fields plus an
 * optional, bounded `timeout` that falls back to 60000 ms when omitted.
 */
export const SelectElementSchema = WindowTargetSchema.extend({
    timeout: z
        .number()
        .min(PICKER_TIMEOUT_MS.min)
        .max(PICKER_TIMEOUT_MS.max)
        .optional()
        .default(PICKER_TIMEOUT_MS.fallback)
        .describe('Timeout in ms for user to pick an element (5000-120000, default 60000)'),
});
/** Input schema for `getPointedElement`: the shared window-target fields only. */
export const GetPointedElementSchema = WindowTargetSchema.extend({});
23
+ // ============================================================================
24
+ // Helpers
25
+ // ============================================================================
26
+ /**
27
+ * Format element metadata into a readable text block.
28
+ */
29
+ function formatElementMetadata(meta) {
30
+ const lines = [];
31
+ lines.push(`## Element: <${meta.tag}>`);
32
+ if (meta.id) {
33
+ lines.push(`**ID:** ${meta.id}`);
34
+ }
35
+ if (meta.classes.length > 0) {
36
+ lines.push(`**Classes:** ${meta.classes.join(', ')}`);
37
+ }
38
+ lines.push(`**CSS Selector:** \`${meta.cssSelector}\``);
39
+ if (meta.xpath) {
40
+ lines.push(`**XPath:** \`${meta.xpath}\``);
41
+ }
42
+ // Bounding rect
43
+ const r = meta.boundingRect;
44
+ lines.push(`**Bounding Rect:** ${Math.round(r.width)}x${Math.round(r.height)} at (${Math.round(r.x)}, ${Math.round(r.y)})`);
45
+ // Attributes (skip id and class which are already shown)
46
+ const attrEntries = Object.entries(meta.attributes).filter(([k]) => { return k !== 'id' && k !== 'class'; });
47
+ if (attrEntries.length > 0) {
48
+ lines.push(`**Attributes:** ${attrEntries.map(([k, v]) => { return `${k}="${v}"`; }).join(', ')}`);
49
+ }
50
+ if (meta.textContent) {
51
+ const text = meta.textContent.length > 200
52
+ ? meta.textContent.substring(0, 200) + '...'
53
+ : meta.textContent;
54
+ lines.push(`**Text Content:** ${text}`);
55
+ }
56
+ // Computed styles (only non-default interesting ones)
57
+ const styleEntries = Object.entries(meta.computedStyles);
58
+ if (styleEntries.length > 0) {
59
+ lines.push('**Computed Styles:**');
60
+ for (const [prop, val] of styleEntries) {
61
+ lines.push(` ${prop}: ${val}`);
62
+ }
63
+ }
64
+ if (meta.parentChain.length > 0) {
65
+ lines.push('**Parent Chain:**');
66
+ for (const parent of meta.parentChain) {
67
+ let desc = ` <${parent.tag}>`;
68
+ if (parent.id) {
69
+ desc += `#${parent.id}`;
70
+ }
71
+ if (parent.classes && parent.classes.length > 0) {
72
+ desc += `.${parent.classes.join('.')}`;
73
+ }
74
+ if (parent.boundingRect) {
75
+ desc += ` (${Math.round(parent.boundingRect.width)}x${Math.round(parent.boundingRect.height)})`;
76
+ }
77
+ lines.push(desc);
78
+ }
79
+ }
80
+ return lines.join('\n');
81
+ }
82
+ /**
83
+ * Inject a script that removes all picker highlight elements from the DOM.
84
+ */
85
+ async function cleanupPickerHighlights(windowId, appIdentifier) {
86
+ const script = `(function() {
87
+ var els = document.querySelectorAll('[data-mcp-picker]');
88
+ for (var i = 0; i < els.length; i++) { els[i].parentNode.removeChild(els[i]); }
89
+ return 'Cleaned up ' + els.length + ' picker elements';
90
+ })()`;
91
+ try {
92
+ await executeInWebview(script, windowId, appIdentifier);
93
+ }
94
+ catch {
95
+ // Best effort cleanup
96
+ }
97
+ }
98
+ /**
99
+ * Capture a screenshot of a specific element using html2canvas.
100
+ * Returns the base64 data URL of the cropped element image, or null on failure.
101
+ */
102
+ async function captureElementScreenshot(cssSelector, windowId) {
103
+ // Ensure html2canvas is loaded in the webview
104
+ try {
105
+ const isRegistered = await isScriptRegistered(HTML2CANVAS_SCRIPT_ID);
106
+ if (!isRegistered) {
107
+ const source = getHtml2CanvasSource();
108
+ await registerScript(HTML2CANVAS_SCRIPT_ID, 'inline', source);
109
+ }
110
+ }
111
+ catch {
112
+ // Script manager unavailable — we'll inline the library in the capture script
113
+ }
114
+ const escapedSelector = cssSelector.replace(/\\/g, '\\\\').replace(/'/g, '\\\'');
115
+ // Build a script that captures just the element with html2canvas
116
+ const captureScript = `
117
+ const html2canvasFn = typeof html2canvas !== 'undefined' ? html2canvas :
118
+ (typeof window !== 'undefined' && window.html2canvas) ? window.html2canvas :
119
+ (typeof self !== 'undefined' && self.html2canvas) ? self.html2canvas :
120
+ (typeof globalThis !== 'undefined' && globalThis.html2canvas) ? globalThis.html2canvas : null;
121
+
122
+ if (!html2canvasFn) {
123
+ throw new Error('html2canvas not loaded');
124
+ }
125
+
126
+ const el = document.querySelector('${escapedSelector}');
127
+ if (!el) {
128
+ throw new Error('Element not found for screenshot');
129
+ }
130
+
131
+ const canvas = await html2canvasFn(el, {
132
+ backgroundColor: null,
133
+ scale: window.devicePixelRatio || 1,
134
+ logging: false,
135
+ useCORS: true,
136
+ allowTaint: false,
137
+ imageTimeout: 5000,
138
+ });
139
+
140
+ if (!canvas) {
141
+ throw new Error('html2canvas returned null canvas');
142
+ }
143
+
144
+ const dataUrl = canvas.toDataURL('image/png');
145
+ if (!dataUrl || !dataUrl.startsWith('data:image/')) {
146
+ throw new Error('Invalid data URL from canvas');
147
+ }
148
+
149
+ return dataUrl;
150
+ `;
151
+ try {
152
+ const dataUrl = await executeAsyncInWebview(captureScript, windowId, 10000);
153
+ if (!dataUrl || !dataUrl.startsWith('data:image/')) {
154
+ return null;
155
+ }
156
+ // Extract base64 data from data URL
157
+ const commaIndex = dataUrl.indexOf(',');
158
+ if (commaIndex === -1) {
159
+ return null;
160
+ }
161
+ return {
162
+ type: 'image',
163
+ data: dataUrl.substring(commaIndex + 1),
164
+ mimeType: 'image/png',
165
+ };
166
+ }
167
+ catch {
168
+ return null;
169
+ }
170
+ }
171
+ // ============================================================================
172
+ // selectElement - Agent-initiated picker
173
+ // ============================================================================
174
/**
 * Agent-initiated element picker.
 *
 * Injects the picker overlay into the webview, waits for an `element_picked`
 * event carrying this call's picker ID, and returns the picked element's
 * formatted metadata followed by a screenshot of it (or a text note when the
 * screenshot could not be captured).
 *
 * @param options.timeout How long to wait for the user, in ms (default 60000).
 * @param options.windowId Optional target window.
 * @param options.appIdentifier Optional target app.
 * @returns Tool content items: a cancellation / no-data notice, or metadata
 *   text plus a screenshot item.
 * @throws Error when the user does not pick an element within `timeout`, or
 *   when the picker overlay cannot be injected.
 */
export async function selectElement(options) {
    const { timeout = 60000, windowId, appIdentifier } = options;
    const client = await ensureSessionAndConnect(appIdentifier);
    // Unique ID so events from an earlier or concurrent picker are ignored.
    const pickerId = `picker_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    // Detaches the listener and cancels the timer; assigned inside the executor.
    let stopWaiting = () => { };
    // Start listening BEFORE injecting, so a very fast click cannot be missed.
    const eventPromise = new Promise((resolve, reject) => {
        const handler = (message) => {
            if (message.type !== 'element_picked') {
                return;
            }
            const payload = message.payload;
            if (!payload || payload.pickerId !== pickerId) {
                return;
            }
            stopWaiting();
            resolve(payload);
        };
        const timeoutHandle = setTimeout(() => {
            stopWaiting();
            // Remove the picker UI; the helper swallows its own errors.
            void cleanupPickerHighlights(windowId, appIdentifier);
            reject(new Error(`Element picker timed out after ${timeout}ms. User did not select an element.`));
        }, timeout);
        stopWaiting = () => {
            clearTimeout(timeoutHandle);
            client.removeListener('event', handler);
        };
        client.on('event', handler);
    });
    // Mark the promise as handled so a timeout that fires while we are still
    // awaiting the injection cannot surface as an unhandled rejection. The
    // `await eventPromise` below still observes the rejection.
    eventPromise.catch(() => { });
    try {
        // Inject picker overlay (expected to return quickly).
        const script = buildScript(SCRIPTS.elementPicker, { mode: 'pick', pickerId });
        await executeInWebview(script, windowId, appIdentifier);
    }
    catch (error) {
        // Injection failed: nobody will ever answer, so stop waiting right away
        // instead of leaking the listener and timer until the timeout fires.
        stopWaiting();
        await cleanupPickerHighlights(windowId, appIdentifier);
        throw error;
    }
    // Wait for user interaction
    const result = await eventPromise;
    if (result.cancelled) {
        return [{ type: 'text', text: 'Element picker was cancelled by the user.' }];
    }
    const element = result.element;
    if (!element) {
        await cleanupPickerHighlights(windowId, appIdentifier);
        return [{ type: 'text', text: 'Element picker returned no element data.' }];
    }
    // Remove all picker UI BEFORE the screenshot so overlays are not captured.
    await cleanupPickerHighlights(windowId, appIdentifier);
    const content = [];
    content.push({ type: 'text', text: formatElementMetadata(element) });
    // NOTE(review): the screenshot helper receives windowId only, not
    // appIdentifier; confirm it targets the same app as the picker.
    const screenshot = await captureElementScreenshot(element.cssSelector, windowId);
    if (screenshot) {
        content.push(screenshot);
    }
    else {
        content.push({ type: 'text', text: '(Element screenshot capture failed)' });
    }
    return content;
}
233
+ // ============================================================================
234
+ // getPointedElement - Retrieve user-pointed element
235
+ // ============================================================================
236
/**
 * Return the element stored in `window.__MCP_POINTED_ELEMENT__` by the webview.
 *
 * The stored value is read and reset to `null` in one script, so each pointed
 * element is reported at most once. The result is the formatted metadata plus
 * a screenshot item, or a text note when the screenshot could not be captured.
 *
 * @param options.windowId Optional target window.
 * @param options.appIdentifier Optional target app.
 * @returns Tool content items describing the pointed element, or a single text
 *   item when nothing was pointed or the stored data could not be parsed.
 */
export async function getPointedElement(options) {
    const { windowId, appIdentifier } = options;
    // Read and clear in a single round trip.
    const readAndClearScript = `(function() {
        var data = window.__MCP_POINTED_ELEMENT__;
        window.__MCP_POINTED_ELEMENT__ = null;
        return data ? JSON.stringify(data) : null;
    })()`;
    const serialized = await executeInWebview(readAndClearScript, windowId, appIdentifier);
    const nothingPointed = !serialized || serialized === 'null' || serialized === 'undefined';
    if (nothingPointed) {
        return [
            {
                type: 'text',
                text: 'No element has been pointed. Use Alt+Shift+Click on an element in the Tauri app first.',
            },
        ];
    }
    let pointed;
    try {
        pointed = JSON.parse(serialized);
    }
    catch {
        return [{ type: 'text', text: `Failed to parse pointed element data: ${serialized.substring(0, 200)}` }];
    }
    // Metadata first, then the element-only screenshot (or a failure note).
    const metadataItem = { type: 'text', text: formatElementMetadata(pointed) };
    const image = await captureElementScreenshot(pointed.cssSelector, windowId);
    const screenshotItem = image
        ? image
        : { type: 'text', text: '(Element screenshot capture failed)' };
    return [metadataItem, screenshotItem];
}
@@ -0,0 +1,100 @@
1
+ import { EventEmitter } from 'events';
2
+ interface PluginCommand {
3
+ id?: string;
4
+ command: string;
5
+ args?: unknown;
6
+ }
7
+ export interface PluginResponse {
8
+ id?: string;
9
+ success: boolean;
10
+ data?: unknown;
11
+ error?: string;
12
+ windowContext?: {
13
+ windowLabel: string;
14
+ totalWindows: number;
15
+ warning?: string;
16
+ };
17
+ }
18
+ /**
19
+ * Client to communicate with the MCP Bridge plugin's WebSocket server
20
+ */
21
+ export declare class PluginClient extends EventEmitter {
22
+ private _ws;
23
+ private _url;
24
+ private _host;
25
+ private _port;
26
+ private _reconnectAttempts;
27
+ private _shouldReconnect;
28
+ private _reconnectDelay;
29
+ private _pendingRequests;
30
+ /**
31
+ * Constructor for PluginClient
32
+ * @param host Host address of the WebSocket server
33
+ * @param port Port number of the WebSocket server
34
+ */
35
+ constructor(host: string, port: number);
36
+ /**
37
+ * Creates a PluginClient with default configuration from environment.
38
+ */
39
+ static create_default(): PluginClient;
40
+ /**
41
+ * Gets the host this client is configured to connect to.
42
+ */
43
+ get host(): string;
44
+ /**
45
+ * Gets the port this client is configured to connect to.
46
+ */
47
+ get port(): number;
48
+ /**
49
+ * Connect to the plugin's WebSocket server
50
+ */
51
+ connect(): Promise<void>;
52
+ /**
53
+ * Disconnect from the plugin
54
+ */
55
+ disconnect(): void;
56
+ /**
57
+ * Send a command to the plugin and wait for response.
58
+ *
59
+ * Automatically retries on transient "not found" errors (e.g. window not
60
+ * yet registered after WebSocket connect) with exponential backoff.
61
+ */
62
+ sendCommand(command: PluginCommand, timeoutMs?: number): Promise<PluginResponse>;
63
+ /**
64
+ * Check if connected
65
+ */
66
+ isConnected(): boolean;
67
+ }
68
+ /**
69
+ * Gets the existing singleton PluginClient without creating or modifying it.
70
+ * Use this for status checks where you don't want to affect the current connection.
71
+ *
72
+ * @returns The existing PluginClient or null if none exists
73
+ */
74
+ export declare function getExistingPluginClient(): PluginClient | null;
75
+ /**
76
+ * Gets or creates a singleton PluginClient.
77
+ *
78
+ * If host/port are provided and differ from the existing client's configuration,
79
+ * the existing client is disconnected and a new one is created. This ensures
80
+ * that session start with a specific port always uses that port.
81
+ *
82
+ * @param host Optional host override
83
+ * @param port Optional port override
84
+ */
85
+ export declare function getPluginClient(host?: string, port?: number): PluginClient;
86
+ /**
87
+ * Resets the singleton client (useful for reconnecting with different config).
88
+ */
89
+ export declare function resetPluginClient(): void;
90
+ export declare function connectPlugin(host?: string, port?: number): Promise<void>;
91
+ /**
92
+ * Ensures a session is active and connects to the plugin using session config.
93
+ * This should be used by all tools that require a connected Tauri app.
94
+ *
95
+ * @param appIdentifier - Optional app identifier to target specific app
96
+ * @throws Error if no session is active
97
+ */
98
+ export declare function ensureSessionAndConnect(appIdentifier?: string | number): Promise<PluginClient>;
99
+ export declare function disconnectPlugin(): Promise<void>;
100
+ export {};