@hypothesi/tauri-mcp-server 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@ A **Model Context Protocol (MCP) server** that enables AI assistants like Claude
11
11
 
12
12
  | Category | Capabilities |
13
13
  |----------|-------------|
14
- | 🎯 **UI Automation** | Screenshots, clicks, typing, scrolling, element finding |
14
+ | 🎯 **UI Automation** | Screenshots, clicks, typing, scrolling, element finding, visual element picker |
15
15
  | 🔍 **IPC Monitoring** | Capture and inspect Tauri IPC calls in real-time |
16
16
  | 📱 **Mobile Dev** | List Android emulators & iOS simulators |
17
17
  | 📋 **Logs** | Stream console, Android logcat, iOS, and system logs |
@@ -86,7 +86,7 @@ await driver_session({ action: "stop", appIdentifier: 9223 })
86
86
  await driver_session({ action: "stop" })
87
87
  ```
88
88
 
89
- ## Available Tools (18 total)
89
+ ## Available Tools (20 total)
90
90
 
91
91
  ### Setup & Configuration
92
92
 
@@ -108,6 +108,8 @@ await driver_session({ action: "stop" })
108
108
  | `webview_get_styles` | Get computed CSS styles |
109
109
  | `webview_execute_js` | Execute JavaScript in webview |
110
110
  | `webview_dom_snapshot` | Get structured DOM snapshot (accessibility or structure) |
111
+ | `webview_select_element` | Visual element picker — user clicks an element, returns metadata + screenshot |
112
+ | `webview_get_pointed_element` | Get metadata for the element the user Alt+Shift+Clicked |
111
113
  | `manage_window` | List windows, get info, or resize |
112
114
 
113
115
  ### IPC & Plugin
@@ -0,0 +1,272 @@
1
+ /**
2
+ * Element picker module for MCP Server Tauri.
3
+ *
4
+ * Provides two tools:
5
+ * - selectElement: Agent-initiated picker overlay (user clicks element)
6
+ * - getPointedElement: Retrieve element user pointed at via Alt+Shift+Click
7
+ */
8
+ import { z } from 'zod';
9
+ import { executeInWebview, executeAsyncInWebview } from './webview-executor.js';
10
+ import { ensureSessionAndConnect } from './plugin-client.js';
11
+ import { SCRIPTS, buildScript } from './scripts/index.js';
12
+ import { WindowTargetSchema } from './webview-interactions.js';
13
+ import { getHtml2CanvasSource, HTML2CANVAS_SCRIPT_ID, } from './scripts/html2canvas-loader.js';
14
+ import { registerScript, isScriptRegistered } from './script-manager.js';
15
+ // ============================================================================
16
+ // Schemas
17
+ // ============================================================================
18
/**
 * Optional picker timeout in milliseconds.
 * Accepts 5000-120000 and falls back to 60000 when omitted.
 */
const pickerTimeoutField = z
    .number()
    .min(5000)
    .max(120000)
    .optional()
    .default(60000)
    .describe('Timeout in ms for user to pick an element (5000-120000, default 60000)');

/** Input schema for selectElement: WindowTargetSchema plus the optional `timeout` field. */
export const SelectElementSchema = WindowTargetSchema.extend({ timeout: pickerTimeoutField });

/** Input schema for getPointedElement: WindowTargetSchema with no additional fields. */
export const GetPointedElementSchema = WindowTargetSchema.extend({});
23
+ // ============================================================================
24
+ // Helpers
25
+ // ============================================================================
26
/**
 * Format element metadata into a readable Markdown text block.
 *
 * Only `tag` and `cssSelector` are rendered unconditionally. Every other
 * field is optional: a missing or malformed collection is treated as empty
 * and its section is omitted, so a partial payload never throws here.
 *
 * @param {Object} meta - Element metadata object.
 * @param {string} meta.tag - Tag name shown in the heading.
 * @param {string} [meta.id] - Element id; omitted from output when falsy.
 * @param {string[]} [meta.classes] - Class names, joined with ", ".
 * @param {string} meta.cssSelector - CSS selector for the element.
 * @param {string} [meta.xpath] - XPath for the element.
 * @param {{x: number, y: number, width: number, height: number}} [meta.boundingRect]
 *   Geometry; each value is rounded to the nearest integer.
 * @param {Object<string, string>} [meta.attributes] - Attribute map; `id` and
 *   `class` are skipped because they are already shown above.
 * @param {string} [meta.textContent] - Text content, truncated to 200 characters.
 * @param {Object<string, string>} [meta.computedStyles] - Style map; every
 *   entry supplied is printed as-is (no filtering happens in this function).
 * @param {Array<Object>} [meta.parentChain] - Ancestor descriptors, each with
 *   `tag` and optional `id`, `classes`, `boundingRect`.
 * @returns {string} The sections joined with newlines.
 */
function formatElementMetadata(meta) {
    const lines = [];
    lines.push(`## Element: <${meta.tag}>`);
    if (meta.id) {
        lines.push(`**ID:** ${meta.id}`);
    }
    const classes = Array.isArray(meta.classes) ? meta.classes : [];
    if (classes.length > 0) {
        lines.push(`**Classes:** ${classes.join(', ')}`);
    }
    lines.push(`**CSS Selector:** \`${meta.cssSelector}\``);
    if (meta.xpath) {
        lines.push(`**XPath:** \`${meta.xpath}\``);
    }
    // Bounding rect (skipped when the payload carries no geometry)
    const r = meta.boundingRect;
    if (r) {
        lines.push(`**Bounding Rect:** ${Math.round(r.width)}x${Math.round(r.height)} at (${Math.round(r.x)}, ${Math.round(r.y)})`);
    }
    // Attributes (skip id and class which are already shown)
    const attrEntries = Object.entries(meta.attributes || {})
        .filter(([k]) => k !== 'id' && k !== 'class');
    if (attrEntries.length > 0) {
        lines.push(`**Attributes:** ${attrEntries.map(([k, v]) => `${k}="${v}"`).join(', ')}`);
    }
    if (meta.textContent) {
        const text = meta.textContent.length > 200
            ? meta.textContent.substring(0, 200) + '...'
            : meta.textContent;
        lines.push(`**Text Content:** ${text}`);
    }
    // Computed styles: printed exactly as supplied by the caller
    const styleEntries = Object.entries(meta.computedStyles || {});
    if (styleEntries.length > 0) {
        lines.push('**Computed Styles:**');
        for (const [prop, val] of styleEntries) {
            lines.push(` ${prop}: ${val}`);
        }
    }
    const parentChain = Array.isArray(meta.parentChain) ? meta.parentChain : [];
    if (parentChain.length > 0) {
        lines.push('**Parent Chain:**');
        for (const parent of parentChain) {
            let desc = ` <${parent.tag}>`;
            if (parent.id) {
                desc += `#${parent.id}`;
            }
            if (parent.classes && parent.classes.length > 0) {
                desc += `.${parent.classes.join('.')}`;
            }
            if (parent.boundingRect) {
                desc += ` (${Math.round(parent.boundingRect.width)}x${Math.round(parent.boundingRect.height)})`;
            }
            lines.push(desc);
        }
    }
    return lines.join('\n');
}
82
/**
 * Remove every picker highlight node (anything carrying the
 * `data-mcp-picker` attribute) from the webview's DOM.
 *
 * This is best effort: any error raised while running the removal script
 * is swallowed, so the function never rejects because of it.
 *
 * @param {*} windowId - Forwarded unchanged to executeInWebview.
 * @param {*} appIdentifier - Forwarded unchanged to executeInWebview.
 * @returns {Promise<void>}
 */
async function cleanupPickerHighlights(windowId, appIdentifier) {
    const removalScript = `(function() {
    var els = document.querySelectorAll('[data-mcp-picker]');
    for (var i = 0; i < els.length; i++) { els[i].parentNode.removeChild(els[i]); }
    return 'Cleaned up ' + els.length + ' picker elements';
})()`;
    try {
        await executeInWebview(removalScript, windowId, appIdentifier);
    }
    catch {
        // Deliberately ignored: leftover highlights are cosmetic only.
    }
}
98
/**
 * Capture a screenshot of a single element using html2canvas.
 *
 * @param {string} cssSelector - Selector for the element to capture. It is
 *   embedded into the injected script as a JSON-encoded string literal, so
 *   quotes, backslashes and line terminators cannot break the script.
 * @param {*} windowId - Forwarded unchanged to executeAsyncInWebview.
 * @returns {Promise<{type: 'image', data: string, mimeType: 'image/png'}|null>}
 *   An image content object whose `data` is the base64 payload (the part of
 *   the data URL after the first comma), or null on any failure.
 */
async function captureElementScreenshot(cssSelector, windowId) {
    // Try to make html2canvas available in the webview via the script manager.
    try {
        const isRegistered = await isScriptRegistered(HTML2CANVAS_SCRIPT_ID);
        if (!isRegistered) {
            const source = getHtml2CanvasSource();
            await registerScript(HTML2CANVAS_SCRIPT_ID, 'inline', source);
        }
    }
    catch {
        // Registration failed. Carry on regardless: the capture script below
        // throws 'html2canvas not loaded' if the library is absent, and that
        // failure is reported to the caller as null.
    }
    // JSON.stringify yields a valid JavaScript string literal for any input,
    // including characters the old manual escaping missed (newlines, etc.).
    const selectorLiteral = JSON.stringify(cssSelector);
    // Build a script that captures just the element with html2canvas
    const captureScript = `
    const html2canvasFn = typeof html2canvas !== 'undefined' ? html2canvas :
        (typeof window !== 'undefined' && window.html2canvas) ? window.html2canvas :
        (typeof self !== 'undefined' && self.html2canvas) ? self.html2canvas :
        (typeof globalThis !== 'undefined' && globalThis.html2canvas) ? globalThis.html2canvas : null;

    if (!html2canvasFn) {
        throw new Error('html2canvas not loaded');
    }

    const el = document.querySelector(${selectorLiteral});
    if (!el) {
        throw new Error('Element not found for screenshot');
    }

    const canvas = await html2canvasFn(el, {
        backgroundColor: null,
        scale: window.devicePixelRatio || 1,
        logging: false,
        useCORS: true,
        allowTaint: false,
        imageTimeout: 5000,
    });

    if (!canvas) {
        throw new Error('html2canvas returned null canvas');
    }

    const dataUrl = canvas.toDataURL('image/png');
    if (!dataUrl || !dataUrl.startsWith('data:image/')) {
        throw new Error('Invalid data URL from canvas');
    }

    return dataUrl;
    `;
    try {
        // NOTE(review): unlike executeInWebview calls elsewhere in this module,
        // no appIdentifier is passed here — confirm executeAsyncInWebview
        // targets the intended app when several sessions are connected.
        const dataUrl = await executeAsyncInWebview(captureScript, windowId, 10000);
        if (!dataUrl || !dataUrl.startsWith('data:image/')) {
            return null;
        }
        // Extract base64 data from data URL
        const commaIndex = dataUrl.indexOf(',');
        if (commaIndex === -1) {
            return null;
        }
        return {
            type: 'image',
            data: dataUrl.substring(commaIndex + 1),
            mimeType: 'image/png',
        };
    }
    catch {
        // Screenshot is optional for callers; report failure as null.
        return null;
    }
}
171
+ // ============================================================================
172
+ // selectElement - Agent-initiated picker
173
+ // ============================================================================
174
/**
 * Agent-initiated element picker.
 *
 * Injects the picker overlay into the webview, waits for an
 * `element_picked` event carrying this call's picker ID, then returns the
 * formatted metadata plus an element screenshot when one can be captured.
 *
 * @param {Object} options
 * @param {number} [options.timeout=60000] - Milliseconds to wait for the user.
 * @param {*} [options.windowId] - Forwarded to the webview helpers.
 * @param {*} [options.appIdentifier] - Forwarded to the session/webview helpers.
 * @returns {Promise<Array<Object>>} Content items: text entries and, when the
 *   capture succeeds, one image entry.
 * @throws {Error} When the user does not pick an element before the timeout,
 *   or when injecting the picker script fails.
 */
export async function selectElement(options) {
    const { timeout = 60000, windowId, appIdentifier } = options;
    const client = await ensureSessionAndConnect(appIdentifier);
    // Generate unique picker ID (slice replaces the deprecated substr)
    const pickerId = `picker_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    // Assigned synchronously inside the promise executor below; lets the
    // injection-failure path tear down the listener and timer.
    let stopWaiting = () => { };
    // Set up event listener FIRST (before injection to avoid race condition)
    const eventPromise = new Promise((resolve, reject) => {
        let timeoutHandle;
        const handler = (message) => {
            if (message.type !== 'element_picked') {
                return;
            }
            const payload = message.payload;
            if (!payload || payload.pickerId !== pickerId) {
                return;
            }
            clearTimeout(timeoutHandle);
            client.removeListener('event', handler);
            resolve(payload);
        };
        stopWaiting = () => {
            clearTimeout(timeoutHandle);
            client.removeListener('event', handler);
        };
        client.on('event', handler);
        timeoutHandle = setTimeout(() => {
            client.removeListener('event', handler);
            // Clean up picker UI on timeout (fire-and-forget; it never rejects)
            void cleanupPickerHighlights(windowId, appIdentifier);
            reject(new Error(`Element picker timed out after ${timeout}ms. User did not select an element.`));
        }, timeout);
    });
    // Mark the promise as handled so a timeout that fires before it is
    // awaited below is not reported as an unhandled rejection. The later
    // `await eventPromise` still observes the rejection.
    eventPromise.catch(() => { });
    // Inject picker overlay (this returns quickly within the 5s execute_js timeout)
    const script = buildScript(SCRIPTS.elementPicker, { mode: 'pick', pickerId });
    try {
        await executeInWebview(script, windowId, appIdentifier);
    }
    catch (err) {
        // Injection failed: nobody will await eventPromise, so release the
        // listener and timer now instead of leaking them until the timeout.
        stopWaiting();
        await cleanupPickerHighlights(windowId, appIdentifier);
        throw err;
    }
    // Wait for user interaction
    const result = await eventPromise;
    // Handle cancellation
    if (result.cancelled) {
        return [{ type: 'text', text: 'Element picker was cancelled by the user.' }];
    }
    // Element was picked
    const element = result.element;
    if (!element) {
        await cleanupPickerHighlights(windowId, appIdentifier);
        return [{ type: 'text', text: 'Element picker returned no element data.' }];
    }
    // Clean up all picker UI BEFORE taking the screenshot
    await cleanupPickerHighlights(windowId, appIdentifier);
    const content = [];
    // Add formatted metadata
    content.push({ type: 'text', text: formatElementMetadata(element) });
    // Capture element-only screenshot (no picker overlays visible)
    const screenshot = await captureElementScreenshot(element.cssSelector, windowId);
    if (screenshot) {
        content.push(screenshot);
    }
    else {
        content.push({ type: 'text', text: '(Element screenshot capture failed)' });
    }
    return content;
}
233
+ // ============================================================================
234
+ // getPointedElement - Retrieve user-pointed element
235
+ // ============================================================================
236
/**
 * Return the element stored in `window.__MCP_POINTED_ELEMENT__`.
 *
 * The stored value is read and reset to null in one injected script, so
 * each pointed element is reported at most once.
 *
 * @param {Object} options
 * @param {*} [options.windowId] - Forwarded to the webview helpers.
 * @param {*} [options.appIdentifier] - Forwarded to executeInWebview.
 * @returns {Promise<Array<Object>>} A single text item when nothing was
 *   pointed or the stored data cannot be parsed; otherwise the formatted
 *   metadata followed by a screenshot item or a capture-failed notice.
 */
export async function getPointedElement(options) {
    const { windowId, appIdentifier } = options;
    // Read-and-clear in one round trip.
    const consumeScript = `(function() {
    var data = window.__MCP_POINTED_ELEMENT__;
    window.__MCP_POINTED_ELEMENT__ = null;
    return data ? JSON.stringify(data) : null;
})()`;
    const serialized = await executeInWebview(consumeScript, windowId, appIdentifier);
    const nothingPointed = !serialized || serialized === 'null' || serialized === 'undefined';
    if (nothingPointed) {
        return [
            {
                type: 'text',
                text: 'No element has been pointed. Use Alt+Shift+Click on an element in the Tauri app first.',
            },
        ];
    }
    let pointed;
    try {
        pointed = JSON.parse(serialized);
    }
    catch {
        return [{ type: 'text', text: `Failed to parse pointed element data: ${serialized.substring(0, 200)}` }];
    }
    // Metadata is formatted before the screenshot is attempted.
    const metadataItem = { type: 'text', text: formatElementMetadata(pointed) };
    // Element-only screenshot (no overlays)
    const shot = await captureElementScreenshot(pointed.cssSelector, windowId);
    const screenshotItem = shot
        ? shot
        : { type: 'text', text: '(Element screenshot capture failed)' };
    return [metadataItem, screenshotItem];
}
@@ -18,12 +18,13 @@
18
18
  *
19
19
  * @param {Object} params
20
20
  * @param {string} params.type - Snapshot type ('accessibility' or 'structure')
21
- * @param {string|null} params.selector - Optional CSS selector to scope snapshot
21
+ * @param {string|null} params.selector - Optional selector to scope snapshot (CSS, XPath, text, or ref ID)
22
+ * @param {string} params.strategy - Selector strategy: 'css', 'xpath', or 'text'
22
23
  */
23
24
  (function(params) {
24
25
  'use strict';
25
26
 
26
- const { type, selector } = params;
27
+ const { type, selector, strategy } = params;
27
28
 
28
29
  // ARIA states to include in snapshot (used by accessibility type)
29
30
  const ARIA_STATES = [
@@ -445,18 +446,17 @@
445
446
 
446
447
  if (selector) {
447
448
  try {
448
- document.querySelector(selector);
449
+ var structureElements = window.__MCP__.resolveAll(selector, strategy);
449
450
  } catch (e) {
450
- return 'Error: Invalid CSS selector "' + selector + '": ' + e.message;
451
+ return 'Error: Invalid selector "' + selector + '" (strategy: ' + strategy + '): ' + e.message;
451
452
  }
452
453
 
453
- var structureElements = document.querySelectorAll(selector);
454
454
  if (structureElements.length === 0) {
455
- return 'Error: No elements found matching selector "' + selector + '"';
455
+ return 'Error: No elements found matching selector "' + selector + '" (strategy: ' + strategy + ')';
456
456
  }
457
457
 
458
- structureRoots = Array.from(structureElements);
459
- structureScopeInfo = '# Scoped to: ' + selector + '\n';
458
+ structureRoots = structureElements;
459
+ structureScopeInfo = '# Scoped to: ' + selector + (strategy !== 'css' ? ' (strategy: ' + strategy + ')' : '') + '\n';
460
460
  if (structureRoots.length > 1) structureScopeInfo += '# ' + structureRoots.length + ' elements matched\n';
461
461
  } else {
462
462
  structureRoots = [document.body];
@@ -498,18 +498,17 @@
498
498
 
499
499
  if (selector) {
500
500
  try {
501
- document.querySelector(selector);
501
+ var elements = window.__MCP__.resolveAll(selector, strategy);
502
502
  } catch (e) {
503
- return 'Error: Invalid CSS selector "' + selector + '": ' + e.message;
503
+ return 'Error: Invalid selector "' + selector + '" (strategy: ' + strategy + '): ' + e.message;
504
504
  }
505
505
 
506
- var elements = document.querySelectorAll(selector);
507
506
  if (elements.length === 0) {
508
- return 'Error: No elements found matching selector "' + selector + '"';
507
+ return 'Error: No elements found matching selector "' + selector + '" (strategy: ' + strategy + ')';
509
508
  }
510
509
 
511
- roots = Array.from(elements);
512
- scopeInfo = '# Scoped to: ' + selector + '\n';
510
+ roots = elements;
511
+ scopeInfo = '# Scoped to: ' + selector + (strategy !== 'css' ? ' (strategy: ' + strategy + ')' : '') + '\n';
513
512
  if (roots.length > 1) scopeInfo += '# ' + roots.length + ' elements matched\n';
514
513
  } else {
515
514
  roots = [document.body];