@hypothesi/tauri-mcp-server 0.8.3 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -11,7 +11,7 @@ A **Model Context Protocol (MCP) server** that enables AI assistants like Claude
11
11
 
12
12
  | Category | Capabilities |
13
13
  |----------|-------------|
14
- | 🎯 **UI Automation** | Screenshots, clicks, typing, scrolling, element finding |
14
+ | 🎯 **UI Automation** | Screenshots, clicks, typing, scrolling, element finding, visual element picker |
15
15
  | 🔍 **IPC Monitoring** | Capture and inspect Tauri IPC calls in real-time |
16
16
  | 📱 **Mobile Dev** | List Android emulators & iOS simulators |
17
17
  | 📋 **Logs** | Stream console, Android logcat, iOS, and system logs |
@@ -86,7 +86,7 @@ await driver_session({ action: "stop", appIdentifier: 9223 })
86
86
  await driver_session({ action: "stop" })
87
87
  ```
88
88
 
89
- ## Available Tools (18 total)
89
+ ## Available Tools (20 total)
90
90
 
91
91
  ### Setup & Configuration
92
92
 
@@ -108,6 +108,8 @@ await driver_session({ action: "stop" })
108
108
  | `webview_get_styles` | Get computed CSS styles |
109
109
  | `webview_execute_js` | Execute JavaScript in webview |
110
110
  | `webview_dom_snapshot` | Get structured DOM snapshot (accessibility or structure) |
111
+ | `webview_select_element` | Visual element picker — user clicks an element, returns metadata + screenshot |
112
+ | `webview_get_pointed_element` | Get metadata for the element the user Alt+Shift+Clicked |
111
113
  | `manage_window` | List windows, get info, or resize |
112
114
 
113
115
  ### IPC & Plugin
@@ -0,0 +1,272 @@
1
+ /**
2
+ * Element picker module for MCP Server Tauri.
3
+ *
4
+ * Provides two tools:
5
+ * - selectElement: Agent-initiated picker overlay (user clicks element)
6
+ * - getPointedElement: Retrieve element user pointed at via Alt+Shift+Click
7
+ */
8
+ import { z } from 'zod';
9
+ import { executeInWebview, executeAsyncInWebview } from './webview-executor.js';
10
+ import { ensureSessionAndConnect } from './plugin-client.js';
11
+ import { SCRIPTS, buildScript } from './scripts/index.js';
12
+ import { WindowTargetSchema } from './webview-interactions.js';
13
+ import { getHtml2CanvasSource, HTML2CANVAS_SCRIPT_ID, } from './scripts/html2canvas-loader.js';
14
+ import { registerScript, isScriptRegistered } from './script-manager.js';
15
+ // ============================================================================
16
+ // Schemas
17
+ // ============================================================================
18
/**
 * Picker timeout field: optional, defaults to 60000 ms, and must lie within
 * 5000-120000 ms.
 */
const pickerTimeoutField = z
    .number()
    .min(5000)
    .max(120000)
    .optional()
    .default(60000)
    .describe('Timeout in ms for user to pick an element (5000-120000, default 60000)');
/** Input schema for selectElement: window targeting plus the picker timeout. */
export const SelectElementSchema = WindowTargetSchema.extend({ timeout: pickerTimeoutField });
/** Input schema for getPointedElement: window targeting only. */
export const GetPointedElementSchema = WindowTargetSchema.extend({});
23
+ // ============================================================================
24
+ // Helpers
25
+ // ============================================================================
26
/**
 * Format element metadata into a readable Markdown text block.
 *
 * Only `tag` is assumed to be present. All other fields are optional because
 * the in-page picker falls back to `{ tag, cssSelector: '' }` when the
 * bridge's metadata collector is unavailable; sections whose data is missing
 * are omitted instead of throwing.
 *
 * @param {object} meta - Element metadata received from the webview.
 * @returns {string} The formatted lines joined with `\n`.
 */
function formatElementMetadata(meta) {
    // Normalise optional collections so partial metadata cannot crash the formatter.
    const classes = Array.isArray(meta.classes) ? meta.classes : [];
    const attributes = meta.attributes && typeof meta.attributes === 'object' ? meta.attributes : {};
    const computedStyles = meta.computedStyles && typeof meta.computedStyles === 'object' ? meta.computedStyles : {};
    const parentChain = Array.isArray(meta.parentChain) ? meta.parentChain : [];
    const cssSelector = meta.cssSelector == null ? '' : meta.cssSelector;
    const lines = [`## Element: <${meta.tag}>`];
    if (meta.id) {
        lines.push(`**ID:** ${meta.id}`);
    }
    if (classes.length > 0) {
        lines.push(`**Classes:** ${classes.join(', ')}`);
    }
    lines.push(`**CSS Selector:** \`${cssSelector}\``);
    if (meta.xpath) {
        lines.push(`**XPath:** \`${meta.xpath}\``);
    }
    // Bounding rect (rounded to whole pixels)
    const rect = meta.boundingRect;
    if (rect) {
        lines.push(`**Bounding Rect:** ${Math.round(rect.width)}x${Math.round(rect.height)} at (${Math.round(rect.x)}, ${Math.round(rect.y)})`);
    }
    // Attributes (skip id and class which are already shown)
    const attrEntries = Object.entries(attributes).filter(([key]) => key !== 'id' && key !== 'class');
    if (attrEntries.length > 0) {
        lines.push(`**Attributes:** ${attrEntries.map(([key, value]) => `${key}="${value}"`).join(', ')}`);
    }
    if (meta.textContent) {
        // Keep long text from flooding the response.
        const text = meta.textContent.length > 200
            ? meta.textContent.substring(0, 200) + '...'
            : meta.textContent;
        lines.push(`**Text Content:** ${text}`);
    }
    // Computed styles: every entry supplied in the metadata is listed as-is.
    const styleEntries = Object.entries(computedStyles);
    if (styleEntries.length > 0) {
        lines.push('**Computed Styles:**');
        for (const [prop, val] of styleEntries) {
            lines.push(` ${prop}: ${val}`);
        }
    }
    if (parentChain.length > 0) {
        lines.push('**Parent Chain:**');
        for (const parent of parentChain) {
            let desc = ` <${parent.tag}>`;
            if (parent.id) {
                desc += `#${parent.id}`;
            }
            if (parent.classes && parent.classes.length > 0) {
                desc += `.${parent.classes.join('.')}`;
            }
            if (parent.boundingRect) {
                desc += ` (${Math.round(parent.boundingRect.width)}x${Math.round(parent.boundingRect.height)})`;
            }
            lines.push(desc);
        }
    }
    return lines.join('\n');
}
82
/**
 * Remove every picker-owned node (anything carrying a `data-mcp-picker`
 * attribute) from the webview's DOM.
 *
 * This is best-effort: a failure to run the script is ignored so that cleanup
 * never masks the caller's own result or error.
 */
async function cleanupPickerHighlights(windowId, appIdentifier) {
    const removalScript = [
        '(function() {',
        'var els = document.querySelectorAll(\'[data-mcp-picker]\');',
        'for (var i = 0; i < els.length; i++) { els[i].parentNode.removeChild(els[i]); }',
        'return \'Cleaned up \' + els.length + \' picker elements\';',
        '})()',
    ].join('\n');
    try {
        await executeInWebview(removalScript, windowId, appIdentifier);
    }
    catch {
        // Intentionally ignored: cleanup is best effort.
    }
}
98
/**
 * Capture a screenshot of a specific element using html2canvas.
 *
 * @param {string} cssSelector - Selector identifying the element in the webview.
 * @param {string} [windowId] - Target window, forwarded to executeAsyncInWebview.
 * @returns {Promise<{type: 'image', data: string, mimeType: 'image/png'} | null>}
 *   An image content item holding the base64 PNG payload, or null when the
 *   selector is empty or the capture fails for any reason.
 */
async function captureElementScreenshot(cssSelector, windowId) {
    // Without a usable selector the in-page querySelector call can only fail
    // (the picker's fallback metadata carries an empty selector), so skip the
    // round trip to the webview entirely.
    if (typeof cssSelector !== 'string' || cssSelector.trim() === '') {
        return null;
    }
    // Ensure html2canvas is loaded in the webview
    try {
        const isRegistered = await isScriptRegistered(HTML2CANVAS_SCRIPT_ID);
        if (!isRegistered) {
            const source = getHtml2CanvasSource();
            await registerScript(HTML2CANVAS_SCRIPT_ID, 'inline', source);
        }
    }
    catch {
        // Registration is best effort; if html2canvas is missing, the capture
        // script below throws and this function returns null.
    }
    // JSON.stringify yields a valid JavaScript string literal for any selector,
    // including ones containing quotes, backslashes or line breaks.
    const selectorLiteral = JSON.stringify(cssSelector);
    // Build a script that captures just the element with html2canvas
    const captureScript = `
        const html2canvasFn = typeof html2canvas !== 'undefined' ? html2canvas :
            (typeof window !== 'undefined' && window.html2canvas) ? window.html2canvas :
            (typeof self !== 'undefined' && self.html2canvas) ? self.html2canvas :
            (typeof globalThis !== 'undefined' && globalThis.html2canvas) ? globalThis.html2canvas : null;

        if (!html2canvasFn) {
            throw new Error('html2canvas not loaded');
        }

        const el = document.querySelector(${selectorLiteral});
        if (!el) {
            throw new Error('Element not found for screenshot');
        }

        const canvas = await html2canvasFn(el, {
            backgroundColor: null,
            scale: window.devicePixelRatio || 1,
            logging: false,
            useCORS: true,
            allowTaint: false,
            imageTimeout: 5000,
        });

        if (!canvas) {
            throw new Error('html2canvas returned null canvas');
        }

        const dataUrl = canvas.toDataURL('image/png');
        if (!dataUrl || !dataUrl.startsWith('data:image/')) {
            throw new Error('Invalid data URL from canvas');
        }

        return dataUrl;
    `;
    try {
        const dataUrl = await executeAsyncInWebview(captureScript, windowId, 10000);
        if (typeof dataUrl !== 'string' || !dataUrl.startsWith('data:image/')) {
            return null;
        }
        // Extract base64 data from data URL
        const commaIndex = dataUrl.indexOf(',');
        if (commaIndex === -1) {
            return null;
        }
        return {
            type: 'image',
            data: dataUrl.substring(commaIndex + 1),
            mimeType: 'image/png',
        };
    }
    catch {
        // A failed capture is reported to the caller as null, not as an error.
        return null;
    }
}
171
+ // ============================================================================
172
+ // selectElement - Agent-initiated picker
173
+ // ============================================================================
174
/**
 * Agent-initiated element picker.
 *
 * Injects the picker overlay into the webview, then waits for an
 * `element_picked` event carrying this call's picker id.
 *
 * @param {object} options
 * @param {number} [options.timeout=60000] - How long to wait for the user, in ms.
 * @param {string} [options.windowId] - Target window.
 * @param {string|number} [options.appIdentifier] - Target app session.
 * @returns {Promise<Array<object>>} Content items: formatted metadata followed
 *   by an element screenshot (or a text note when the capture failed), or a
 *   single text item when the user cancelled or no element data arrived.
 * @throws {Error} When the overlay cannot be injected or the user does not
 *   pick an element before the timeout elapses.
 */
export async function selectElement(options) {
    const { timeout = 60000, windowId, appIdentifier } = options;
    const client = await ensureSessionAndConnect(appIdentifier);
    // Generate unique picker ID
    const pickerId = `picker_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    let timeoutHandle;
    let handler;
    // Detach the listener and cancel the timer; safe to call more than once.
    const stopWaiting = () => {
        clearTimeout(timeoutHandle);
        client.removeListener('event', handler);
    };
    // Set up event listener FIRST (before injection to avoid race condition)
    const eventPromise = new Promise((resolve, reject) => {
        handler = (message) => {
            if (message.type !== 'element_picked') {
                return;
            }
            const payload = message.payload;
            if (!payload || payload.pickerId !== pickerId) {
                return;
            }
            stopWaiting();
            resolve(payload);
        };
        client.on('event', handler);
        timeoutHandle = setTimeout(() => {
            stopWaiting();
            // Clean up picker UI on timeout. Fire-and-forget is safe here because
            // cleanupPickerHighlights swallows its own errors.
            void cleanupPickerHighlights(windowId, appIdentifier);
            reject(new Error(`Element picker timed out after ${timeout}ms. User did not select an element.`));
        }, timeout);
    });
    // Mark the promise as observed so that a timeout firing while the injection
    // below is still pending does not surface as an unhandled rejection; the
    // later `await eventPromise` still throws the timeout error.
    eventPromise.catch(() => { });
    // Inject picker overlay (this returns quickly within the 5s execute_js timeout)
    const script = buildScript(SCRIPTS.elementPicker, { mode: 'pick', pickerId });
    try {
        await executeInWebview(script, windowId, appIdentifier);
    }
    catch (err) {
        // No overlay was shown, so nobody can answer: release the listener and
        // the timer instead of leaving them behind until the timeout fires.
        stopWaiting();
        throw err;
    }
    // Wait for user interaction
    const result = await eventPromise;
    // Handle cancellation
    if (result.cancelled) {
        return [{ type: 'text', text: 'Element picker was cancelled by the user.' }];
    }
    // Element was picked
    const element = result.element;
    if (!element) {
        await cleanupPickerHighlights(windowId, appIdentifier);
        return [{ type: 'text', text: 'Element picker returned no element data.' }];
    }
    // Clean up all picker UI BEFORE taking the screenshot
    await cleanupPickerHighlights(windowId, appIdentifier);
    const content = [];
    // Add formatted metadata
    content.push({ type: 'text', text: formatElementMetadata(element) });
    // Capture element-only screenshot (no picker overlays visible)
    const screenshot = await captureElementScreenshot(element.cssSelector, windowId);
    if (screenshot) {
        content.push(screenshot);
    }
    else {
        content.push({ type: 'text', text: '(Element screenshot capture failed)' });
    }
    return content;
}
233
+ // ============================================================================
234
+ // getPointedElement - Retrieve user-pointed element
235
+ // ============================================================================
236
/**
 * Return the element the user last pointed at, consuming it in the process.
 *
 * Reads `window.__MCP_POINTED_ELEMENT__` in the webview and resets it to null,
 * then reports the element as formatted metadata plus an element screenshot
 * (or a text note when the capture failed).
 */
export async function getPointedElement(options) {
    const { windowId, appIdentifier } = options;
    const textItem = (text) => ({ type: 'text', text });
    // Read and clear the pointed element in one round trip.
    const consumeScript = [
        '(function() {',
        'var data = window.__MCP_POINTED_ELEMENT__;',
        'window.__MCP_POINTED_ELEMENT__ = null;',
        'return data ? JSON.stringify(data) : null;',
        '})()',
    ].join('\n');
    const serialized = await executeInWebview(consumeScript, windowId, appIdentifier);
    const nothingPointed = !serialized || serialized === 'null' || serialized === 'undefined';
    if (nothingPointed) {
        return [
            textItem('No element has been pointed. Use Alt+Shift+Click on an element in the Tauri app first.'),
        ];
    }
    let pointed;
    try {
        pointed = JSON.parse(serialized);
    }
    catch {
        return [textItem(`Failed to parse pointed element data: ${serialized.substring(0, 200)}`)];
    }
    // Metadata first, then the element-only screenshot (no overlays).
    const metadataItem = textItem(formatElementMetadata(pointed));
    const shot = await captureElementScreenshot(pointed.cssSelector, windowId);
    return [
        metadataItem,
        shot ? shot : textItem('(Element screenshot capture failed)'),
    ];
}
@@ -0,0 +1,395 @@
1
/**
 * Element picker overlay for MCP Server Tauri.
 *
 * Activated by the agent via webview_select_element. Displays a hover highlight,
 * context tooltip, and cancel bar. On click (desktop) or two-tap (mobile) the
 * selected element's metadata is emitted as a Tauri event so the MCP server can
 * retrieve it asynchronously.
 *
 * Page-level state used by this script:
 *   window.__MCP_PICKER_ACTIVE__   id of the running picker session, or null
 *   window.__MCP_PICKER_CLEANUP__  teardown function of the running session, so
 *                                  that a later activation can dismantle it
 *   window.__MCP_PICKED_ELEMENT__  metadata of the most recently picked element
 *
 * @param {Object} params
 * @param {string} params.mode - 'pick' (agent-initiated picker); not read here
 * @param {string} params.pickerId - Unique identifier for this picker session
 */
(function(params) {
  var pickerId = params.pickerId;

  // Duplicate-activation guard: dismantle the previous picker and report it as
  // cancelled. The previous session lives in another closure, so it has to be
  // torn down through the cleanup function it registered on window; calling
  // this invocation's own cleanup() would leave its DOM and listeners behind.
  if (window.__MCP_PICKER_ACTIVE__) {
    var prevId = window.__MCP_PICKER_ACTIVE__;
    teardownPreviousPicker();
    emitPicked({ pickerId: prevId, cancelled: true });
  }
  window.__MCP_PICKER_ACTIVE__ = pickerId;
  window.__MCP_PICKER_CLEANUP__ = cleanup;

  var isTouch = ('ontouchstart' in window) || (navigator.maxTouchPoints > 0);
  var highlight = null;
  var tooltip = null;
  var cancelBar = null;
  var trackedElement = null; // element currently being highlighted
  var rafId = null; // requestAnimationFrame handle for cyclic repositioning

  // ── Velocity-based hover throttling state ──────────────────────────────
  var lastMouseX = 0;
  var lastMouseY = 0;
  var lastMouseTime = 0;
  var mouseVelocity = 0;
  var hoverUpdateTimer = null;

  // ── Cancel bar ─────────────────────────────────────────────────────────
  cancelBar = document.createElement('div');
  cancelBar.setAttribute('data-mcp-picker', 'cancel-bar');
  var cancelBarAtTop = true;
  cancelBar.style.cssText =
    'position:fixed;top:0;left:0;right:0;z-index:2147483647;height:40px;' +
    'background:rgba(30,41,59,0.95);display:flex;align-items:center;' +
    'justify-content:space-between;padding:0 12px;font:13px/1 system-ui,sans-serif;' +
    'color:#E2E8F0;box-sizing:border-box;' +
    'transition:top 0.2s ease,bottom 0.2s ease;';

  var cancelText = document.createElement('span');
  cancelText.textContent = 'MCP Element Picker \u2014 Click to select | ESC or tap X to cancel';
  cancelText.style.cssText = 'overflow:hidden;text-overflow:ellipsis;white-space:nowrap;flex:1;';

  // Move button — toggles bar between top and bottom
  var moveBtn = document.createElement('button');
  moveBtn.setAttribute('data-mcp-picker', 'move-btn');
  moveBtn.textContent = '\u2193'; // ↓
  moveBtn.title = 'Move bar to bottom';
  moveBtn.style.cssText =
    'background:none;border:none;color:#94A3B8;font-size:16px;cursor:pointer;' +
    'width:32px;height:40px;display:flex;align-items:center;justify-content:center;' +
    'flex-shrink:0;';
  function toggleCancelBarPosition(e) {
    e.stopPropagation();
    e.preventDefault();
    cancelBarAtTop = !cancelBarAtTop;
    cancelBar.style.top = cancelBarAtTop ? '0' : 'auto';
    cancelBar.style.bottom = cancelBarAtTop ? 'auto' : '0';
    moveBtn.textContent = cancelBarAtTop ? '\u2193' : '\u2191'; // ↓ or ↑
    moveBtn.title = cancelBarAtTop ? 'Move bar to bottom' : 'Move bar to top';
  }
  moveBtn.addEventListener('click', toggleCancelBarPosition);
  moveBtn.addEventListener('touchend', toggleCancelBarPosition);

  var cancelBtn = document.createElement('button');
  cancelBtn.setAttribute('data-mcp-picker', 'cancel-btn');
  cancelBtn.textContent = '\u2715';
  cancelBtn.style.cssText =
    'background:none;border:none;color:#E2E8F0;font-size:20px;cursor:pointer;' +
    'width:40px;height:40px;display:flex;align-items:center;justify-content:center;' +
    'flex-shrink:0;';
  cancelBtn.addEventListener('click', function(e) {
    e.stopPropagation();
    cancelPicker();
  });
  cancelBtn.addEventListener('touchend', function(e) {
    e.stopPropagation();
    e.preventDefault();
    cancelPicker();
  });

  cancelBar.appendChild(cancelText);
  cancelBar.appendChild(moveBtn);
  cancelBar.appendChild(cancelBtn);
  document.body.appendChild(cancelBar);

  // ── Event emission ─────────────────────────────────────────────────────
  // Sends the picker result to the backend; a no-op when the Tauri event API
  // is not exposed on window.
  function emitPicked(payload) {
    if (window.__TAURI__ && window.__TAURI__.event && window.__TAURI__.event.emit) {
      window.__TAURI__.event.emit('__element_picked', payload);
    }
  }

  // ── Previous-session teardown ──────────────────────────────────────────
  function teardownPreviousPicker() {
    var previousCleanup = window.__MCP_PICKER_CLEANUP__;
    if (typeof previousCleanup === 'function') {
      try {
        previousCleanup();
      } catch (e) {
        // Ignored on purpose: the DOM sweep below still removes the overlay.
      }
    }
    // Sweep any picker nodes that are still attached (covers sessions that
    // never registered a cleanup function).
    var stale = document.querySelectorAll('[data-mcp-picker]');
    for (var i = 0; i < stale.length; i++) {
      if (stale[i].parentNode) {
        stale[i].parentNode.removeChild(stale[i]);
      }
    }
    window.__MCP_PICKER_CLEANUP__ = null;
    window.__MCP_PICKER_ACTIVE__ = null;
  }

  // ── Highlight helpers ──────────────────────────────────────────────────
  function createHighlight() {
    var el = document.createElement('div');
    el.setAttribute('data-mcp-picker', 'highlight');
    el.style.cssText =
      'position:fixed;z-index:2147483645;pointer-events:none;' +
      'background:rgba(59,130,246,0.15);border:2px solid #3B82F6;';
    document.body.appendChild(el);
    return el;
  }

  function positionHighlight(el, target) {
    var rect = target.getBoundingClientRect();
    el.style.top = rect.top + 'px';
    el.style.left = rect.left + 'px';
    el.style.width = rect.width + 'px';
    el.style.height = rect.height + 'px';
  }

  // ── Cyclic highlight repositioning via rAF ─────────────────────────────
  // Re-applies the tracked element's rect on every animation frame so the
  // highlight follows animated or repositioned elements.
  function startTracking() {
    if (rafId) return;
    function tick() {
      if (trackedElement && highlight) {
        positionHighlight(highlight, trackedElement);
      }
      rafId = requestAnimationFrame(tick);
    }
    rafId = requestAnimationFrame(tick);
  }

  function stopTracking() {
    if (rafId) {
      cancelAnimationFrame(rafId);
      rafId = null;
    }
  }

  // ── Tooltip helpers ────────────────────────────────────────────────────
  function showTooltip(target) {
    if (!tooltip) {
      tooltip = document.createElement('div');
      tooltip.setAttribute('data-mcp-picker', 'tooltip');
      tooltip.style.cssText =
        'position:fixed;z-index:2147483646;pointer-events:none;' +
        'background:#1E293B;color:#E2E8F0;font:12px/1.4 monospace;' +
        'padding:4px 8px;border-radius:4px;white-space:nowrap;max-width:300px;' +
        'overflow:hidden;text-overflow:ellipsis;';
      document.body.appendChild(tooltip);
    }

    var rect = target.getBoundingClientRect();
    var tag = target.tagName.toLowerCase();
    var id = target.id ? '#' + target.id : '';
    // className is not a string for some elements (e.g. SVG); skip it then.
    var cls = target.className && typeof target.className === 'string'
      ? '.' + target.className.trim().split(/\s+/).join('.')
      : '';
    var label = (tag + id + cls);
    if (label.length > 60) label = label.substring(0, 57) + '...';
    label += ' (' + Math.round(rect.width) + '\u00d7' + Math.round(rect.height) + ')';
    tooltip.textContent = label;

    // Position above or below the element
    var tooltipTop = rect.top - 28;
    if (tooltipTop < 44) { // below the cancel bar (40px + 4px padding)
      tooltipTop = rect.bottom + 4;
    }
    tooltip.style.top = tooltipTop + 'px';
    tooltip.style.left = Math.max(4, rect.left) + 'px';
  }

  function hideTooltip() {
    if (tooltip && tooltip.parentNode) {
      tooltip.parentNode.removeChild(tooltip);
      tooltip = null;
    }
  }

  // ── Element detection ──────────────────────────────────────────────────
  // Uses window.__MCP_GET_ELEMENT_AT_POINT__ when it is defined,
  // falls back to document.elementFromPoint.
  function findElementAt(x, y) {
    if (window.__MCP_GET_ELEMENT_AT_POINT__) {
      return window.__MCP_GET_ELEMENT_AT_POINT__(x, y);
    }
    // Fallback: hide highlight, use single elementFromPoint
    if (highlight) highlight.style.display = 'none';
    var el = document.elementFromPoint(x, y);
    if (highlight) highlight.style.display = '';
    return el;
  }

  // ── Picker element detection guard (fallback for findElementAt) ────────
  // True when el or any ancestor carries a data-mcp-picker attribute.
  function isPickerUI(el) {
    while (el) {
      if (el.getAttribute && el.getAttribute('data-mcp-picker')) {
        return true;
      }
      el = el.parentElement;
    }
    return false;
  }

  // ── Selection ──────────────────────────────────────────────────────────
  function selectElement(target) {
    var metadata;
    if (window.__MCP_COLLECT_ELEMENT_METADATA__) {
      metadata = window.__MCP_COLLECT_ELEMENT_METADATA__(target);
    } else {
      metadata = { tag: target.tagName.toLowerCase(), cssSelector: '' };
    }

    // Store in window for later retrieval
    window.__MCP_PICKED_ELEMENT__ = metadata;

    // Drop any throttled hover update that is still pending; otherwise it could
    // fire after selection and re-create the tooltip that is removed below.
    if (hoverUpdateTimer) {
      clearTimeout(hoverUpdateTimer);
      hoverUpdateTimer = null;
    }

    // Remove the cancel bar, tooltip and listeners. The highlight node is left
    // in place here; it stays until a cleanup pass removes the remaining
    // [data-mcp-picker] nodes.
    removeCancelBar();
    hideTooltip();
    stopTracking();
    removeListeners();

    // Emit Tauri event
    emitPicked({ pickerId: pickerId, element: metadata });

    window.__MCP_PICKER_ACTIVE__ = null;
    return 'Element selected: ' + metadata.tag + (metadata.id ? '#' + metadata.id : '');
  }

  // ── Cancellation ──────────────────────────────────────────────────────
  function cancelPicker() {
    cleanup();
    emitPicked({ pickerId: pickerId, cancelled: true });
    window.__MCP_PICKER_ACTIVE__ = null;
  }

  // ── Cleanup ────────────────────────────────────────────────────────────
  function removeCancelBar() {
    if (cancelBar && cancelBar.parentNode) {
      cancelBar.parentNode.removeChild(cancelBar);
      cancelBar = null;
    }
  }

  function removeHighlight() {
    if (highlight && highlight.parentNode) {
      highlight.parentNode.removeChild(highlight);
      highlight = null;
    }
  }

  function cleanup() {
    removeCancelBar();
    removeHighlight();
    hideTooltip();
    stopTracking();
    removeListeners();
    trackedElement = null;
    if (hoverUpdateTimer) {
      clearTimeout(hoverUpdateTimer);
      hoverUpdateTimer = null;
    }
    // Release the registration only if it still points at this session.
    if (window.__MCP_PICKER_CLEANUP__ === cleanup) {
      window.__MCP_PICKER_CLEANUP__ = null;
    }
    window.__MCP_PICKER_ACTIVE__ = null;
  }

  // ── Core hover-update logic (shared by throttled and immediate paths) ──
  function updateHoveredElement() {
    var el = findElementAt(lastMouseX, lastMouseY);

    if (!el || isPickerUI(el)) return;

    if (el === trackedElement) return; // no change
    trackedElement = el;

    if (!highlight) {
      highlight = createHighlight();
      startTracking();
    }
    positionHighlight(highlight, el);
    showTooltip(el);
  }

  // ── Desktop: hover + click with velocity throttling ────────────────────
  function onMouseMove(e) {
    var now = performance.now();
    var dx = e.clientX - lastMouseX;
    var dy = e.clientY - lastMouseY;
    var dt = now - lastMouseTime;
    var distance = Math.sqrt(dx * dx + dy * dy);

    lastMouseX = e.clientX;
    lastMouseY = e.clientY;

    // Calculate velocity in pixels per second
    mouseVelocity = dt > 0 ? (distance / dt) * 1000 : 0;
    lastMouseTime = now;

    // If moving fast (>600 px/s), defer the update by 36ms and restart that
    // delay on every further fast move
    if (mouseVelocity > 600) {
      if (hoverUpdateTimer) {
        clearTimeout(hoverUpdateTimer);
      }
      hoverUpdateTimer = setTimeout(updateHoveredElement, 36); // ~28fps
    } else {
      if (hoverUpdateTimer) {
        clearTimeout(hoverUpdateTimer);
        hoverUpdateTimer = null;
      }
      updateHoveredElement();
    }
  }

  function onClick(e) {
    // Let clicks on picker UI (cancel, move buttons) pass through to their handlers
    if (isPickerUI(e.target)) return;

    e.preventDefault();
    e.stopPropagation();

    var el = findElementAt(e.clientX, e.clientY);

    if (!el || isPickerUI(el)) return;

    selectElement(el);
  }

  // ── Mobile: two-tap ────────────────────────────────────────────────────
  var lastTapTarget = null;

  function onTouchEnd(e) {
    var touch = e.changedTouches[0];
    if (!touch) return;

    var el = findElementAt(touch.clientX, touch.clientY);

    // Let taps on picker UI (cancel, move buttons) pass through to their handlers
    if (!el || isPickerUI(el)) return;

    e.preventDefault();
    e.stopPropagation();

    if (lastTapTarget === el) {
      // Second tap on same element -> confirm
      selectElement(el);
    } else {
      // First tap (or different element) -> highlight
      lastTapTarget = el;
      trackedElement = el;

      if (!highlight) {
        highlight = createHighlight();
        startTracking();
      }
      positionHighlight(highlight, el);
      showTooltip(el);

      if (cancelText) {
        cancelText.textContent = 'Tap element again to send | X Cancel';
      }
    }
  }

  // ── Keyboard escape ────────────────────────────────────────────────────
  function onKeyDown(e) {
    if (e.key === 'Escape') {
      e.preventDefault();
      e.stopPropagation();
      cancelPicker();
    }
  }

  // ── Listener management ────────────────────────────────────────────────
  function removeListeners() {
    document.removeEventListener('mousemove', onMouseMove, true);
    document.removeEventListener('click', onClick, true);
    document.removeEventListener('touchend', onTouchEnd, true);
    document.removeEventListener('keydown', onKeyDown, true);
  }

  // Attach appropriate listeners based on device
  document.addEventListener('keydown', onKeyDown, true);

  if (isTouch) {
    document.addEventListener('touchend', onTouchEnd, true);
  }

  // Always attach mouse listeners (hybrid devices like touch laptops)
  document.addEventListener('mousemove', onMouseMove, true);
  document.addEventListener('click', onClick, true);

  return 'Picker activated (id: ' + pickerId + ')';
})
@@ -22,6 +22,7 @@ export const SCRIPTS = {
22
22
  focus: loadScript('focus'),
23
23
  findElement: loadScript('find-element'),
24
24
  domSnapshot: loadScript('dom-snapshot'),
25
+ elementPicker: loadScript('element-picker'),
25
26
  };
26
27
  /** Script ID used for resolve-ref in the script registry. */
27
28
  export const RESOLVE_REF_SCRIPT_ID = '__mcp_resolve_ref__';
@@ -91,6 +91,28 @@ Once changes are approved and made:
91
91
  - The plugin only runs in debug builds so it won't affect production
92
92
  - The WebSocket server binds to \`0.0.0.0:9223\` by default
93
93
  - For localhost-only access, use \`Builder::new().bind_address("127.0.0.1").build()\``;
94
/**
 * Build the text of the `select` prompt, which walks the agent through the
 * visual element picker flow.
 *
 * @param {string} [message] - Optional note from the user about the element;
 *   appended under its own heading when non-empty.
 * @returns {string} The prompt lines joined with `\n`.
 */
const SELECT_ELEMENT_PROMPT = (message) => {
    const lines = [
        'The user wants to visually select an element in their running Tauri app so they can discuss it with you.',
        '',
        'Follow these steps:',
        '',
        '1. **Ensure a session is active** - Use `driver_session` with action "start" if not already connected',
        '',
        '2. **Activate the element picker** - Call `webview_select_element` to show the picker overlay in the app.',
        'The user will see a blue highlight following their cursor and can click to select an element.',
        'They can press Escape or click X to cancel.',
        '',
        '3. **Review the result** - You will receive the element\'s metadata (tag, id, classes, CSS selector, XPath,',
        // The picker overlay is removed before capture and only the element is
        // rendered, so the screenshot is neither annotated nor highlighted.
        'bounding rect, attributes, computed styles, parent chain) and, when capture succeeds, a screenshot of just the selected element (picker overlay removed).',
        '',
        '4. **Respond to the user** - Use the element context and screenshot to address their request.',
    ];
    if (message) {
        lines.push('', '## User\'s Message About the Element', '', message);
    }
    return lines.join('\n');
};
94
116
  /**
95
117
  * Complete registry of all available prompts
96
118
  */
@@ -114,6 +136,30 @@ export const PROMPTS = [
114
136
  ];
115
137
  },
116
138
  },
139
+ {
140
+ name: 'select',
141
+ description: 'Visually select an element in the running Tauri app. ' +
142
+ 'Activates a picker overlay — click an element to send its metadata and an annotated screenshot to the agent. ' +
143
+ 'Optionally include a message describing what you want to do with the element.',
144
+ arguments: [
145
+ {
146
+ name: 'message',
147
+ description: 'What you want to discuss or do with the selected element (e.g. "this button should be green instead of blue")',
148
+ required: false,
149
+ },
150
+ ],
151
+ handler: (args) => {
152
+ return [
153
+ {
154
+ role: 'user',
155
+ content: {
156
+ type: 'text',
157
+ text: SELECT_ELEMENT_PROMPT(args.message),
158
+ },
159
+ },
160
+ ];
161
+ },
162
+ },
117
163
  {
118
164
  name: 'setup',
119
165
  description: 'Set up or update the MCP Bridge plugin in a Tauri project. ' +
@@ -8,6 +8,7 @@ import { manageDriverSession, ManageDriverSessionSchema, } from './driver/sessio
8
8
  import { readLogs, ReadLogsSchema } from './monitor/logs.js';
9
9
  import { executeIPCCommand, manageIPCMonitoring, getIPCEvents, emitTestEvent, getBackendState, manageWindow, ExecuteIPCCommandSchema, ManageIPCMonitoringSchema, GetIPCEventsSchema, EmitTestEventSchema, GetBackendStateSchema, ManageWindowSchema, } from './driver/plugin-commands.js';
10
10
  import { interact, screenshot, keyboard, waitFor, getStyles, executeJavaScript, findElement, domSnapshot, InteractSchema, ScreenshotSchema, KeyboardSchema, WaitForSchema, GetStylesSchema, ExecuteJavaScriptSchema, FindElementSchema, DomSnapshotSchema, } from './driver/webview-interactions.js';
11
+ import { selectElement, getPointedElement, SelectElementSchema, GetPointedElementSchema, } from './driver/element-picker.js';
11
12
  import { PLUGIN_VERSION_CARGO } from './version.js';
12
13
  /**
13
14
  * Standard multi-app description for webview tools.
@@ -409,6 +410,56 @@ export const TOOLS = [
409
410
  });
410
411
  },
411
412
  },
413
+ // Element Picker Tools
414
+ {
415
+ name: 'webview_select_element',
416
+ description: '[Tauri Apps Only] Activates an element picker overlay in the Tauri app. ' +
417
+ 'The user visually selects an element by clicking it, and the tool returns ' +
418
+ 'rich element metadata (tag, id, classes, attributes, text, bounding rect, ' +
419
+ 'CSS selector, computed styles, parent chain) plus an annotated screenshot ' +
420
+ 'with the element highlighted. ' +
421
+ 'Requires active driver_session. ' +
422
+ MULTI_APP_DESC,
423
+ category: TOOL_CATEGORIES.UI_AUTOMATION,
424
+ schema: SelectElementSchema,
425
+ annotations: {
426
+ title: 'Select Element (Visual Picker)',
427
+ readOnlyHint: true,
428
+ openWorldHint: false,
429
+ },
430
+ handler: async (args) => {
431
+ const parsed = SelectElementSchema.parse(args);
432
+ return await selectElement({
433
+ timeout: parsed.timeout,
434
+ windowId: parsed.windowId,
435
+ appIdentifier: parsed.appIdentifier,
436
+ });
437
+ },
438
+ },
439
+ {
440
+ name: 'webview_get_pointed_element',
441
+ description: '[Tauri Apps Only] Retrieves element metadata for an element the user previously ' +
442
+ 'pointed at via Alt+Shift+Click in the Tauri app. Returns the same rich metadata ' +
443
+ 'as webview_select_element (tag, id, classes, attributes, text, bounding rect, ' +
444
+ 'CSS selector, computed styles, parent chain) plus an annotated screenshot. ' +
445
+ 'The user must Alt+Shift+Click an element first before calling this tool. ' +
446
+ 'Requires active driver_session. ' +
447
+ MULTI_APP_DESC,
448
+ category: TOOL_CATEGORIES.UI_AUTOMATION,
449
+ schema: GetPointedElementSchema,
450
+ annotations: {
451
+ title: 'Get Pointed Element',
452
+ readOnlyHint: true,
453
+ openWorldHint: false,
454
+ },
455
+ handler: async (args) => {
456
+ const parsed = GetPointedElementSchema.parse(args);
457
+ return await getPointedElement({
458
+ windowId: parsed.windowId,
459
+ appIdentifier: parsed.appIdentifier,
460
+ });
461
+ },
462
+ },
412
463
  // IPC & Plugin Tools
413
464
  {
414
465
  name: 'ipc_execute_command',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hypothesi/tauri-mcp-server",
3
- "version": "0.8.3",
3
+ "version": "0.9.0",
4
4
  "mcpName": "io.github.hypothesi/mcp-server-tauri",
5
5
  "description": "A Model Context Protocol server for use with Tauri v2 applications",
6
6
  "type": "module",