@hypothesi/tauri-mcp-server 0.10.0 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ export declare const SETUP_INSTRUCTIONS: string;
@@ -0,0 +1,67 @@
1
+ import { PLUGIN_VERSION_CARGO } from './version.js';
2
+ export const SETUP_INSTRUCTIONS = `Help me set up or update the MCP Bridge plugin in my Tauri project.
3
+
4
+ ## IMPORTANT: Do Not Act Without Permission
5
+
6
+ **You must NOT make any changes to files without my explicit approval.**
7
+
8
+ 1. First, examine my project to understand its current state
9
+ 2. Then, present a clear summary of what changes are needed
10
+ 3. Wait for my approval before making ANY modifications
11
+ 4. Only proceed with changes after I confirm
12
+
13
+ ## Prerequisites Check
14
+
15
+ First, verify this is a Tauri v2 project:
16
+ - Look for \`src-tauri/\` directory and \`tauri.conf.json\`
17
+ - If this is NOT a Tauri project, stop and let me know this setup only applies to Tauri apps
18
+
19
+ ## What to Check
20
+
21
+ Examine these files and report what needs to be added or updated:
22
+
23
+ ### 1. Rust Plugin Dependency
24
+ Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`. If missing or outdated, note that it needs:
25
+ \`\`\`toml
26
+ [dependencies]
27
+ tauri-plugin-mcp-bridge = "${PLUGIN_VERSION_CARGO}"
28
+ \`\`\`
29
+
30
+ ### 2. Plugin Registration
31
+ Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin registration. It should have:
32
+ \`\`\`rust
33
+ #[cfg(debug_assertions)]
34
+ {
35
+ builder = builder.plugin(tauri_plugin_mcp_bridge::init());
36
+ }
37
+ \`\`\`
38
+
39
+ ### 3. Global Tauri Setting
40
+ Check \`src-tauri/tauri.conf.json\` for \`withGlobalTauri: true\` under the \`app\` section.
41
+ **This is required** - without it, the MCP bridge cannot communicate with the webview.
42
+
43
+ ### 4. Plugin Permissions
44
+ Check \`src-tauri/capabilities/default.json\` (or similar) for \`"mcp-bridge:default"\` permission.
45
+
46
+ ## Your Response Format
47
+
48
+ After examining the project, respond with:
49
+
50
+ 1. **Current State**: What's already configured correctly
51
+ 2. **Changes Needed**: A numbered list of specific changes required
52
+ 3. **Ask for Permission**: "May I proceed with these changes?"
53
+
54
+ Only after I say yes should you make any modifications.
55
+
56
+ ## After Setup
57
+
58
+ Once changes are approved and made:
59
+ 1. Run the Tauri app in development mode (\`cargo tauri dev\`)
60
+ 2. Use \`driver_session\` with action "start" to connect
61
+ 3. Use \`driver_session\` with action "status" to verify
62
+
63
+ ## Notes
64
+
65
+ - The plugin only runs in debug builds so it won't affect production
66
+ - The WebSocket server binds to \`0.0.0.0:9223\` by default
67
+ - For localhost-only access, use \`Builder::new().bind_address("127.0.0.1").build()\``;
@@ -10,7 +10,7 @@ import { executeInWebview, executeAsyncInWebview } from './webview-executor.js';
10
10
  import { ensureSessionAndConnect } from './plugin-client.js';
11
11
  import { SCRIPTS, buildScript } from './scripts/index.js';
12
12
  import { WindowTargetSchema } from './webview-interactions.js';
13
- import { getHtml2CanvasSource, HTML2CANVAS_SCRIPT_ID, } from './scripts/html2canvas-loader.js';
13
+ import { getHtml2CanvasSource, HTML2CANVAS_SCRIPT_ID, HTML2CANVAS_RESOLVER_SCRIPT, HTML2CANVAS_OPTIONS_SCRIPT, } from './scripts/html2canvas-loader.js';
14
14
  import { registerScript, isScriptRegistered } from './script-manager.js';
15
15
  // ============================================================================
16
16
  // Schemas
@@ -99,13 +99,13 @@ async function cleanupPickerHighlights(windowId, appIdentifier) {
99
99
  * Capture a screenshot of a specific element using html2canvas.
100
100
  * Returns the base64 data URL of the cropped element image, or null on failure.
101
101
  */
102
- async function captureElementScreenshot(cssSelector, windowId) {
102
+ async function captureElementScreenshot(cssSelector, windowId, appIdentifier) {
103
103
  // Ensure html2canvas is loaded in the webview
104
104
  try {
105
- const isRegistered = await isScriptRegistered(HTML2CANVAS_SCRIPT_ID);
105
+ const isRegistered = await isScriptRegistered(HTML2CANVAS_SCRIPT_ID, appIdentifier);
106
106
  if (!isRegistered) {
107
107
  const source = getHtml2CanvasSource();
108
- await registerScript(HTML2CANVAS_SCRIPT_ID, 'inline', source);
108
+ await registerScript(HTML2CANVAS_SCRIPT_ID, 'inline', source, windowId, appIdentifier);
109
109
  }
110
110
  }
111
111
  catch {
@@ -114,28 +114,14 @@ async function captureElementScreenshot(cssSelector, windowId) {
114
114
  const escapedSelector = cssSelector.replace(/\\/g, '\\\\').replace(/'/g, '\\\'');
115
115
  // Build a script that captures just the element with html2canvas
116
116
  const captureScript = `
117
- const html2canvasFn = typeof html2canvas !== 'undefined' ? html2canvas :
118
- (typeof window !== 'undefined' && window.html2canvas) ? window.html2canvas :
119
- (typeof self !== 'undefined' && self.html2canvas) ? self.html2canvas :
120
- (typeof globalThis !== 'undefined' && globalThis.html2canvas) ? globalThis.html2canvas : null;
121
-
122
- if (!html2canvasFn) {
123
- throw new Error('html2canvas not loaded');
124
- }
117
+ ${HTML2CANVAS_RESOLVER_SCRIPT}
125
118
 
126
119
  const el = document.querySelector('${escapedSelector}');
127
120
  if (!el) {
128
121
  throw new Error('Element not found for screenshot');
129
122
  }
130
123
 
131
- const canvas = await html2canvasFn(el, {
132
- backgroundColor: null,
133
- scale: window.devicePixelRatio || 1,
134
- logging: false,
135
- useCORS: true,
136
- allowTaint: false,
137
- imageTimeout: 5000,
138
- });
124
+ const canvas = await html2canvasFn(el, ${HTML2CANVAS_OPTIONS_SCRIPT});
139
125
 
140
126
  if (!canvas) {
141
127
  throw new Error('html2canvas returned null canvas');
@@ -149,7 +135,7 @@ async function captureElementScreenshot(cssSelector, windowId) {
149
135
  return dataUrl;
150
136
  `;
151
137
  try {
152
- const dataUrl = await executeAsyncInWebview(captureScript, windowId, 10000);
138
+ const dataUrl = await executeAsyncInWebview(captureScript, windowId, 10000, appIdentifier);
153
139
  if (!dataUrl || !dataUrl.startsWith('data:image/')) {
154
140
  return null;
155
141
  }
@@ -168,6 +154,23 @@ async function captureElementScreenshot(cssSelector, windowId) {
168
154
  return null;
169
155
  }
170
156
  }
157
+ /**
158
+ * Common helper to format an element and capture its screenshot.
159
+ */
160
+ async function buildElementContent(element, windowId, appIdentifier) {
161
+ const content = [];
162
+ // Add formatted metadata
163
+ content.push({ type: 'text', text: formatElementMetadata(element) });
164
+ // Capture element-only screenshot (no picker overlays visible)
165
+ const screenshot = await captureElementScreenshot(element.cssSelector, windowId, appIdentifier);
166
+ if (screenshot) {
167
+ content.push(screenshot);
168
+ }
169
+ else {
170
+ content.push({ type: 'text', text: '(Element screenshot capture failed)' });
171
+ }
172
+ return content;
173
+ }
171
174
  // ============================================================================
172
175
  // selectElement - Agent-initiated picker
173
176
  // ============================================================================
@@ -217,18 +220,7 @@ export async function selectElement(options) {
217
220
  }
218
221
  // Clean up all picker UI BEFORE taking the screenshot
219
222
  await cleanupPickerHighlights(windowId, appIdentifier);
220
- const content = [];
221
- // Add formatted metadata
222
- content.push({ type: 'text', text: formatElementMetadata(element) });
223
- // Capture element-only screenshot (no picker overlays visible)
224
- const screenshot = await captureElementScreenshot(element.cssSelector, windowId);
225
- if (screenshot) {
226
- content.push(screenshot);
227
- }
228
- else {
229
- content.push({ type: 'text', text: '(Element screenshot capture failed)' });
230
- }
231
- return content;
223
+ return buildElementContent(element, windowId, appIdentifier);
232
224
  }
233
225
  // ============================================================================
234
226
  // getPointedElement - Retrieve user-pointed element
@@ -257,16 +249,5 @@ export async function getPointedElement(options) {
257
249
  catch {
258
250
  return [{ type: 'text', text: `Failed to parse pointed element data: ${raw.substring(0, 200)}` }];
259
251
  }
260
- const content = [];
261
- // Add formatted metadata
262
- content.push({ type: 'text', text: formatElementMetadata(element) });
263
- // Capture element-only screenshot (no overlays)
264
- const screenshot = await captureElementScreenshot(element.cssSelector, windowId);
265
- if (screenshot) {
266
- content.push(screenshot);
267
- }
268
- else {
269
- content.push({ type: 'text', text: '(Element screenshot capture failed)' });
270
- }
271
- return content;
252
+ return buildElementContent(element, windowId, appIdentifier);
272
253
  }
@@ -59,7 +59,7 @@ interface GetScriptsResponse {
59
59
  * @param windowLabel - Optional window label to target
60
60
  * @returns Promise resolving to registration result
61
61
  */
62
- export declare function registerScript(id: string, type: ScriptType, content: string, windowLabel?: string): Promise<RegisterScriptResponse>;
62
+ export declare function registerScript(id: string, type: ScriptType, content: string, windowLabel?: string, appIdentifier?: string | number): Promise<RegisterScriptResponse>;
63
63
  /**
64
64
  * Removes a script from the registry and DOM.
65
65
  *
@@ -67,25 +67,25 @@ export declare function registerScript(id: string, type: ScriptType, content: st
67
67
  * @param windowLabel - Optional window label to target
68
68
  * @returns Promise resolving to removal result
69
69
  */
70
- export declare function removeScript(id: string, windowLabel?: string): Promise<RemoveScriptResponse>;
70
+ export declare function removeScript(id: string, windowLabel?: string, appIdentifier?: string | number): Promise<RemoveScriptResponse>;
71
71
  /**
72
72
  * Clears all registered scripts from the registry and DOM.
73
73
  *
74
74
  * @param windowLabel - Optional window label to target
75
75
  * @returns Promise resolving to the number of scripts cleared
76
76
  */
77
- export declare function clearScripts(windowLabel?: string): Promise<ClearScriptsResponse>;
77
+ export declare function clearScripts(windowLabel?: string, appIdentifier?: string | number): Promise<ClearScriptsResponse>;
78
78
  /**
79
79
  * Gets all registered scripts.
80
80
  *
81
81
  * @returns Promise resolving to the list of registered scripts
82
82
  */
83
- export declare function getScripts(): Promise<GetScriptsResponse>;
83
+ export declare function getScripts(appIdentifier?: string | number): Promise<GetScriptsResponse>;
84
84
  /**
85
85
  * Checks if a script with the given ID is registered.
86
86
  *
87
87
  * @param id - The script ID to check
88
88
  * @returns Promise resolving to true if the script is registered
89
89
  */
90
- export declare function isScriptRegistered(id: string): Promise<boolean>;
90
+ export declare function isScriptRegistered(id: string, appIdentifier?: string | number): Promise<boolean>;
91
91
  export {};
@@ -19,8 +19,8 @@ import { ensureSessionAndConnect } from './plugin-client.js';
19
19
  * @param windowLabel - Optional window label to target
20
20
  * @returns Promise resolving to registration result
21
21
  */
22
- export async function registerScript(id, type, content, windowLabel) {
23
- const client = await ensureSessionAndConnect();
22
+ export async function registerScript(id, type, content, windowLabel, appIdentifier) {
23
+ const client = await ensureSessionAndConnect(appIdentifier);
24
24
  const response = await client.sendCommand({
25
25
  command: 'register_script',
26
26
  args: { id, type, content, windowLabel },
@@ -37,8 +37,8 @@ export async function registerScript(id, type, content, windowLabel) {
37
37
  * @param windowLabel - Optional window label to target
38
38
  * @returns Promise resolving to removal result
39
39
  */
40
- export async function removeScript(id, windowLabel) {
41
- const client = await ensureSessionAndConnect();
40
+ export async function removeScript(id, windowLabel, appIdentifier) {
41
+ const client = await ensureSessionAndConnect(appIdentifier);
42
42
  const response = await client.sendCommand({
43
43
  command: 'remove_script',
44
44
  args: { id, windowLabel },
@@ -54,8 +54,8 @@ export async function removeScript(id, windowLabel) {
54
54
  * @param windowLabel - Optional window label to target
55
55
  * @returns Promise resolving to the number of scripts cleared
56
56
  */
57
- export async function clearScripts(windowLabel) {
58
- const client = await ensureSessionAndConnect();
57
+ export async function clearScripts(windowLabel, appIdentifier) {
58
+ const client = await ensureSessionAndConnect(appIdentifier);
59
59
  const response = await client.sendCommand({
60
60
  command: 'clear_scripts',
61
61
  args: { windowLabel },
@@ -70,8 +70,8 @@ export async function clearScripts(windowLabel) {
70
70
  *
71
71
  * @returns Promise resolving to the list of registered scripts
72
72
  */
73
- export async function getScripts() {
74
- const client = await ensureSessionAndConnect();
73
+ export async function getScripts(appIdentifier) {
74
+ const client = await ensureSessionAndConnect(appIdentifier);
75
75
  const response = await client.sendCommand({
76
76
  command: 'get_scripts',
77
77
  args: {},
@@ -87,7 +87,7 @@ export async function getScripts() {
87
87
  * @param id - The script ID to check
88
88
  * @returns Promise resolving to true if the script is registered
89
89
  */
90
- export async function isScriptRegistered(id) {
91
- const { scripts } = await getScripts();
90
+ export async function isScriptRegistered(id, appIdentifier) {
91
+ const { scripts } = await getScripts(appIdentifier);
92
92
  return scripts.some((s) => { return s.id === id; });
93
93
  }
@@ -13,6 +13,8 @@ export declare const HTML2CANVAS_SCRIPT_ID = "__mcp_html2canvas__";
13
13
  * Loaded lazily and cached.
14
14
  */
15
15
  export declare function getHtml2CanvasSource(): string;
16
+ export declare const HTML2CANVAS_RESOLVER_SCRIPT = "\n // Get the html2canvas function (may be on window, self, or globalThis)\n const html2canvasFn = typeof html2canvas !== 'undefined' ? html2canvas :\n (typeof window !== 'undefined' && window.html2canvas) ? window.html2canvas :\n (typeof self !== 'undefined' && self.html2canvas) ? self.html2canvas :\n (typeof globalThis !== 'undefined' && globalThis.html2canvas) ? globalThis.html2canvas : null;\n\n if (!html2canvasFn) {\n throw new Error('html2canvas not loaded');\n }\n";
17
+ export declare const HTML2CANVAS_OPTIONS_SCRIPT = "{\n backgroundColor: null,\n scale: window.devicePixelRatio || 1,\n logging: false,\n useCORS: true,\n allowTaint: false,\n imageTimeout: 5000,\n }";
16
18
  /**
17
19
  * Build a script that captures a screenshot using html2canvas.
18
20
  * Assumes html2canvas is already loaded (either via script manager or inline).
@@ -30,13 +30,7 @@ export function getHtml2CanvasSource() {
30
30
  }
31
31
  return html2canvasProSource;
32
32
  }
33
- /**
34
- * Build a script that captures a screenshot using html2canvas.
35
- * Assumes html2canvas is already loaded (either via script manager or inline).
36
- */
37
- export function buildScreenshotCaptureScript(format, quality) {
38
- // Note: This script is wrapped by executeAsyncInWebview, so we don't need an IIFE
39
- return `
33
+ export const HTML2CANVAS_RESOLVER_SCRIPT = `
40
34
  // Get the html2canvas function (may be on window, self, or globalThis)
41
35
  const html2canvasFn = typeof html2canvas !== 'undefined' ? html2canvas :
42
36
  (typeof window !== 'undefined' && window.html2canvas) ? window.html2canvas :
@@ -44,27 +38,34 @@ export function buildScreenshotCaptureScript(format, quality) {
44
38
  (typeof globalThis !== 'undefined' && globalThis.html2canvas) ? globalThis.html2canvas : null;
45
39
 
46
40
  if (!html2canvasFn) {
47
- throw new Error('html2canvas not loaded - function not found on any global');
41
+ throw new Error('html2canvas not loaded');
48
42
  }
49
-
50
- // Capture the entire document
51
- const element = document.documentElement;
52
- if (!element) {
53
- throw new Error('document.documentElement is null');
54
- }
55
-
56
- // Configure html2canvas options
57
- const options = {
43
+ `;
44
+ export const HTML2CANVAS_OPTIONS_SCRIPT = `{
58
45
  backgroundColor: null,
59
46
  scale: window.devicePixelRatio || 1,
60
47
  logging: false,
61
48
  useCORS: true,
62
49
  allowTaint: false,
63
50
  imageTimeout: 5000,
64
- };
51
+ }`;
52
+ /**
53
+ * Build a script that captures a screenshot using html2canvas.
54
+ * Assumes html2canvas is already loaded (either via script manager or inline).
55
+ */
56
+ export function buildScreenshotCaptureScript(format, quality) {
57
+ // Note: This script is wrapped by executeAsyncInWebview, so we don't need an IIFE
58
+ return `
59
+ ${HTML2CANVAS_RESOLVER_SCRIPT}
60
+
61
+ // Capture the entire document
62
+ const element = document.documentElement;
63
+ if (!element) {
64
+ throw new Error('document.documentElement is null');
65
+ }
65
66
 
66
67
  // Capture the webview
67
- const canvas = await html2canvasFn(element, options);
68
+ const canvas = await html2canvasFn(element, ${HTML2CANVAS_OPTIONS_SCRIPT});
68
69
  if (!canvas) {
69
70
  throw new Error('html2canvas returned null canvas');
70
71
  }
@@ -56,8 +56,24 @@ export function buildTypeScript(selector, text, strategy) {
56
56
  if (!element) throw new Error('Element not found: ' + selector);
57
57
 
58
58
  element.focus();
59
- element.value = text;
60
- element.dispatchEvent(new Event('input', { bubbles: true }));
59
+
60
+ // Use native prototype setter to bypass React's value tracker
61
+ var proto = element.tagName === 'TEXTAREA'
62
+ ? HTMLTextAreaElement.prototype
63
+ : HTMLInputElement.prototype;
64
+ var descriptor = Object.getOwnPropertyDescriptor(proto, 'value');
65
+
66
+ if (descriptor && descriptor.set) {
67
+ descriptor.set.call(element, text);
68
+ } else {
69
+ element.value = text;
70
+ }
71
+
72
+ // Reset React's internal value tracker so it detects the change
73
+ if (element._valueTracker) element._valueTracker.setValue('');
74
+
75
+ // Dispatch proper InputEvent (not generic Event) for React compatibility
76
+ element.dispatchEvent(new InputEvent('input', { bubbles: true, inputType: 'insertText', data: text }));
61
77
  element.dispatchEvent(new Event('change', { bubbles: true }));
62
78
 
63
79
  var msg = 'Typed "' + text + '" into ' + selector;
@@ -90,17 +106,20 @@ export function buildKeyEventScript(action, key, modifiers = []) {
90
106
 
91
107
  const activeElement = document.activeElement || document.body;
92
108
 
109
+ const modStr = modifiers.length ? ' with ' + modifiers.join('+') : '';
110
+ const dispatch = (type) => activeElement.dispatchEvent(new KeyboardEvent(type, eventOptions));
111
+
93
112
  if (action === 'press') {
94
- activeElement.dispatchEvent(new KeyboardEvent('keydown', eventOptions));
95
- activeElement.dispatchEvent(new KeyboardEvent('keypress', eventOptions));
96
- activeElement.dispatchEvent(new KeyboardEvent('keyup', eventOptions));
97
- return 'Pressed key: ' + key + (modifiers.length ? ' with ' + modifiers.join('+') : '');
113
+ dispatch('keydown');
114
+ dispatch('keypress');
115
+ dispatch('keyup');
116
+ return 'Pressed key: ' + key + modStr;
98
117
  } else if (action === 'down') {
99
- activeElement.dispatchEvent(new KeyboardEvent('keydown', eventOptions));
100
- return 'Key down: ' + key + (modifiers.length ? ' with ' + modifiers.join('+') : '');
118
+ dispatch('keydown');
119
+ return 'Key down: ' + key + modStr;
101
120
  } else if (action === 'up') {
102
- activeElement.dispatchEvent(new KeyboardEvent('keyup', eventOptions));
103
- return 'Key up: ' + key + (modifiers.length ? ' with ' + modifiers.join('+') : '');
121
+ dispatch('keyup');
122
+ return 'Key up: ' + key + modStr;
104
123
  }
105
124
 
106
125
  throw new Error('Unknown action: ' + action);
@@ -47,9 +47,22 @@
47
47
  }
48
48
 
49
49
  if (strategy === 'text') {
50
+ // First try: match element text content
50
51
  var xpath = xpathForText(selectorOrRef);
51
52
  var result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
52
- return result.singleNodeValue;
53
+ if (result.singleNodeValue) return result.singleNodeValue;
54
+
55
+ // Fallback: search placeholder, aria-label, and title attributes
56
+ var attrSelectors = [
57
+ '[placeholder*="' + selectorOrRef.replace(/"/g, '\\"') + '"]',
58
+ '[aria-label*="' + selectorOrRef.replace(/"/g, '\\"') + '"]',
59
+ '[title*="' + selectorOrRef.replace(/"/g, '\\"') + '"]',
60
+ ];
61
+ for (var i = 0; i < attrSelectors.length; i++) {
62
+ var el = document.querySelector(attrSelectors[i]);
63
+ if (el) return el;
64
+ }
65
+ return null;
53
66
  }
54
67
 
55
68
  if (strategy === 'xpath') {
@@ -78,12 +91,25 @@
78
91
  }
79
92
 
80
93
  if (strategy === 'text') {
94
+ // First try: match element text content
81
95
  var xpath = xpathForText(selector);
82
96
  var snapshot = document.evaluate(xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
83
97
  var results = [];
84
98
  for (var i = 0; i < snapshot.snapshotLength; i++) {
85
99
  results.push(snapshot.snapshotItem(i));
86
100
  }
101
+ if (results.length > 0) return results;
102
+
103
+ // Fallback: search placeholder, aria-label, and title attributes
104
+ var attrSelectors = [
105
+ '[placeholder*="' + selector.replace(/"/g, '\\"') + '"]',
106
+ '[aria-label*="' + selector.replace(/"/g, '\\"') + '"]',
107
+ '[title*="' + selector.replace(/"/g, '\\"') + '"]',
108
+ ];
109
+ for (var i = 0; i < attrSelectors.length; i++) {
110
+ var found = Array.from(document.querySelectorAll(attrSelectors[i]));
111
+ if (found.length > 0) return results.concat(found);
112
+ }
87
113
  return results;
88
114
  }
89
115
 
@@ -12,7 +12,7 @@ import { z } from 'zod';
12
12
  *
13
13
  * @throws Error if no session is active (driver_session must be called first)
14
14
  */
15
- export declare function ensureReady(): Promise<void>;
15
+ export declare function ensureReady(windowId?: string, appIdentifier?: string | number): Promise<void>;
16
16
  /**
17
17
  * Reset initialization state (useful for testing or reconnecting).
18
18
  */
@@ -48,7 +48,7 @@ export declare function executeInWebviewWithContext(script: string, windowId?: s
48
48
  * @param timeout - Timeout in milliseconds (default: 5000)
49
49
  * @returns Result of the script execution
50
50
  */
51
- export declare function executeAsyncInWebview(script: string, windowId?: string, timeout?: number): Promise<string>;
51
+ export declare function executeAsyncInWebview(script: string, windowId?: string, timeout?: number, appIdentifier?: string | number): Promise<string>;
52
52
  /**
53
53
  * Initialize console log capture in the webview.
54
54
  * This intercepts console methods and stores logs in memory.
@@ -1,6 +1,5 @@
1
1
  import { z } from 'zod';
2
- import { connectPlugin } from './plugin-client.js';
3
- import { hasActiveSession, getDefaultSession, resolveTargetApp } from './session-manager.js';
2
+ import { hasActiveSession, resolveTargetApp, manageDriverSession } from './session-manager.js';
4
3
  import { createMcpLogger } from '../logger.js';
5
4
  import { buildScreenshotScript, buildScreenshotCaptureScript, getHtml2CanvasSource, HTML2CANVAS_SCRIPT_ID, } from './scripts/html2canvas-loader.js';
6
5
  import { registerScript, isScriptRegistered } from './script-manager.js';
@@ -17,7 +16,7 @@ import { getResolveRefSource, RESOLVE_REF_SCRIPT_ID } from './scripts/index.js';
17
16
  // ============================================================================
18
17
  // Auto-Initialization System
19
18
  // ============================================================================
20
- let isInitialized = false;
19
+ const initializedTargets = new Set();
21
20
  const driverLogger = createMcpLogger('DRIVER');
22
21
  /**
23
22
  * Ensures the MCP server is fully initialized and ready to use.
@@ -32,28 +31,52 @@ const driverLogger = createMcpLogger('DRIVER');
32
31
  *
33
32
  * @throws Error if no session is active (driver_session must be called first)
34
33
  */
35
- export async function ensureReady() {
36
- if (isInitialized) {
37
- return;
38
- }
39
- // Require an active session to prevent connecting to wrong app
34
+ export async function ensureReady(windowId, appIdentifier) {
35
+ // Auto-connect if no active session
40
36
  if (!hasActiveSession()) {
41
- throw new Error('No active session. Call driver_session with action "start" first to connect to a Tauri app.');
37
+ const result = await manageDriverSession('start');
38
+ if (!hasActiveSession()) {
39
+ throw new Error('Auto-connect failed: ' + result + '. Call driver_session with action "start" to connect manually.');
40
+ }
42
41
  }
43
- // Get default session for initial connection
44
- const session = getDefaultSession();
45
- if (session) {
46
- await connectPlugin(session.host, session.port);
42
+ const session = resolveTargetApp(appIdentifier);
43
+ if (!session.client.isConnected()) {
44
+ await session.client.connect();
47
45
  }
48
- // Register the resolve-ref helper so ref-based selectors work in all tools
49
- await registerScript(RESOLVE_REF_SCRIPT_ID, 'inline', getResolveRefSource());
50
- isInitialized = true;
46
+ const targetKey = `${session.host}:${session.port}:${windowId ?? 'main'}`;
47
+ if (initializedTargets.has(targetKey)) {
48
+ return;
49
+ }
50
+ // Register the resolve-ref helper in the target window
51
+ // so ref-based selectors work there.
52
+ await registerScript(RESOLVE_REF_SCRIPT_ID, 'inline', getResolveRefSource(), windowId, appIdentifier);
53
+ await waitForResolveRefHelper(session, windowId);
54
+ initializedTargets.add(targetKey);
51
55
  }
52
56
  /**
53
57
  * Reset initialization state (useful for testing or reconnecting).
54
58
  */
55
59
  export function resetInitialization() {
56
- isInitialized = false;
60
+ initializedTargets.clear();
61
+ }
62
+ async function waitForResolveRefHelper(session, windowId) {
63
+ if (!session) {
64
+ throw new Error('No active session available while registering resolve-ref helper.');
65
+ }
66
+ for (let attempt = 0; attempt < 20; attempt++) {
67
+ const response = await session.client.sendCommand({
68
+ command: 'execute_js',
69
+ args: {
70
+ script: 'return !!(window.__MCP__ && typeof window.__MCP__.resolveRef === "function")',
71
+ windowLabel: windowId,
72
+ },
73
+ }, 2000);
74
+ if (response.success && response.data === true) {
75
+ return;
76
+ }
77
+ await new Promise((resolve) => { return setTimeout(resolve, 50); });
78
+ }
79
+ throw new Error('Resolve-ref helper was not available in the webview after registration.');
57
80
  }
58
81
  /**
59
82
  * Execute JavaScript in the Tauri webview using native IPC via WebSocket.
@@ -78,7 +101,7 @@ export async function executeInWebview(script, windowId, appIdentifier) {
78
101
  export async function executeInWebviewWithContext(script, windowId, appIdentifier) {
79
102
  try {
80
103
  // Ensure we're fully initialized
81
- await ensureReady();
104
+ await ensureReady(windowId, appIdentifier);
82
105
  // Resolve target session
83
106
  const session = resolveTargetApp(appIdentifier);
84
107
  const client = session.client;
@@ -124,11 +147,12 @@ export async function executeInWebviewWithContext(script, windowId, appIdentifie
124
147
  * @param timeout - Timeout in milliseconds (default: 5000)
125
148
  * @returns Result of the script execution
126
149
  */
127
- export async function executeAsyncInWebview(script, windowId, timeout = 5000) {
150
+ export async function executeAsyncInWebview(script, windowId, timeout, appIdentifier) {
151
+ const resolvedTimeout = timeout ?? 5000;
128
152
  const wrappedScript = `
129
153
  return (async () => {
130
154
  const timeoutPromise = new Promise((_, reject) => {
131
- setTimeout(() => reject(new Error('Script execution timeout')), ${timeout});
155
+ setTimeout(() => reject(new Error('Script execution timeout')), ${resolvedTimeout});
132
156
  });
133
157
 
134
158
  const scriptPromise = (async () => {
@@ -138,7 +162,7 @@ export async function executeAsyncInWebview(script, windowId, timeout = 5000) {
138
162
  return await Promise.race([scriptPromise, timeoutPromise]);
139
163
  })();
140
164
  `;
141
- return executeInWebview(wrappedScript, windowId);
165
+ return executeInWebview(wrappedScript, windowId, appIdentifier);
142
166
  }
143
167
  // ============================================================================
144
168
  // Console Log Capture System
@@ -268,14 +292,14 @@ function buildScreenshotResult(dataUrl, method, windowContext) {
268
292
  * Prepares the html2canvas script for screenshot capture.
269
293
  * Tries to use the script manager for persistence, falls back to inline injection.
270
294
  */
271
- async function prepareHtml2canvasScript(format, quality) {
295
+ async function prepareHtml2canvasScript(format, quality, windowId, appIdentifier) {
272
296
  try {
273
297
  // Check if html2canvas is already registered
274
- const isRegistered = await isScriptRegistered(HTML2CANVAS_SCRIPT_ID);
298
+ const isRegistered = await isScriptRegistered(HTML2CANVAS_SCRIPT_ID, appIdentifier);
275
299
  if (!isRegistered) {
276
300
  // Register html2canvas via script manager for persistence across navigations
277
301
  const html2canvasSource = getHtml2CanvasSource();
278
- await registerScript(HTML2CANVAS_SCRIPT_ID, 'inline', html2canvasSource);
302
+ await registerScript(HTML2CANVAS_SCRIPT_ID, 'inline', html2canvasSource, windowId, appIdentifier);
279
303
  }
280
304
  // Use the capture-only script since html2canvas is now registered
281
305
  return buildScreenshotCaptureScript(format, quality);
@@ -299,7 +323,7 @@ export async function captureScreenshot(options = {}) {
299
323
  // - Linux: Chromium/WebKit screenshot APIs
300
324
  try {
301
325
  // Ensure we're fully initialized
302
- await ensureReady();
326
+ await ensureReady(windowId, appIdentifier);
303
327
  // Resolve target session
304
328
  const session = resolveTargetApp(appIdentifier);
305
329
  const client = session.client;
@@ -331,7 +355,7 @@ export async function captureScreenshot(options = {}) {
331
355
  }
332
356
  // Fallback 1: Use html2canvas library for high-quality DOM rendering
333
357
  // Try to use the script manager to register html2canvas for persistence
334
- const html2canvasScript = await prepareHtml2canvasScript(format, quality);
358
+ const html2canvasScript = await prepareHtml2canvasScript(format, quality, windowId, appIdentifier);
335
359
  // Fallback: Try Screen Capture API if available
336
360
  // Note: This script is wrapped by executeAsyncInWebview, so we don't need an IIFE
337
361
  const screenCaptureScript = `
@@ -390,7 +414,7 @@ export async function captureScreenshot(options = {}) {
390
414
  `;
391
415
  try {
392
416
  // Try html2canvas second (after native APIs)
393
- const result = await executeAsyncInWebview(html2canvasScript, undefined, 10000); // Longer timeout for library loading
417
+ const result = await executeAsyncInWebview(html2canvasScript, windowId, 10000, appIdentifier);
394
418
  // Validate that we got a real data URL, not 'null' or empty
395
419
  if (result && result !== 'null' && result.startsWith('data:image/')) {
396
420
  return buildScreenshotResult(result, 'html2canvas');
@@ -400,7 +424,7 @@ export async function captureScreenshot(options = {}) {
400
424
  catch (html2canvasError) {
401
425
  try {
402
426
  // Fallback to Screen Capture API
403
- const result = await executeAsyncInWebview(screenCaptureScript);
427
+ const result = await executeAsyncInWebview(screenCaptureScript, windowId, 5000, appIdentifier);
404
428
  // Validate that we got a real data URL
405
429
  if (result && result.startsWith('data:image/')) {
406
430
  return buildScreenshotResult(result, 'Screen Capture API');
@@ -22,7 +22,8 @@ export const WindowTargetSchema = z.object({
22
22
  * Defaults to 'css' for backward compatibility.
23
23
  */
24
24
  const selectorStrategyField = z.enum(['css', 'xpath', 'text']).default('css').describe('Selector strategy: "css" (default) for CSS selectors, "xpath" for XPath expressions, ' +
25
- '"text" to find elements containing the given text. Ref IDs (e.g., "ref=e3") work with any strategy.');
25
+ '"text" to find elements by text content, with fallback to placeholder, aria-label, ' +
26
+ 'and title attributes. Ref IDs (e.g., "ref=e3") work with any strategy.');
26
27
  // ============================================================================
27
28
  // Schemas
28
29
  // ============================================================================
@@ -154,10 +155,8 @@ export async function screenshot(options = {}) {
154
155
  if (!imageContent || imageContent.type !== 'image') {
155
156
  throw new Error('Screenshot capture failed: no image data');
156
157
  }
157
- // Decode base64 and write to file
158
- const buffer = Buffer.from(imageContent.data, 'base64');
159
158
  const resolvedPath = resolve(filePath);
160
- await writeFile(resolvedPath, buffer);
159
+ await writeFile(resolvedPath, imageContent.data, 'base64');
161
160
  return { filePath: resolvedPath, format };
162
161
  }
163
162
  return result;
@@ -295,7 +294,7 @@ export async function domSnapshot(options) {
295
294
  const { type, selector, strategy, windowId, appIdentifier } = options;
296
295
  // Only load aria-api for accessibility snapshots
297
296
  if (type === 'accessibility') {
298
- await ensureAriaApiLoaded(windowId);
297
+ await ensureAriaApiLoaded(windowId, appIdentifier);
299
298
  }
300
299
  // Then execute the snapshot script
301
300
  const script = buildScript(SCRIPTS.domSnapshot, { type, selector: selector ?? null, strategy: strategy ?? 'css' });
@@ -311,11 +310,11 @@ export async function domSnapshot(options) {
311
310
  * Ensure aria-api library is loaded in the webview.
312
311
  * Uses the script manager to inject the library if not already present.
313
312
  */
314
- async function ensureAriaApiLoaded(windowId) {
313
+ async function ensureAriaApiLoaded(windowId, appIdentifier) {
315
314
  const { getAriaApiSource, ARIA_API_SCRIPT_ID: ariaApiScriptId } = await import('./scripts/aria-api-loader.js');
316
315
  const { registerScript, isScriptRegistered } = await import('./script-manager.js');
317
- if (await isScriptRegistered(ariaApiScriptId)) {
316
+ if (await isScriptRegistered(ariaApiScriptId, appIdentifier)) {
318
317
  return;
319
318
  }
320
- await registerScript(ariaApiScriptId, 'inline', getAriaApiSource(), windowId);
319
+ await registerScript(ariaApiScriptId, 'inline', getAriaApiSource(), windowId, appIdentifier);
321
320
  }
@@ -2,7 +2,7 @@
2
2
  * Single source of truth for all MCP prompt definitions
3
3
  * Prompts are user-controlled templates that appear as slash commands in MCP clients
4
4
  */
5
- import { PLUGIN_VERSION_CARGO } from './version.js';
5
+ import { SETUP_INSTRUCTIONS as SETUP_PROMPT } from './constants.js';
6
6
  const FIX_WEBVIEW_ERRORS_PROMPT = `I need help finding and fixing JavaScript errors in my Tauri app's webview.
7
7
 
8
8
  Please follow these steps:
@@ -25,72 +25,6 @@ Please follow these steps:
25
25
  If no errors are found, let me know the app is running cleanly.
26
26
 
27
27
  If the session fails to start, help me troubleshoot the connection (is the app running? is the MCP bridge plugin installed?).`;
28
- const SETUP_PROMPT = `Help me set up or update the MCP Bridge plugin in my Tauri project.
29
-
30
- ## IMPORTANT: Do Not Act Without Permission
31
-
32
- **You must NOT make any changes to files without my explicit approval.**
33
-
34
- 1. First, examine my project to understand its current state
35
- 2. Then, present a clear summary of what changes are needed
36
- 3. Wait for my approval before making ANY modifications
37
- 4. Only proceed with changes after I confirm
38
-
39
- ## Prerequisites Check
40
-
41
- First, verify this is a Tauri v2 project:
42
- - Look for \`src-tauri/\` directory and \`tauri.conf.json\`
43
- - If this is NOT a Tauri project, stop and let me know this setup only applies to Tauri apps
44
-
45
- ## What to Check
46
-
47
- Examine these files and report what needs to be added or updated:
48
-
49
- ### 1. Rust Plugin Dependency
50
- Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`. If missing or outdated, note that it needs:
51
- \`\`\`toml
52
- [dependencies]
53
- tauri-plugin-mcp-bridge = "${PLUGIN_VERSION_CARGO}"
54
- \`\`\`
55
-
56
- ### 2. Plugin Registration
57
- Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin registration. It should have:
58
- \`\`\`rust
59
- #[cfg(debug_assertions)]
60
- {
61
- builder = builder.plugin(tauri_plugin_mcp_bridge::init());
62
- }
63
- \`\`\`
64
-
65
- ### 3. Global Tauri Setting
66
- Check \`src-tauri/tauri.conf.json\` for \`withGlobalTauri: true\` under the \`app\` section.
67
- **This is required** - without it, the MCP bridge cannot communicate with the webview.
68
-
69
- ### 4. Plugin Permissions
70
- Check \`src-tauri/capabilities/default.json\` (or similar) for \`"mcp-bridge:default"\` permission.
71
-
72
- ## Your Response Format
73
-
74
- After examining the project, respond with:
75
-
76
- 1. **Current State**: What's already configured correctly
77
- 2. **Changes Needed**: A numbered list of specific changes required
78
- 3. **Ask for Permission**: "May I proceed with these changes?"
79
-
80
- Only after I say yes should you make any modifications.
81
-
82
- ## After Setup
83
-
84
- Once changes are approved and made:
85
- 1. Run the Tauri app in development mode (\`cargo tauri dev\`)
86
- 2. Use \`driver_session\` with action "start" to connect
87
- 3. Use \`driver_session\` with action "status" to verify
88
-
89
- ## Notes
90
-
91
- - The plugin only runs in debug builds so it won't affect production
92
- - The WebSocket server binds to \`0.0.0.0:9223\` by default
93
- - For localhost-only access, use \`Builder::new().bind_address("127.0.0.1").build()\``;
94
28
  const SELECT_ELEMENT_PROMPT = (message) => {
95
29
  const lines = [
96
30
  'The user wants to visually select an element in their running Tauri app so they can discuss it with you.',
@@ -9,7 +9,7 @@ import { readLogs, ReadLogsSchema } from './monitor/logs.js';
9
9
  import { executeIPCCommand, manageIPCMonitoring, getIPCEvents, emitTestEvent, getBackendState, manageWindow, ExecuteIPCCommandSchema, ManageIPCMonitoringSchema, GetIPCEventsSchema, EmitTestEventSchema, GetBackendStateSchema, ManageWindowSchema, } from './driver/plugin-commands.js';
10
10
  import { interact, screenshot, keyboard, waitFor, getStyles, executeJavaScript, findElement, domSnapshot, InteractSchema, ScreenshotSchema, KeyboardSchema, WaitForSchema, GetStylesSchema, ExecuteJavaScriptSchema, FindElementSchema, DomSnapshotSchema, } from './driver/webview-interactions.js';
11
11
  import { selectElement, getPointedElement, SelectElementSchema, GetPointedElementSchema, } from './driver/element-picker.js';
12
- import { PLUGIN_VERSION_CARGO } from './version.js';
12
+ import { SETUP_INSTRUCTIONS } from './constants.js';
13
13
  /**
14
14
  * Standard multi-app description for webview tools.
15
15
  */
@@ -24,76 +24,6 @@ export const TOOL_CATEGORIES = {
24
24
  UI_AUTOMATION: 'UI Automation & WebView Interaction',
25
25
  IPC_PLUGIN: 'IPC & Plugin Tools (via MCP Bridge)',
26
26
  };
27
- // Setup instructions for the MCP Bridge plugin
28
- const SETUP_INSTRUCTIONS = `# MCP Bridge Plugin Setup Instructions
29
-
30
- Use these instructions to set up or update the MCP Bridge plugin in a Tauri v2 project.
31
-
32
- ## IMPORTANT: Do Not Act Without Permission
33
-
34
- **You must NOT make any changes to files without the user's explicit approval.**
35
-
36
- 1. First, examine the project to understand its current state
37
- 2. Then, present a clear summary of what changes are needed
38
- 3. Wait for user approval before making ANY modifications
39
- 4. Only proceed with changes after they confirm
40
-
41
- ## Prerequisites Check
42
-
43
- First, verify this is a Tauri v2 project:
44
- - Look for \`src-tauri/\` directory and \`tauri.conf.json\`
45
- - If this is NOT a Tauri project, stop and let the user know this setup only applies to Tauri apps
46
-
47
- ## What to Check
48
-
49
- Examine these files and report what needs to be added or updated:
50
-
51
- ### 1. Rust Plugin Dependency
52
- Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`. If missing or outdated, note that it needs:
53
- \`\`\`toml
54
- [dependencies]
55
- tauri-plugin-mcp-bridge = "${PLUGIN_VERSION_CARGO}"
56
- \`\`\`
57
-
58
- ### 2. Plugin Registration
59
- Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin registration. It should have:
60
- \`\`\`rust
61
- #[cfg(debug_assertions)]
62
- {
63
- builder = builder.plugin(tauri_plugin_mcp_bridge::init());
64
- }
65
- \`\`\`
66
-
67
- ### 3. Global Tauri Setting
68
- Check \`src-tauri/tauri.conf.json\` for \`withGlobalTauri: true\` under the \`app\` section.
69
- **This is required** - without it, the MCP bridge cannot communicate with the webview.
70
-
71
- ### 4. Plugin Permissions
72
- Check \`src-tauri/capabilities/default.json\` (or similar) for \`"mcp-bridge:default"\` permission.
73
-
74
- ## Response Format
75
-
76
- After examining the project, respond with:
77
-
78
- 1. **Current State**: What's already configured correctly
79
- 2. **Changes Needed**: A numbered list of specific changes required
80
- 3. **Ask for Permission**: "May I proceed with these changes?"
81
-
82
- Only after the user says yes should you make any modifications.
83
-
84
- ## After Setup
85
-
86
- Once changes are approved and made:
87
- 1. Run the Tauri app in development mode (\`cargo tauri dev\`)
88
- 2. Use \`driver_session\` with action "start" to connect
89
- 3. Use \`driver_session\` with action "status" to verify
90
-
91
- ## Notes
92
-
93
- - The plugin only runs in debug builds so it won't affect production
94
- - The WebSocket server binds to \`0.0.0.0:9223\` by default
95
- - For localhost-only access, use \`Builder::new().bind_address("127.0.0.1").build()\`
96
- `;
97
27
  /**
98
28
  * Complete registry of all available tools
99
29
  * This is the single source of truth for tool definitions
@@ -171,6 +101,7 @@ export const TOOLS = [
171
101
  name: 'webview_find_element',
172
102
  description: '[Tauri Apps Only] Find DOM elements in a running Tauri app\'s webview. ' +
173
103
  'Supports CSS selectors (default), XPath expressions, and text content matching via the strategy parameter. ' +
104
+ 'The "text" strategy first searches element text content, then falls back to placeholder, aria-label, and title attributes. ' +
174
105
  'Returns the element\'s HTML. ' +
175
106
  'Requires active driver_session. ' +
176
107
  MULTI_APP_DESC + ' ' +
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hypothesi/tauri-mcp-server",
3
- "version": "0.10.0",
3
+ "version": "0.11.1",
4
4
  "mcpName": "io.github.hypothesi/mcp-server-tauri",
5
5
  "description": "A Model Context Protocol server for use with Tauri v2 applications",
6
6
  "type": "module",