@hypothesi/tauri-mcp-server 0.8.0 → 0.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,43 +127,58 @@ export class PluginClient extends EventEmitter {
127
127
  }
128
128
  }
129
129
  /**
130
- * Send a command to the plugin and wait for response
130
+ * Send a command to the plugin and wait for response.
131
+ *
132
+ * Automatically retries on transient "not found" errors (e.g. window not
133
+ * yet registered after WebSocket connect) with exponential backoff.
131
134
  */
132
135
  async sendCommand(command, timeoutMs = 5000) {
133
- // If not connected, try to reconnect first
134
- if (!this._ws || this._ws.readyState !== WebSocket.OPEN) {
135
- try {
136
- await this.connect();
136
+ const maxRetries = 3;
137
+ const baseDelayMs = 100;
138
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
139
+ // If not connected, try to reconnect first
140
+ if (!this._ws || this._ws.readyState !== WebSocket.OPEN) {
141
+ try {
142
+ await this.connect();
143
+ }
144
+ catch {
145
+ throw new Error('Not connected to plugin and reconnection failed');
146
+ }
137
147
  }
138
- catch {
139
- throw new Error('Not connected to plugin and reconnection failed');
148
+ // Double-check connection after reconnect attempt
149
+ if (!this._ws || this._ws.readyState !== WebSocket.OPEN) {
150
+ throw new Error('Not connected to plugin');
140
151
  }
141
- }
142
- // Double-check connection after reconnect attempt
143
- if (!this._ws || this._ws.readyState !== WebSocket.OPEN) {
144
- throw new Error('Not connected to plugin');
145
- }
146
- // Generate unique ID for this request
147
- const id = `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
148
- const commandWithId = { ...command, id };
149
- return new Promise((resolve, reject) => {
150
- // Set up timeout
151
- const timeout = setTimeout(() => {
152
- this._pendingRequests.delete(id);
153
- reject(new Error(`Request timeout after ${timeoutMs}ms`));
154
- }, timeoutMs);
155
- // Store pending request
156
- this._pendingRequests.set(id, { resolve, reject, timeout });
157
- // Send command
158
- // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
159
- this._ws.send(JSON.stringify(commandWithId), (error) => {
160
- if (error) {
161
- clearTimeout(timeout);
152
+ // Generate unique ID for this request
153
+ const id = `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
154
+ const commandWithId = { ...command, id };
155
+ const response = await new Promise((resolve, reject) => {
156
+ // Set up timeout
157
+ const timeout = setTimeout(() => {
162
158
  this._pendingRequests.delete(id);
163
- reject(error);
164
- }
159
+ reject(new Error(`Request timeout after ${timeoutMs}ms`));
160
+ }, timeoutMs);
161
+ // Store pending request
162
+ this._pendingRequests.set(id, { resolve, reject, timeout });
163
+ // Send command
164
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
165
+ this._ws.send(JSON.stringify(commandWithId), (error) => {
166
+ if (error) {
167
+ clearTimeout(timeout);
168
+ this._pendingRequests.delete(id);
169
+ reject(error);
170
+ }
171
+ });
165
172
  });
166
- });
173
+ // Retry on "not found" errors (window not yet registered)
174
+ if (!response.success && response.error?.includes('not found') && attempt < maxRetries) {
175
+ await new Promise((r) => { setTimeout(r, baseDelayMs * Math.pow(2, attempt)); });
176
+ continue;
177
+ }
178
+ return response;
179
+ }
180
+ // Unreachable — loop always returns or throws — but satisfies TypeScript
181
+ throw new Error('Retry attempts exhausted');
167
182
  }
168
183
  /**
169
184
  * Check if connected
@@ -56,8 +56,9 @@
56
56
  return refMap.get(element);
57
57
  }
58
58
 
59
- window.__MCP_ARIA_REFS__ = refMap;
60
- window.__MCP_ARIA_REFS_REVERSE__ = reverseRefMap;
59
+ window.__MCP__ = window.__MCP__ || {};
60
+ window.__MCP__.refs = refMap;
61
+ window.__MCP__.reverseRefs = reverseRefMap;
61
62
 
62
63
  // ========================================================================
63
64
  // Visibility (using aria-api for correct aria-hidden inheritance)
@@ -10,13 +10,8 @@
10
10
  let element;
11
11
 
12
12
  // Check if it's a ref ID first (works with any strategy)
13
- const refMatch = selector.match(/^(?:ref=)?(e\d+)$/);
14
- if (refMatch) {
15
- const refId = refMatch[1],
16
- refMap = window.__MCP_ARIA_REFS_REVERSE__;
17
- if (refMap) {
18
- element = refMap.get(refId);
19
- }
13
+ if (/^\[?(?:ref=)?(e\d+)\]?$/.test(selector)) {
14
+ element = window.__MCP__.resolveRef(selector);
20
15
  } else if (strategy === 'text') {
21
16
  // Find element containing text
22
17
  const xpath = "//*[contains(text(), '" + selector + "')]";
@@ -40,7 +35,7 @@
40
35
  element = result.singleNodeValue;
41
36
  } else {
42
37
  // CSS selector (default)
43
- element = document.querySelector(selector);
38
+ element = window.__MCP__.resolveRef(selector);
44
39
  }
45
40
 
46
41
  if (element) {
@@ -7,19 +7,9 @@
7
7
  (function(params) {
8
8
  const { selector } = params;
9
9
 
10
- // Resolve element from CSS selector or ref ID (e.g., "ref=e3" or "e3")
11
10
  function resolveElement(selectorOrRef) {
12
11
  if (!selectorOrRef) return null;
13
- var refMatch = selectorOrRef.match(/^(?:ref=)?(e\d+)$/);
14
- if (refMatch) {
15
- var refId = refMatch[1],
16
- refMap = window.__MCP_ARIA_REFS_REVERSE__;
17
- if (!refMap) throw new Error('Ref "' + refId + '" not found. Run webview_dom_snapshot first to index elements.');
18
- var el = refMap.get(refId);
19
- if (!el) throw new Error('Ref "' + refId + '" not found. The DOM may have changed since the snapshot.');
20
- return el;
21
- }
22
- var el = document.querySelector(selectorOrRef);
12
+ var el = window.__MCP__.resolveRef(selectorOrRef);
23
13
  if (!el) throw new Error('Element not found: ' + selectorOrRef);
24
14
  return el;
25
15
  }
@@ -9,25 +9,15 @@
9
9
  (function(params) {
10
10
  const { selector, properties, multiple } = params;
11
11
 
12
- // Resolve element from CSS selector or ref ID (e.g., "ref=e3" or "e3")
13
12
  function resolveElement(selectorOrRef) {
14
13
  if (!selectorOrRef) return null;
15
- var refMatch = selectorOrRef.match(/^(?:ref=)?(e\d+)$/);
16
- if (refMatch) {
17
- var refId = refMatch[1],
18
- refMap = window.__MCP_ARIA_REFS_REVERSE__;
19
- if (!refMap) throw new Error('Ref "' + refId + '" not found. Run webview_dom_snapshot first to index elements.');
20
- var el = refMap.get(refId);
21
- if (!el) throw new Error('Ref "' + refId + '" not found. The DOM may have changed since the snapshot.');
22
- return el;
23
- }
24
- var el = document.querySelector(selectorOrRef);
14
+ var el = window.__MCP__.resolveRef(selectorOrRef);
25
15
  if (!el) throw new Error('Element not found: ' + selectorOrRef);
26
16
  return el;
27
17
  }
28
18
 
29
19
  // Check if selector is a ref ID - if so, multiple doesn't apply
30
- const isRef = /^(?:ref=)?(e\d+)$/.test(selector);
20
+ const isRef = /^\[?(?:ref=)?(e\d+)\]?$/.test(selector);
31
21
  const elements = isRef
32
22
  ? [resolveElement(selector)]
33
23
  : (multiple ? Array.from(document.querySelectorAll(selector)) : [document.querySelector(selector)]);
@@ -1,27 +1,30 @@
1
1
  /**
2
- * html2canvas library loader
2
+ * html2canvas-pro library loader
3
3
  *
4
- * Loads the html2canvas library from node_modules and provides it as a string
5
- * that can be injected into the webview.
4
+ * Loads the html2canvas-pro library from node_modules and provides it as a string
5
+ * that can be injected into the webview. html2canvas-pro is a fork of html2canvas
6
+ * that adds support for modern CSS color functions like oklch(), oklab(), lab(),
7
+ * lch(), and color().
6
8
  */
7
9
  import { readFileSync } from 'fs';
8
10
  import { createRequire } from 'module';
9
- // Use createRequire to resolve the path to html2canvas in node_modules
11
+ // Use createRequire to resolve the path to html2canvas-pro in node_modules
10
12
  const require = createRequire(import.meta.url);
11
- let html2canvasSource = null;
13
+ let html2canvasProSource = null;
12
14
  /** Script ID used for the html2canvas library in the script registry. */
13
15
  export const HTML2CANVAS_SCRIPT_ID = '__mcp_html2canvas__';
14
16
  /**
15
- * Get the html2canvas library source code.
17
+ * Get the html2canvas-pro library source code.
16
18
  * Loaded lazily and cached.
17
19
  */
18
20
  export function getHtml2CanvasSource() {
19
- if (html2canvasSource === null) {
20
- // Resolve the path to html2canvas.min.js
21
- const html2canvasPath = require.resolve('html2canvas/dist/html2canvas.min.js');
22
- html2canvasSource = readFileSync(html2canvasPath, 'utf-8');
21
+ if (html2canvasProSource === null) {
22
+ // Resolve the path to html2canvas-pro.js (UMD build)
23
+ // Note: We use the main entry point since the minified version isn't exported
24
+ const html2canvasProPath = require.resolve('html2canvas-pro');
25
+ html2canvasProSource = readFileSync(html2canvasProPath, 'utf-8');
23
26
  }
24
- return html2canvasSource;
27
+ return html2canvasProSource;
25
28
  }
26
29
  /**
27
30
  * Build a script that captures a screenshot using html2canvas.
@@ -13,6 +13,7 @@ function loadScript(name) {
13
13
  }
14
14
  // Load scripts once at module initialization
15
15
  export const SCRIPTS = {
16
+ resolveRef: loadScript('resolve-ref'),
16
17
  interact: loadScript('interact'),
17
18
  swipe: loadScript('swipe'),
18
19
  keyboard: loadScript('keyboard'),
@@ -22,6 +23,14 @@ export const SCRIPTS = {
22
23
  findElement: loadScript('find-element'),
23
24
  domSnapshot: loadScript('dom-snapshot'),
24
25
  };
26
+ /** Script ID used for resolve-ref in the script registry. */
27
+ export const RESOLVE_REF_SCRIPT_ID = '__mcp_resolve_ref__';
28
+ /**
29
+ * Get the resolve-ref script source code.
30
+ */
31
+ export function getResolveRefSource() {
32
+ return SCRIPTS.resolveRef;
33
+ }
25
34
  /**
26
35
  * Build a script invocation with parameters
27
36
  * The script should be an IIFE that accepts a params object
@@ -39,10 +48,8 @@ export function buildTypeScript(selector, text) {
39
48
  const selector = '${selector}';
40
49
  const text = '${escapedText}';
41
50
 
42
- const element = document.querySelector(selector);
43
- if (!element) {
44
- throw new Error('Element not found: ' + selector);
45
- }
51
+ var element = window.__MCP__.resolveRef(selector);
52
+ if (!element) throw new Error('Element not found: ' + selector);
46
53
 
47
54
  element.focus();
48
55
  element.value = text;
@@ -14,19 +14,9 @@
14
14
  (function(params) {
15
15
  const { action, selector, x, y, duration, scrollX, scrollY } = params;
16
16
 
17
- // Resolve element from CSS selector or ref ID (e.g., "ref=e3" or "e3")
18
17
  function resolveElement(selectorOrRef) {
19
18
  if (!selectorOrRef) return null;
20
- var refMatch = selectorOrRef.match(/^(?:ref=)?(e\d+)$/);
21
- if (refMatch) {
22
- var refId = refMatch[1],
23
- refMap = window.__MCP_ARIA_REFS_REVERSE__;
24
- if (!refMap) throw new Error('Ref "' + refId + '" not found. Run webview_dom_snapshot first to index elements.');
25
- var el = refMap.get(refId);
26
- if (!el) throw new Error('Ref "' + refId + '" not found. The DOM may have changed since the snapshot.');
27
- return el;
28
- }
29
- var el = document.querySelector(selectorOrRef);
19
+ var el = window.__MCP__.resolveRef(selectorOrRef);
30
20
  if (!el) throw new Error('Element not found: ' + selectorOrRef);
31
21
  return el;
32
22
  }
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Shared ref resolver - always available via window.__MCP__.resolveRef.
3
+ * Accepts a ref ID ("e3", "ref=e3", "[ref=e3]") or CSS selector.
4
+ * Returns the DOM element, or null if not found.
5
+ *
6
+ * Reads window.__MCP__.reverseRefs dynamically at call time so it always
7
+ * uses the latest snapshot's data.
8
+ */
9
+ (function() {
10
+ window.__MCP__ = window.__MCP__ || {};
11
+ window.__MCP__.resolveRef = function(selectorOrRef) {
12
+ if (!selectorOrRef) return null;
13
+ var refMatch = selectorOrRef.match(/^\[?(?:ref=)?(e\d+)\]?$/);
14
+ if (refMatch) {
15
+ var reverseRefs = window.__MCP__.reverseRefs;
16
+ if (!reverseRefs) {
17
+ throw new Error('Ref IDs require a snapshot. Run webview_dom_snapshot first to index elements.');
18
+ }
19
+ return reverseRefs.get(refMatch[1]) || null;
20
+ }
21
+ return document.querySelector(selectorOrRef);
22
+ };
23
+ })();
@@ -10,17 +10,9 @@
10
10
  const { type, value, timeout } = params;
11
11
  const startTime = Date.now();
12
12
 
13
- // Resolve element from CSS selector or ref ID (e.g., "ref=e3" or "e3")
14
13
  function resolveElement(selectorOrRef) {
15
14
  if (!selectorOrRef) return null;
16
- var refMatch = selectorOrRef.match(/^(?:ref=)?(e\d+)$/);
17
- if (refMatch) {
18
- var refId = refMatch[1],
19
- refMap = window.__MCP_ARIA_REFS_REVERSE__;
20
- if (!refMap) return null; // For wait-for, return null instead of throwing
21
- return refMap.get(refId) || null;
22
- }
23
- return document.querySelector(selectorOrRef);
15
+ return window.__MCP__.resolveRef(selectorOrRef);
24
16
  }
25
17
 
26
18
  return new Promise((resolve, reject) => {
@@ -4,6 +4,7 @@ import { hasActiveSession, getDefaultSession, resolveTargetApp } from './session
4
4
  import { createMcpLogger } from '../logger.js';
5
5
  import { buildScreenshotScript, buildScreenshotCaptureScript, getHtml2CanvasSource, HTML2CANVAS_SCRIPT_ID, } from './scripts/html2canvas-loader.js';
6
6
  import { registerScript, isScriptRegistered } from './script-manager.js';
7
+ import { getResolveRefSource, RESOLVE_REF_SCRIPT_ID } from './scripts/index.js';
7
8
  /**
8
9
  * WebView Executor - Native IPC-based JavaScript execution
9
10
  *
@@ -44,6 +45,8 @@ export async function ensureReady() {
44
45
  if (session) {
45
46
  await connectPlugin(session.host, session.port);
46
47
  }
48
+ // Register the resolve-ref helper so ref-based selectors work in all tools
49
+ await registerScript(RESOLVE_REF_SCRIPT_ID, 'inline', getResolveRefSource());
47
50
  isInitialized = true;
48
51
  }
49
52
  /**
@@ -48,27 +48,123 @@ First, verify this is a Tauri v2 project:
48
48
  Examine these files and report what needs to be added or updated:
49
49
 
50
50
  ### 1. Rust Plugin Dependency
51
- Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`. If missing or outdated, note that it needs:
51
+
52
+ Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`.
53
+ It should be an **optional** dependency behind a Cargo feature
54
+ so that it is completely excluded from production builds:
55
+
52
56
  \`\`\`toml
53
57
  [dependencies]
54
- tauri-plugin-mcp-bridge = "${PLUGIN_VERSION_CARGO}"
58
+ tauri-plugin-mcp-bridge = { version = "${PLUGIN_VERSION_CARGO}", optional = true }
59
+ \`\`\`
60
+
61
+ Under \`[features]\`, add a feature that enables it:
62
+
63
+ \`\`\`toml
64
+ [features]
65
+ mcp-bridge = ["dep:tauri-plugin-mcp-bridge"]
55
66
  \`\`\`
56
67
 
57
68
  ### 2. Plugin Registration
58
- Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin registration. It should have:
69
+
70
+ Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin
71
+ registration. It should be gated behind the \`mcp-bridge\` feature flag:
72
+
59
73
  \`\`\`rust
60
- #[cfg(debug_assertions)]
74
+ #[cfg(all(feature = "mcp-bridge", debug_assertions))]
61
75
  {
62
76
  builder = builder.plugin(tauri_plugin_mcp_bridge::init());
63
77
  }
64
78
  \`\`\`
65
79
 
66
80
  ### 3. Global Tauri Setting
81
+
67
82
  Check \`src-tauri/tauri.conf.json\` for \`withGlobalTauri: true\` under the \`app\` section.
68
83
  **This is required** - without it, the MCP bridge cannot communicate with the webview.
69
84
 
70
- ### 4. Plugin Permissions
71
- Check \`src-tauri/capabilities/default.json\` (or similar) for \`"mcp-bridge:default"\` permission.
85
+ This setting should only be enabled for development. If the project
86
+ uses a \`tauri.dev.conf.json\` overlay (applied only during
87
+ \`cargo tauri dev\`), prefer placing it there:
88
+
89
+ \`\`\`json
90
+ {
91
+ "app": {
92
+ "withGlobalTauri": true
93
+ }
94
+ }
95
+ \`\`\`
96
+
97
+ ### 4. Plugin Capability (Conditional via build.rs)
98
+
99
+ The \`mcp-bridge:default\` permission must **not** be added to
100
+ \`src-tauri/capabilities/default.json\`. Instead, it should be
101
+ conditionally generated by the build script so that it only exists
102
+ when the \`mcp-bridge\` feature is active.
103
+
104
+ Check \`src-tauri/build.rs\` and update it to conditionally write
105
+ (or remove) a separate capability file before
106
+ \`tauri_build::build()\` runs. Tauri auto-discovers all \`.json\`
107
+ files in \`capabilities/\`, so this ensures the permission is only
108
+ present when the feature is enabled:
109
+
110
+ \`\`\`rust
111
+ fn main() {
112
+ let mcp_cap_path = std::path::Path::new("capabilities/mcp-bridge.json");
113
+ #[cfg(all(feature = "mcp-bridge", debug_assertions))]
114
+ {
115
+ let cap = r#"{
116
+ "identifier": "mcp-bridge",
117
+ "description": "enables MCP bridge for development",
118
+ "windows": [
119
+ "main"
120
+ ],
121
+ "permissions": [
122
+ "mcp-bridge:default"
123
+ ]
124
+ }"#;
125
+ std::fs::write(mcp_cap_path, cap)
126
+ .expect("failed to write mcp-bridge capability");
127
+ }
128
+ #[cfg(not(all(feature = "mcp-bridge", debug_assertions)))]
129
+ {
130
+ let _ = std::fs::remove_file(mcp_cap_path);
131
+ }
132
+
133
+ tauri_build::build()
134
+ }
135
+ \`\`\`
136
+
137
+ If \`build.rs\` already has other logic, integrate the conditional
138
+ block before the \`tauri_build::build()\` call.
139
+
140
+ ### 5. Gitignore the Generated Capability File
141
+
142
+ Since \`capabilities/mcp-bridge.json\` is generated at build time, add it to \`src-tauri/.gitignore\`:
143
+
144
+ \`\`\`gitignore
145
+ /capabilities/mcp-bridge.json
146
+ \`\`\`
147
+
148
+ ### 6. Dev Scripts (package.json)
149
+
150
+ If the project uses npm scripts to run \`tauri dev\`, add
151
+ \`--features mcp-bridge\` to the dev scripts so the feature is
152
+ automatically enabled. For example:
153
+
154
+ \`\`\`json
155
+ {
156
+ "scripts": {
157
+ "dev": "tauri dev --features mcp-bridge",
158
+ "dev:ios": "tauri ios dev --features mcp-bridge",
159
+ "dev:android": "tauri android dev --features mcp-bridge"
160
+ }
161
+ }
162
+ \`\`\`
163
+
164
+ Do **not** add \`--features mcp-bridge\` to release-profile dev
165
+ scripts (e.g. those using \`--release\`), as \`debug_assertions\`
166
+ is false in release builds and the guard will exclude the plugin
167
+ anyway.
72
168
 
73
169
  ## Response Format
74
170
 
@@ -83,13 +179,19 @@ Only after the user says yes should you make any modifications.
83
179
  ## After Setup
84
180
 
85
181
  Once changes are approved and made:
86
- 1. Run the Tauri app in development mode (\`cargo tauri dev\`)
182
+ 1. Run the Tauri app in development mode. If npm scripts were
183
+ updated, use \`npm run dev\`. Otherwise use
184
+ \`cargo tauri dev --features mcp-bridge\` directly.
87
185
  2. Use \`driver_session\` with action "start" to connect
88
186
  3. Use \`driver_session\` with action "status" to verify
89
187
 
90
188
  ## Notes
91
189
 
92
- - The plugin only runs in debug builds so it won't affect production
190
+ - The plugin is completely excluded from production builds: both
191
+ \`cfg(feature = "mcp-bridge")\` and \`cfg(debug_assertions)\` must
192
+ be true, so even if the feature flag is accidentally enabled in a
193
+ release build, the plugin will not be included
194
+ - The \`mcp-bridge\` Cargo feature must be passed explicitly — either via npm dev scripts or \`cargo tauri dev --features mcp-bridge\`
93
195
  - The WebSocket server binds to \`0.0.0.0:9223\` by default
94
196
  - For localhost-only access, use \`Builder::new().bind_address("127.0.0.1").build()\`
95
197
  `;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hypothesi/tauri-mcp-server",
3
- "version": "0.8.0",
3
+ "version": "0.8.2",
4
4
  "mcpName": "io.github.hypothesi/mcp-server-tauri",
5
5
  "description": "A Model Context Protocol server for use with Tauri v2 applications",
6
6
  "type": "module",
@@ -49,13 +49,12 @@
49
49
  "@modelcontextprotocol/sdk": "0.6.1",
50
50
  "aria-api": "0.8.0",
51
51
  "execa": "9.6.0",
52
- "html2canvas": "1.4.1",
52
+ "html2canvas-pro": "1.6.6",
53
53
  "ws": "8.18.3",
54
54
  "zod": "3.25.76",
55
55
  "zod-to-json-schema": "3.25.0"
56
56
  },
57
57
  "devDependencies": {
58
- "@types/html2canvas": "0.5.35",
59
58
  "@types/node": "22.19.1",
60
59
  "@types/ws": "8.18.1",
61
60
  "esbuild": "0.25.12",