@hypothesi/tauri-mcp-server 0.8.1 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -127,43 +127,58 @@ export class PluginClient extends EventEmitter {
127
127
  }
128
128
  }
129
129
  /**
130
- * Send a command to the plugin and wait for response
130
+ * Send a command to the plugin and wait for response.
131
+ *
132
+ * Automatically retries on transient "not found" errors (e.g. window not
133
+ * yet registered after WebSocket connect) with exponential backoff.
131
134
  */
132
135
  async sendCommand(command, timeoutMs = 5000) {
133
- // If not connected, try to reconnect first
134
- if (!this._ws || this._ws.readyState !== WebSocket.OPEN) {
135
- try {
136
- await this.connect();
136
+ const maxRetries = 3;
137
+ const baseDelayMs = 100;
138
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
139
+ // If not connected, try to reconnect first
140
+ if (!this._ws || this._ws.readyState !== WebSocket.OPEN) {
141
+ try {
142
+ await this.connect();
143
+ }
144
+ catch {
145
+ throw new Error('Not connected to plugin and reconnection failed');
146
+ }
137
147
  }
138
- catch {
139
- throw new Error('Not connected to plugin and reconnection failed');
148
+ // Double-check connection after reconnect attempt
149
+ if (!this._ws || this._ws.readyState !== WebSocket.OPEN) {
150
+ throw new Error('Not connected to plugin');
140
151
  }
141
- }
142
- // Double-check connection after reconnect attempt
143
- if (!this._ws || this._ws.readyState !== WebSocket.OPEN) {
144
- throw new Error('Not connected to plugin');
145
- }
146
- // Generate unique ID for this request
147
- const id = `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
148
- const commandWithId = { ...command, id };
149
- return new Promise((resolve, reject) => {
150
- // Set up timeout
151
- const timeout = setTimeout(() => {
152
- this._pendingRequests.delete(id);
153
- reject(new Error(`Request timeout after ${timeoutMs}ms`));
154
- }, timeoutMs);
155
- // Store pending request
156
- this._pendingRequests.set(id, { resolve, reject, timeout });
157
- // Send command
158
- // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
159
- this._ws.send(JSON.stringify(commandWithId), (error) => {
160
- if (error) {
161
- clearTimeout(timeout);
152
+ // Generate unique ID for this request
153
+ const id = `req_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
154
+ const commandWithId = { ...command, id };
155
+ const response = await new Promise((resolve, reject) => {
156
+ // Set up timeout
157
+ const timeout = setTimeout(() => {
162
158
  this._pendingRequests.delete(id);
163
- reject(error);
164
- }
159
+ reject(new Error(`Request timeout after ${timeoutMs}ms`));
160
+ }, timeoutMs);
161
+ // Store pending request
162
+ this._pendingRequests.set(id, { resolve, reject, timeout });
163
+ // Send command
164
+ // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
165
+ this._ws.send(JSON.stringify(commandWithId), (error) => {
166
+ if (error) {
167
+ clearTimeout(timeout);
168
+ this._pendingRequests.delete(id);
169
+ reject(error);
170
+ }
171
+ });
165
172
  });
166
- });
173
+ // Retry on "not found" errors (window not yet registered)
174
+ if (!response.success && response.error?.includes('not found') && attempt < maxRetries) {
175
+ await new Promise((r) => { setTimeout(r, baseDelayMs * Math.pow(2, attempt)); });
176
+ continue;
177
+ }
178
+ return response;
179
+ }
180
+ // Unreachable — loop always returns or throws — but satisfies TypeScript
181
+ throw new Error('Retry attempts exhausted');
167
182
  }
168
183
  /**
169
184
  * Check if connected
@@ -18,12 +18,13 @@
18
18
  *
19
19
  * @param {Object} params
20
20
  * @param {string} params.type - Snapshot type ('accessibility' or 'structure')
21
- * @param {string|null} params.selector - Optional CSS selector to scope snapshot
21
+ * @param {string|null} params.selector - Optional selector to scope snapshot (CSS, XPath, text, or ref ID)
22
+ * @param {string} params.strategy - Selector strategy: 'css', 'xpath', or 'text'
22
23
  */
23
24
  (function(params) {
24
25
  'use strict';
25
26
 
26
- const { type, selector } = params;
27
+ const { type, selector, strategy } = params;
27
28
 
28
29
  // ARIA states to include in snapshot (used by accessibility type)
29
30
  const ARIA_STATES = [
@@ -56,8 +57,9 @@
56
57
  return refMap.get(element);
57
58
  }
58
59
 
59
- window.__MCP_ARIA_REFS__ = refMap;
60
- window.__MCP_ARIA_REFS_REVERSE__ = reverseRefMap;
60
+ window.__MCP__ = window.__MCP__ || {};
61
+ window.__MCP__.refs = refMap;
62
+ window.__MCP__.reverseRefs = reverseRefMap;
61
63
 
62
64
  // ========================================================================
63
65
  // Visibility (using aria-api for correct aria-hidden inheritance)
@@ -444,18 +446,17 @@
444
446
 
445
447
  if (selector) {
446
448
  try {
447
- document.querySelector(selector);
449
+ var structureElements = window.__MCP__.resolveAll(selector, strategy);
448
450
  } catch (e) {
449
- return 'Error: Invalid CSS selector "' + selector + '": ' + e.message;
451
+ return 'Error: Invalid selector "' + selector + '" (strategy: ' + strategy + '): ' + e.message;
450
452
  }
451
453
 
452
- var structureElements = document.querySelectorAll(selector);
453
454
  if (structureElements.length === 0) {
454
- return 'Error: No elements found matching selector "' + selector + '"';
455
+ return 'Error: No elements found matching selector "' + selector + '" (strategy: ' + strategy + ')';
455
456
  }
456
457
 
457
- structureRoots = Array.from(structureElements);
458
- structureScopeInfo = '# Scoped to: ' + selector + '\n';
458
+ structureRoots = structureElements;
459
+ structureScopeInfo = '# Scoped to: ' + selector + (strategy !== 'css' ? ' (strategy: ' + strategy + ')' : '') + '\n';
459
460
  if (structureRoots.length > 1) structureScopeInfo += '# ' + structureRoots.length + ' elements matched\n';
460
461
  } else {
461
462
  structureRoots = [document.body];
@@ -497,18 +498,17 @@
497
498
 
498
499
  if (selector) {
499
500
  try {
500
- document.querySelector(selector);
501
+ var elements = window.__MCP__.resolveAll(selector, strategy);
501
502
  } catch (e) {
502
- return 'Error: Invalid CSS selector "' + selector + '": ' + e.message;
503
+ return 'Error: Invalid selector "' + selector + '" (strategy: ' + strategy + '): ' + e.message;
503
504
  }
504
505
 
505
- var elements = document.querySelectorAll(selector);
506
506
  if (elements.length === 0) {
507
- return 'Error: No elements found matching selector "' + selector + '"';
507
+ return 'Error: No elements found matching selector "' + selector + '" (strategy: ' + strategy + ')';
508
508
  }
509
509
 
510
- roots = Array.from(elements);
511
- scopeInfo = '# Scoped to: ' + selector + '\n';
510
+ roots = elements;
511
+ scopeInfo = '# Scoped to: ' + selector + (strategy !== 'css' ? ' (strategy: ' + strategy + ')' : '') + '\n';
512
512
  if (roots.length > 1) scopeInfo += '# ' + roots.length + ' elements matched\n';
513
513
  } else {
514
514
  roots = [document.body];
@@ -7,41 +7,8 @@
7
7
  */
8
8
  (function(params) {
9
9
  const { selector, strategy } = params;
10
- let element;
11
10
 
12
- // Check if it's a ref ID first (works with any strategy)
13
- const refMatch = selector.match(/^(?:ref=)?(e\d+)$/);
14
- if (refMatch) {
15
- const refId = refMatch[1],
16
- refMap = window.__MCP_ARIA_REFS_REVERSE__;
17
- if (refMap) {
18
- element = refMap.get(refId);
19
- }
20
- } else if (strategy === 'text') {
21
- // Find element containing text
22
- const xpath = "//*[contains(text(), '" + selector + "')]";
23
- const result = document.evaluate(
24
- xpath,
25
- document,
26
- null,
27
- XPathResult.FIRST_ORDERED_NODE_TYPE,
28
- null
29
- );
30
- element = result.singleNodeValue;
31
- } else if (strategy === 'xpath') {
32
- // XPath selector
33
- const result = document.evaluate(
34
- selector,
35
- document,
36
- null,
37
- XPathResult.FIRST_ORDERED_NODE_TYPE,
38
- null
39
- );
40
- element = result.singleNodeValue;
41
- } else {
42
- // CSS selector (default)
43
- element = document.querySelector(selector);
44
- }
11
+ var element = window.__MCP__.resolveRef(selector, strategy);
45
12
 
46
13
  if (element) {
47
14
  const outerHTML = element.outerHTML;
@@ -49,7 +16,10 @@
49
16
  const truncated = outerHTML.length > 5000
50
17
  ? outerHTML.substring(0, 5000) + '...'
51
18
  : outerHTML;
52
- return 'Found element: ' + truncated;
19
+ var msg = 'Found element: ' + truncated;
20
+ var count = window.__MCP__.countAll(selector, strategy);
21
+ if (count > 1) msg += '\n(+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
22
+ return msg;
53
23
  }
54
24
 
55
25
  return 'Element not found';
@@ -2,29 +2,23 @@
2
2
  * Focus an element
3
3
  *
4
4
  * @param {Object} params
5
- * @param {string} params.selector - CSS selector or ref ID (e.g., "ref=e3") for element to focus
5
+ * @param {string} params.selector - CSS selector, XPath, text, or ref ID (e.g., "ref=e3") for element to focus
6
+ * @param {string} params.strategy - Selector strategy: 'css', 'xpath', or 'text'
6
7
  */
7
8
  (function(params) {
8
- const { selector } = params;
9
+ const { selector, strategy } = params;
9
10
 
10
- // Resolve element from CSS selector or ref ID (e.g., "ref=e3" or "e3")
11
11
  function resolveElement(selectorOrRef) {
12
12
  if (!selectorOrRef) return null;
13
- var refMatch = selectorOrRef.match(/^(?:ref=)?(e\d+)$/);
14
- if (refMatch) {
15
- var refId = refMatch[1],
16
- refMap = window.__MCP_ARIA_REFS_REVERSE__;
17
- if (!refMap) throw new Error('Ref "' + refId + '" not found. Run webview_dom_snapshot first to index elements.');
18
- var el = refMap.get(refId);
19
- if (!el) throw new Error('Ref "' + refId + '" not found. The DOM may have changed since the snapshot.');
20
- return el;
21
- }
22
- var el = document.querySelector(selectorOrRef);
13
+ var el = window.__MCP__.resolveRef(selectorOrRef, strategy);
23
14
  if (!el) throw new Error('Element not found: ' + selectorOrRef);
24
15
  return el;
25
16
  }
26
17
 
27
18
  const element = resolveElement(selector);
28
19
  element.focus();
29
- return `Focused element: ${selector}`;
20
+ var msg = 'Focused element: ' + selector;
21
+ var count = window.__MCP__.countAll(selector, strategy);
22
+ if (count > 1) msg += ' (+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
23
+ return msg;
30
24
  })
@@ -2,46 +2,33 @@
2
2
  * Get computed CSS styles for elements
3
3
  *
4
4
  * @param {Object} params
5
- * @param {string} params.selector - CSS selector or ref ID (e.g., "ref=e3") for element(s)
5
+ * @param {string} params.selector - CSS selector, XPath, text, or ref ID (e.g., "ref=e3") for element(s)
6
+ * @param {string} params.strategy - Selector strategy: 'css', 'xpath', or 'text'
6
7
  * @param {string[]} params.properties - Specific CSS properties to retrieve
7
8
  * @param {boolean} params.multiple - Whether to get styles for all matching elements
8
9
  */
9
10
  (function(params) {
10
- const { selector, properties, multiple } = params;
11
+ const { selector, strategy, properties, multiple } = params;
11
12
 
12
- // Resolve element from CSS selector or ref ID (e.g., "ref=e3" or "e3")
13
- function resolveElement(selectorOrRef) {
14
- if (!selectorOrRef) return null;
15
- var refMatch = selectorOrRef.match(/^(?:ref=)?(e\d+)$/);
16
- if (refMatch) {
17
- var refId = refMatch[1],
18
- refMap = window.__MCP_ARIA_REFS_REVERSE__;
19
- if (!refMap) throw new Error('Ref "' + refId + '" not found. Run webview_dom_snapshot first to index elements.');
20
- var el = refMap.get(refId);
21
- if (!el) throw new Error('Ref "' + refId + '" not found. The DOM may have changed since the snapshot.');
22
- return el;
23
- }
24
- var el = document.querySelector(selectorOrRef);
25
- if (!el) throw new Error('Element not found: ' + selectorOrRef);
26
- return el;
27
- }
13
+ var elements;
28
14
 
29
- // Check if selector is a ref ID - if so, multiple doesn't apply
30
- const isRef = /^(?:ref=)?(e\d+)$/.test(selector);
31
- const elements = isRef
32
- ? [resolveElement(selector)]
33
- : (multiple ? Array.from(document.querySelectorAll(selector)) : [document.querySelector(selector)]);
15
+ if (multiple) {
16
+ elements = window.__MCP__.resolveAll(selector, strategy);
17
+ } else {
18
+ var el = window.__MCP__.resolveRef(selector, strategy);
19
+ elements = el ? [el] : [];
20
+ }
34
21
 
35
22
  if (!elements[0]) {
36
- throw new Error(`Element not found: ${selector}`);
23
+ throw new Error('Element not found: ' + selector);
37
24
  }
38
25
 
39
- const results = elements.map(element => {
26
+ const results = elements.map(function(element) {
40
27
  const styles = window.getComputedStyle(element);
41
28
 
42
29
  if (properties.length > 0) {
43
30
  const result = {};
44
- properties.forEach(prop => {
31
+ properties.forEach(function(prop) {
45
32
  result[prop] = styles.getPropertyValue(prop);
46
33
  });
47
34
  return result;
@@ -22,7 +22,11 @@ export function getHtml2CanvasSource() {
22
22
  // Resolve the path to html2canvas-pro.js (UMD build)
23
23
  // Note: We use the main entry point since the minified version isn't exported
24
24
  const html2canvasProPath = require.resolve('html2canvas-pro');
25
- html2canvasProSource = readFileSync(html2canvasProPath, 'utf-8');
25
+ html2canvasProSource = readFileSync(html2canvasProPath, 'utf-8')
26
+ // Strip sourceMappingURL to prevent the browser from trying to fetch the
27
+ // .map file relative to the page's base URL (which fails when the app is
28
+ // served under a sub-path like '/some/path/').
29
+ .replace(/\/\/[#@]\s*sourceMappingURL=.*/g, '');
26
30
  }
27
31
  return html2canvasProSource;
28
32
  }
@@ -13,6 +13,7 @@ function loadScript(name) {
13
13
  }
14
14
  // Load scripts once at module initialization
15
15
  export const SCRIPTS = {
16
+ resolveRef: loadScript('resolve-ref'),
16
17
  interact: loadScript('interact'),
17
18
  swipe: loadScript('swipe'),
18
19
  keyboard: loadScript('keyboard'),
@@ -22,6 +23,14 @@ export const SCRIPTS = {
22
23
  findElement: loadScript('find-element'),
23
24
  domSnapshot: loadScript('dom-snapshot'),
24
25
  };
26
+ /** Script ID used for resolve-ref in the script registry. */
27
+ export const RESOLVE_REF_SCRIPT_ID = '__mcp_resolve_ref__';
28
+ /**
29
+ * Get the resolve-ref script source code.
30
+ */
31
+ export function getResolveRefSource() {
32
+ return SCRIPTS.resolveRef;
33
+ }
25
34
  /**
26
35
  * Build a script invocation with parameters
27
36
  * The script should be an IIFE that accepts a params object
@@ -32,24 +41,28 @@ export function buildScript(script, params) {
32
41
  /**
33
42
  * Build a script for typing text (uses the keyboard script's typeText function)
34
43
  */
35
- export function buildTypeScript(selector, text) {
44
+ export function buildTypeScript(selector, text, strategy) {
36
45
  const escapedText = text.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
46
+ const escapedSelector = selector.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
47
+ const strat = strategy || 'css';
37
48
  return `
38
49
  (function() {
39
- const selector = '${selector}';
50
+ const selector = '${escapedSelector}';
51
+ const strategy = '${strat}';
40
52
  const text = '${escapedText}';
41
53
 
42
- const element = document.querySelector(selector);
43
- if (!element) {
44
- throw new Error('Element not found: ' + selector);
45
- }
54
+ var element = window.__MCP__.resolveRef(selector, strategy);
55
+ if (!element) throw new Error('Element not found: ' + selector);
46
56
 
47
57
  element.focus();
48
58
  element.value = text;
49
59
  element.dispatchEvent(new Event('input', { bubbles: true }));
50
60
  element.dispatchEvent(new Event('change', { bubbles: true }));
51
61
 
52
- return 'Typed "' + text + '" into ' + selector;
62
+ var msg = 'Typed "' + text + '" into ' + selector;
63
+ var count = window.__MCP__.countAll(selector, strategy);
64
+ if (count > 1) msg += ' (+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
65
+ return msg;
53
66
  })()
54
67
  `;
55
68
  }
@@ -4,7 +4,8 @@
4
4
  *
5
5
  * @param {Object} params
6
6
  * @param {string} params.action - The action to perform
7
- * @param {string|null} params.selector - CSS selector or ref ID (e.g., "ref=e3") for the element
7
+ * @param {string|null} params.selector - CSS selector, XPath, text, or ref ID (e.g., "ref=e3") for the element
8
+ * @param {string} params.strategy - Selector strategy: 'css', 'xpath', or 'text'
8
9
  * @param {number|null} params.x - X coordinate
9
10
  * @param {number|null} params.y - Y coordinate
10
11
  * @param {number} params.duration - Duration for long-press
@@ -12,25 +13,22 @@
12
13
  * @param {number} params.scrollY - Vertical scroll amount
13
14
  */
14
15
  (function(params) {
15
- const { action, selector, x, y, duration, scrollX, scrollY } = params;
16
+ const { action, selector, strategy, x, y, duration, scrollX, scrollY } = params;
16
17
 
17
- // Resolve element from CSS selector or ref ID (e.g., "ref=e3" or "e3")
18
18
  function resolveElement(selectorOrRef) {
19
19
  if (!selectorOrRef) return null;
20
- var refMatch = selectorOrRef.match(/^(?:ref=)?(e\d+)$/);
21
- if (refMatch) {
22
- var refId = refMatch[1],
23
- refMap = window.__MCP_ARIA_REFS_REVERSE__;
24
- if (!refMap) throw new Error('Ref "' + refId + '" not found. Run webview_dom_snapshot first to index elements.');
25
- var el = refMap.get(refId);
26
- if (!el) throw new Error('Ref "' + refId + '" not found. The DOM may have changed since the snapshot.');
27
- return el;
28
- }
29
- var el = document.querySelector(selectorOrRef);
20
+ var el = window.__MCP__.resolveRef(selectorOrRef, strategy);
30
21
  if (!el) throw new Error('Element not found: ' + selectorOrRef);
31
22
  return el;
32
23
  }
33
24
 
25
+ function matchHint() {
26
+ if (!selector) return '';
27
+ var count = window.__MCP__.countAll(selector, strategy);
28
+ if (count > 1) return ' (+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
29
+ return '';
30
+ }
31
+
34
32
  let element = null;
35
33
  let targetX, targetY;
36
34
 
@@ -70,7 +68,7 @@
70
68
  element.dispatchEvent(new MouseEvent('mouseup', eventOptions));
71
69
  element.dispatchEvent(new MouseEvent('click', eventOptions));
72
70
  }
73
- return `Clicked at (${targetX}, ${targetY})`;
71
+ return `Clicked at (${targetX}, ${targetY})` + matchHint();
74
72
  }
75
73
 
76
74
  if (action === 'double-click') {
@@ -83,7 +81,7 @@
83
81
  element.dispatchEvent(new MouseEvent('click', eventOptions));
84
82
  element.dispatchEvent(new MouseEvent('dblclick', eventOptions));
85
83
  }
86
- return `Double-clicked at (${targetX}, ${targetY})`;
84
+ return `Double-clicked at (${targetX}, ${targetY})` + matchHint();
87
85
  }
88
86
 
89
87
  if (action === 'long-press') {
@@ -93,7 +91,7 @@
93
91
  element.dispatchEvent(new MouseEvent('mouseup', eventOptions));
94
92
  }, duration);
95
93
  }
96
- return `Long-pressed at (${targetX}, ${targetY}) for ${duration}ms`;
94
+ return `Long-pressed at (${targetX}, ${targetY}) for ${duration}ms` + matchHint();
97
95
  }
98
96
 
99
97
  if (action === 'scroll') {
@@ -105,7 +103,7 @@
105
103
  scrollTarget.scrollLeft += scrollX;
106
104
  scrollTarget.scrollTop += scrollY;
107
105
  }
108
- return `Scrolled by (${scrollX}, ${scrollY}) pixels`;
106
+ return `Scrolled by (${scrollX}, ${scrollY}) pixels` + matchHint();
109
107
  }
110
108
  return 'No scroll performed (scrollX and scrollY are both 0)';
111
109
  }
@@ -0,0 +1,112 @@
1
+ /**
2
+ * Shared ref resolver - always available via window.__MCP__.resolveRef.
3
+ * Accepts a ref ID ("e3", "ref=e3", "[ref=e3]"), CSS selector, XPath, or text.
4
+ * Returns the DOM element, or null if not found.
5
+ *
6
+ * Reads window.__MCP__.reverseRefs dynamically at call time so it always
7
+ * uses the latest snapshot's data.
8
+ *
9
+ * Also provides:
10
+ * - resolveAll(selector, strategy) - returns an Array of matching elements
11
+ * - countAll(selector, strategy) - returns the total match count
12
+ */
13
+ (function() {
14
+ window.__MCP__ = window.__MCP__ || {};
15
+
16
+ var REF_PATTERN = /^\[?(?:ref=)?(e\d+)\]?$/;
17
+
18
+ function xpathForText(text) {
19
+ // Escape single quotes for XPath by splitting on ' and using concat()
20
+ if (text.indexOf("'") === -1) {
21
+ return "//*[contains(text(), '" + text + "')]";
22
+ }
23
+ var parts = text.split("'");
24
+ var expr = 'concat(' + parts.map(function(p, i) {
25
+ return (i > 0 ? ",\"'\",": '') + "'" + p + "'";
26
+ }).join('') + ')';
27
+ return '//*[contains(text(), ' + expr + ')]';
28
+ }
29
+
30
+ /**
31
+ * Resolve a single element by selector and strategy.
32
+ * @param {string} selectorOrRef - Selector, ref ID, XPath, or text
33
+ * @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
34
+ * @returns {Element|null}
35
+ */
36
+ window.__MCP__.resolveRef = function(selectorOrRef, strategy) {
37
+ if (!selectorOrRef) return null;
38
+
39
+ // Ref IDs always take priority regardless of strategy
40
+ var refMatch = selectorOrRef.match(REF_PATTERN);
41
+ if (refMatch) {
42
+ var reverseRefs = window.__MCP__.reverseRefs;
43
+ if (!reverseRefs) {
44
+ throw new Error('Ref IDs require a snapshot. Run webview_dom_snapshot first to index elements.');
45
+ }
46
+ return reverseRefs.get(refMatch[1]) || null;
47
+ }
48
+
49
+ if (strategy === 'text') {
50
+ var xpath = xpathForText(selectorOrRef);
51
+ var result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
52
+ return result.singleNodeValue;
53
+ }
54
+
55
+ if (strategy === 'xpath') {
56
+ var result = document.evaluate(selectorOrRef, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
57
+ return result.singleNodeValue;
58
+ }
59
+
60
+ // Default: CSS selector
61
+ return document.querySelector(selectorOrRef);
62
+ };
63
+
64
+ /**
65
+ * Resolve all matching elements as an Array.
66
+ * @param {string} selector - Selector, XPath, or text
67
+ * @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
68
+ * @returns {Element[]}
69
+ */
70
+ window.__MCP__.resolveAll = function(selector, strategy) {
71
+ if (!selector) return [];
72
+
73
+ // Ref IDs resolve to a single element
74
+ var refMatch = selector.match(REF_PATTERN);
75
+ if (refMatch) {
76
+ var el = window.__MCP__.resolveRef(selector);
77
+ return el ? [el] : [];
78
+ }
79
+
80
+ if (strategy === 'text') {
81
+ var xpath = xpathForText(selector);
82
+ var snapshot = document.evaluate(xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
83
+ var results = [];
84
+ for (var i = 0; i < snapshot.snapshotLength; i++) {
85
+ results.push(snapshot.snapshotItem(i));
86
+ }
87
+ return results;
88
+ }
89
+
90
+ if (strategy === 'xpath') {
91
+ var snapshot = document.evaluate(selector, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
92
+ var results = [];
93
+ for (var i = 0; i < snapshot.snapshotLength; i++) {
94
+ results.push(snapshot.snapshotItem(i));
95
+ }
96
+ return results;
97
+ }
98
+
99
+ // Default: CSS
100
+ return Array.from(document.querySelectorAll(selector));
101
+ };
102
+
103
+ /**
104
+ * Count all matching elements.
105
+ * @param {string} selector - Selector, XPath, or text
106
+ * @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
107
+ * @returns {number}
108
+ */
109
+ window.__MCP__.countAll = function(selector, strategy) {
110
+ return window.__MCP__.resolveAll(selector, strategy).length;
111
+ };
112
+ })();
@@ -4,42 +4,38 @@
4
4
  * @param {Object} params
5
5
  * @param {string} params.type - What to wait for: 'selector', 'text', 'ipc-event'
6
6
  * @param {string} params.value - Selector/ref ID, text, or event name to wait for
7
+ * @param {string} params.strategy - Selector strategy (applies when type is 'selector'): 'css', 'xpath', or 'text'
7
8
  * @param {number} params.timeout - Timeout in milliseconds
8
9
  */
9
10
  (async function(params) {
10
- const { type, value, timeout } = params;
11
+ const { type, value, strategy, timeout } = params;
11
12
  const startTime = Date.now();
12
13
 
13
- // Resolve element from CSS selector or ref ID (e.g., "ref=e3" or "e3")
14
14
  function resolveElement(selectorOrRef) {
15
15
  if (!selectorOrRef) return null;
16
- var refMatch = selectorOrRef.match(/^(?:ref=)?(e\d+)$/);
17
- if (refMatch) {
18
- var refId = refMatch[1],
19
- refMap = window.__MCP_ARIA_REFS_REVERSE__;
20
- if (!refMap) return null; // For wait-for, return null instead of throwing
21
- return refMap.get(refId) || null;
22
- }
23
- return document.querySelector(selectorOrRef);
16
+ return window.__MCP__.resolveRef(selectorOrRef, strategy);
24
17
  }
25
18
 
26
- return new Promise((resolve, reject) => {
19
+ return new Promise(function(resolve, reject) {
27
20
  function check() {
28
21
  if (Date.now() - startTime > timeout) {
29
- reject(new Error(`Timeout waiting for ${type}: ${value}`));
22
+ reject(new Error('Timeout waiting for ' + type + ': ' + value));
30
23
  return;
31
24
  }
32
25
 
33
26
  if (type === 'selector') {
34
- const element = resolveElement(value);
27
+ var element = resolveElement(value);
35
28
  if (element) {
36
- resolve(`Element found: ${value}`);
29
+ var msg = 'Element found: ' + value;
30
+ var count = window.__MCP__.countAll(value, strategy);
31
+ if (count > 1) msg += ' (+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
32
+ resolve(msg);
37
33
  return;
38
34
  }
39
35
  } else if (type === 'text') {
40
- const found = document.body.innerText.includes(value);
36
+ var found = document.body.innerText.includes(value);
41
37
  if (found) {
42
- resolve(`Text found: ${value}`);
38
+ resolve('Text found: ' + value);
43
39
  return;
44
40
  }
45
41
  } else if (type === 'ipc-event') {
@@ -4,6 +4,7 @@ import { hasActiveSession, getDefaultSession, resolveTargetApp } from './session
4
4
  import { createMcpLogger } from '../logger.js';
5
5
  import { buildScreenshotScript, buildScreenshotCaptureScript, getHtml2CanvasSource, HTML2CANVAS_SCRIPT_ID, } from './scripts/html2canvas-loader.js';
6
6
  import { registerScript, isScriptRegistered } from './script-manager.js';
7
+ import { getResolveRefSource, RESOLVE_REF_SCRIPT_ID } from './scripts/index.js';
7
8
  /**
8
9
  * WebView Executor - Native IPC-based JavaScript execution
9
10
  *
@@ -44,6 +45,8 @@ export async function ensureReady() {
44
45
  if (session) {
45
46
  await connectPlugin(session.host, session.port);
46
47
  }
48
+ // Register the resolve-ref helper so ref-based selectors work in all tools
49
+ await registerScript(RESOLVE_REF_SCRIPT_ID, 'inline', getResolveRefSource());
47
50
  isInitialized = true;
48
51
  }
49
52
  /**
@@ -15,12 +15,22 @@ export const WindowTargetSchema = z.object({
15
15
  appIdentifier: z.union([z.string(), z.number()]).optional().describe('App port or bundle ID to target. Defaults to the only connected app or the default app if multiple are connected.'),
16
16
  });
17
17
  // ============================================================================
18
+ // Shared Selector Strategy
19
+ // ============================================================================
20
+ /**
21
+ * Reusable strategy field for tools that accept a selector.
22
+ * Defaults to 'css' for backward compatibility.
23
+ */
24
+ const selectorStrategyField = z.enum(['css', 'xpath', 'text']).default('css').describe('Selector strategy: "css" (default) for CSS selectors, "xpath" for XPath expressions, ' +
25
+ '"text" to find elements containing the given text. Ref IDs (e.g., "ref=e3") work with any strategy.');
26
+ // ============================================================================
18
27
  // Schemas
19
28
  // ============================================================================
20
29
  export const InteractSchema = WindowTargetSchema.extend({
21
30
  action: z.enum(['click', 'double-click', 'long-press', 'scroll', 'swipe', 'focus'])
22
31
  .describe('Type of interaction to perform'),
23
- selector: z.string().optional().describe('CSS selector for the element to interact with'),
32
+ selector: z.string().optional().describe('Element selector: CSS selector (default), XPath expression, text content, or ref ID (e.g., "ref=e3")'),
33
+ strategy: selectorStrategyField,
24
34
  x: z.number().optional().describe('X coordinate for direct coordinate interaction'),
25
35
  y: z.number().optional().describe('Y coordinate for direct coordinate interaction'),
26
36
  duration: z.number().optional()
@@ -42,7 +52,9 @@ export const ScreenshotSchema = WindowTargetSchema.extend({
42
52
  export const KeyboardSchema = WindowTargetSchema.extend({
43
53
  action: z.enum(['type', 'press', 'down', 'up'])
44
54
  .describe('Keyboard action type: "type" for typing text into an element, "press/down/up" for key events'),
45
- selector: z.string().optional().describe('CSS selector for element to type into (required for "type" action)'),
55
+ selector: z.string().optional().describe('Element selector for element to type into (required for "type" action): ' +
56
+ 'CSS selector (default), XPath, text content, or ref ID'),
57
+ strategy: selectorStrategyField,
46
58
  text: z.string().optional().describe('Text to type (required for "type" action)'),
47
59
  key: z.string().optional().describe('Key to press (required for "press/down/up" actions, e.g., "Enter", "a", "Escape")'),
48
60
  modifiers: z.array(z.enum(['Control', 'Alt', 'Shift', 'Meta'])).optional().describe('Modifier keys to hold'),
@@ -50,10 +62,12 @@ export const KeyboardSchema = WindowTargetSchema.extend({
50
62
  export const WaitForSchema = WindowTargetSchema.extend({
51
63
  type: z.enum(['selector', 'text', 'ipc-event']).describe('What to wait for'),
52
64
  value: z.string().describe('Selector, text content, or IPC event name to wait for'),
65
+ strategy: selectorStrategyField.describe('Selector strategy (applies when type is "selector"): "css" (default), "xpath", or "text".'),
53
66
  timeout: z.number().optional().default(5000).describe('Timeout in milliseconds (default: 5000ms)'),
54
67
  });
55
68
  export const GetStylesSchema = WindowTargetSchema.extend({
56
- selector: z.string().describe('CSS selector for element(s) to get styles from'),
69
+ selector: z.string().describe('Element selector: CSS selector (default), XPath expression, text content, or ref ID'),
70
+ strategy: selectorStrategyField,
57
71
  properties: z.array(z.string()).optional().describe('Specific CSS properties to retrieve. If omitted, returns all computed styles'),
58
72
  multiple: z.boolean().optional().default(false)
59
73
  .describe('Whether to get styles for all matching elements (true) or just the first (false)'),
@@ -68,8 +82,9 @@ export const FocusElementSchema = WindowTargetSchema.extend({
68
82
  selector: z.string().describe('CSS selector for element to focus'),
69
83
  });
70
84
  export const FindElementSchema = WindowTargetSchema.extend({
71
- selector: z.string(),
72
- strategy: z.enum(['css', 'xpath', 'text']).default('css'),
85
+ selector: z.string().describe('The selector to find: CSS selector (default), XPath expression, text content, or ref ID (e.g., "ref=e3"). ' +
86
+ 'Interpretation depends on strategy.'),
87
+ strategy: selectorStrategyField,
73
88
  });
74
89
  export const GetConsoleLogsSchema = WindowTargetSchema.extend({
75
90
  filter: z.string().optional().describe('Regex or keyword to filter logs'),
@@ -77,13 +92,14 @@ export const GetConsoleLogsSchema = WindowTargetSchema.extend({
77
92
  });
78
93
  export const DomSnapshotSchema = WindowTargetSchema.extend({
79
94
  type: z.enum(['accessibility', 'structure']).describe('Snapshot type'),
80
- selector: z.string().optional().describe('CSS selector to scope the snapshot. If omitted, snapshots entire document.'),
95
+ selector: z.string().optional().describe('Selector to scope the snapshot: CSS selector (default), XPath, text content, or ref ID. If omitted, snapshots entire document.'),
96
+ strategy: selectorStrategyField,
81
97
  });
82
98
  // ============================================================================
83
99
  // Implementation Functions
84
100
  // ============================================================================
85
101
  export async function interact(options) {
86
- const { action, selector, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY, windowId, appIdentifier } = options;
102
+ const { action, selector, strategy, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY, windowId, appIdentifier } = options;
87
103
  // Handle swipe action separately since it has different logic
88
104
  if (action === 'swipe') {
89
105
  return performSwipe({ fromX, fromY, toX, toY, duration, windowId, appIdentifier });
@@ -93,11 +109,12 @@ export async function interact(options) {
93
109
  if (!selector) {
94
110
  throw new Error('Focus action requires a selector');
95
111
  }
96
- return focusElement({ selector, windowId, appIdentifier });
112
+ return focusElement({ selector, strategy, windowId, appIdentifier });
97
113
  }
98
114
  const script = buildScript(SCRIPTS.interact, {
99
115
  action,
100
116
  selector: selector ?? null,
117
+ strategy: strategy ?? 'css',
101
118
  x: x ?? null,
102
119
  y: y ?? null,
103
120
  duration: duration ?? 500,
@@ -146,7 +163,7 @@ export async function screenshot(options = {}) {
146
163
  return result;
147
164
  }
148
165
  export async function keyboard(options) {
149
- const { action, selectorOrKey, textOrModifiers, modifiers, windowId, appIdentifier } = options;
166
+ const { action, selectorOrKey, strategy, textOrModifiers, modifiers, windowId, appIdentifier } = options;
150
167
  // Handle the different parameter combinations based on action
151
168
  if (action === 'type') {
152
169
  const selector = selectorOrKey;
@@ -154,7 +171,7 @@ export async function keyboard(options) {
154
171
  if (!selector || !text) {
155
172
  throw new Error('Type action requires both selector and text parameters');
156
173
  }
157
- const script = buildTypeScript(selector, text);
174
+ const script = buildTypeScript(selector, text, strategy);
158
175
  try {
159
176
  return await executeInWebview(script, windowId, appIdentifier);
160
177
  }
@@ -179,8 +196,8 @@ export async function keyboard(options) {
179
196
  }
180
197
  }
181
198
  export async function waitFor(options) {
182
- const { type, value, timeout = 5000, windowId, appIdentifier } = options;
183
- const script = buildScript(SCRIPTS.waitFor, { type, value, timeout });
199
+ const { type, value, strategy, timeout = 5000, windowId, appIdentifier } = options;
200
+ const script = buildScript(SCRIPTS.waitFor, { type, value, strategy: strategy ?? 'css', timeout });
184
201
  try {
185
202
  return await executeInWebview(script, windowId, appIdentifier);
186
203
  }
@@ -190,9 +207,10 @@ export async function waitFor(options) {
190
207
  }
191
208
  }
192
209
  export async function getStyles(options) {
193
- const { selector, properties, multiple = false, windowId, appIdentifier } = options;
210
+ const { selector, strategy, properties, multiple = false, windowId, appIdentifier } = options;
194
211
  const script = buildScript(SCRIPTS.getStyles, {
195
212
  selector,
213
+ strategy: strategy ?? 'css',
196
214
  properties: properties || [],
197
215
  multiple,
198
216
  });
@@ -232,8 +250,8 @@ export async function executeJavaScript(options) {
232
250
  }
233
251
  }
234
252
  export async function focusElement(options) {
235
- const { selector, windowId, appIdentifier } = options;
236
- const script = buildScript(SCRIPTS.focus, { selector });
253
+ const { selector, strategy, windowId, appIdentifier } = options;
254
+ const script = buildScript(SCRIPTS.focus, { selector, strategy: strategy ?? 'css' });
237
255
  try {
238
256
  return await executeInWebview(script, windowId, appIdentifier);
239
257
  }
@@ -274,13 +292,13 @@ export async function getConsoleLogs(options = {}) {
274
292
  * Uses aria-api for comprehensive, spec-compliant accessibility computation.
275
293
  */
276
294
  export async function domSnapshot(options) {
277
- const { type, selector, windowId, appIdentifier } = options;
295
+ const { type, selector, strategy, windowId, appIdentifier } = options;
278
296
  // Only load aria-api for accessibility snapshots
279
297
  if (type === 'accessibility') {
280
298
  await ensureAriaApiLoaded(windowId);
281
299
  }
282
300
  // Then execute the snapshot script
283
- const script = buildScript(SCRIPTS.domSnapshot, { type, selector: selector ?? null });
301
+ const script = buildScript(SCRIPTS.domSnapshot, { type, selector: selector ?? null, strategy: strategy ?? 'css' });
284
302
  try {
285
303
  return await executeInWebview(script, windowId, appIdentifier);
286
304
  }
@@ -169,6 +169,8 @@ export const TOOLS = [
169
169
  {
170
170
  name: 'webview_find_element',
171
171
  description: '[Tauri Apps Only] Find DOM elements in a running Tauri app\'s webview. ' +
172
+ 'Supports CSS selectors (default), XPath expressions, and text content matching via the strategy parameter. ' +
173
+ 'Returns the element\'s HTML. ' +
172
174
  'Requires active driver_session. ' +
173
175
  MULTI_APP_DESC + ' ' +
174
176
  'For browser pages or documentation sites, use Chrome DevTools MCP instead.',
@@ -212,6 +214,7 @@ export const TOOLS = [
212
214
  name: 'webview_interact',
213
215
  description: '[Tauri Apps Only] Click, scroll, swipe, focus, or perform gestures in a Tauri app webview. ' +
214
216
  'Supported actions: click, double-click, long-press, scroll, swipe, focus. ' +
217
+ 'Supports CSS selectors (default), XPath, and text content matching via the strategy parameter. ' +
215
218
  'Requires active driver_session. ' +
216
219
  'For browser interaction, use Chrome DevTools MCP instead.',
217
220
  category: TOOL_CATEGORIES.UI_AUTOMATION,
@@ -262,6 +265,8 @@ export const TOOLS = [
262
265
  {
263
266
  name: 'webview_keyboard',
264
267
  description: '[Tauri Apps Only] Type text or send keyboard events in a Tauri app. ' +
268
+ 'The selector parameter (for "type" action) supports CSS selectors (default), ' +
269
+ 'XPath, and text content matching via the strategy parameter. ' +
265
270
  'Requires active driver_session. ' +
266
271
  MULTI_APP_DESC + ' ' +
267
272
  'For browser keyboard input, use Chrome DevTools MCP instead.',
@@ -279,6 +284,7 @@ export const TOOLS = [
279
284
  return await keyboard({
280
285
  action: parsed.action,
281
286
  selectorOrKey: parsed.selector,
287
+ strategy: parsed.strategy,
282
288
  textOrModifiers: parsed.text,
283
289
  windowId: parsed.windowId,
284
290
  appIdentifier: parsed.appIdentifier,
@@ -296,6 +302,7 @@ export const TOOLS = [
296
302
  {
297
303
  name: 'webview_wait_for',
298
304
  description: '[Tauri Apps Only] Wait for elements, text, or IPC events in a Tauri app. ' +
305
+ 'When type is "selector", supports CSS (default), XPath, and text strategies via the strategy parameter. ' +
299
306
  'Requires active driver_session. ' +
300
307
  MULTI_APP_DESC + ' ' +
301
308
  'For browser waits, use Chrome DevTools MCP instead.',
@@ -311,6 +318,7 @@ export const TOOLS = [
311
318
  return await waitFor({
312
319
  type: parsed.type,
313
320
  value: parsed.value,
321
+ strategy: parsed.strategy,
314
322
  timeout: parsed.timeout,
315
323
  windowId: parsed.windowId,
316
324
  appIdentifier: parsed.appIdentifier,
@@ -320,6 +328,7 @@ export const TOOLS = [
320
328
  {
321
329
  name: 'webview_get_styles',
322
330
  description: '[Tauri Apps Only] Get computed CSS styles from elements in a Tauri app. ' +
331
+ 'Supports CSS selectors (default), XPath, and text content matching via the strategy parameter. ' +
323
332
  'Requires active driver_session. ' +
324
333
  MULTI_APP_DESC + ' ' +
325
334
  'For browser style inspection, use Chrome DevTools MCP instead.',
@@ -334,6 +343,7 @@ export const TOOLS = [
334
343
  const parsed = GetStylesSchema.parse(args);
335
344
  return await getStyles({
336
345
  selector: parsed.selector,
346
+ strategy: parsed.strategy,
337
347
  properties: parsed.properties,
338
348
  multiple: parsed.multiple,
339
349
  windowId: parsed.windowId,
@@ -378,6 +388,7 @@ export const TOOLS = [
378
388
  'with element tag names, IDs, CSS classes, and data-testid attributes (if present). ' +
379
389
  'Use this for understanding page layout, debugging CSS selectors, or locating elements by class/ID. ' +
380
390
  'Use the optional selector parameter to scope the snapshot to a subtree. ' +
391
+ 'The selector supports CSS (default), XPath, and text content matching via the strategy parameter. ' +
381
392
  'Requires active driver_session. ' +
382
393
  MULTI_APP_DESC,
383
394
  category: TOOL_CATEGORIES.UI_AUTOMATION,
@@ -392,6 +403,7 @@ export const TOOLS = [
392
403
  return await domSnapshot({
393
404
  type: parsed.type,
394
405
  selector: parsed.selector,
406
+ strategy: parsed.strategy,
395
407
  windowId: parsed.windowId,
396
408
  appIdentifier: parsed.appIdentifier,
397
409
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hypothesi/tauri-mcp-server",
3
- "version": "0.8.1",
3
+ "version": "0.8.3",
4
4
  "mcpName": "io.github.hypothesi/mcp-server-tauri",
5
5
  "description": "A Model Context Protocol server for use with Tauri v2 applications",
6
6
  "type": "module",