@hypothesi/tauri-mcp-server 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,12 +18,13 @@
18
18
  *
19
19
  * @param {Object} params
20
20
  * @param {string} params.type - Snapshot type ('accessibility' or 'structure')
21
- * @param {string|null} params.selector - Optional CSS selector to scope snapshot
21
+ * @param {string|null} params.selector - Optional selector to scope snapshot (CSS, XPath, text, or ref ID)
22
+ * @param {string} params.strategy - Selector strategy: 'css', 'xpath', or 'text'
22
23
  */
23
24
  (function(params) {
24
25
  'use strict';
25
26
 
26
- const { type, selector } = params;
27
+ const { type, selector, strategy } = params;
27
28
 
28
29
  // ARIA states to include in snapshot (used by accessibility type)
29
30
  const ARIA_STATES = [
@@ -445,18 +446,17 @@
445
446
 
446
447
  if (selector) {
447
448
  try {
448
- document.querySelector(selector);
449
+ var structureElements = window.__MCP__.resolveAll(selector, strategy);
449
450
  } catch (e) {
450
- return 'Error: Invalid CSS selector "' + selector + '": ' + e.message;
451
+ return 'Error: Invalid selector "' + selector + '" (strategy: ' + strategy + '): ' + e.message;
451
452
  }
452
453
 
453
- var structureElements = document.querySelectorAll(selector);
454
454
  if (structureElements.length === 0) {
455
- return 'Error: No elements found matching selector "' + selector + '"';
455
+ return 'Error: No elements found matching selector "' + selector + '" (strategy: ' + strategy + ')';
456
456
  }
457
457
 
458
- structureRoots = Array.from(structureElements);
459
- structureScopeInfo = '# Scoped to: ' + selector + '\n';
458
+ structureRoots = structureElements;
459
+ structureScopeInfo = '# Scoped to: ' + selector + (strategy !== 'css' ? ' (strategy: ' + strategy + ')' : '') + '\n';
460
460
  if (structureRoots.length > 1) structureScopeInfo += '# ' + structureRoots.length + ' elements matched\n';
461
461
  } else {
462
462
  structureRoots = [document.body];
@@ -498,18 +498,17 @@
498
498
 
499
499
  if (selector) {
500
500
  try {
501
- document.querySelector(selector);
501
+ var elements = window.__MCP__.resolveAll(selector, strategy);
502
502
  } catch (e) {
503
- return 'Error: Invalid CSS selector "' + selector + '": ' + e.message;
503
+ return 'Error: Invalid selector "' + selector + '" (strategy: ' + strategy + '): ' + e.message;
504
504
  }
505
505
 
506
- var elements = document.querySelectorAll(selector);
507
506
  if (elements.length === 0) {
508
- return 'Error: No elements found matching selector "' + selector + '"';
507
+ return 'Error: No elements found matching selector "' + selector + '" (strategy: ' + strategy + ')';
509
508
  }
510
509
 
511
- roots = Array.from(elements);
512
- scopeInfo = '# Scoped to: ' + selector + '\n';
510
+ roots = elements;
511
+ scopeInfo = '# Scoped to: ' + selector + (strategy !== 'css' ? ' (strategy: ' + strategy + ')' : '') + '\n';
513
512
  if (roots.length > 1) scopeInfo += '# ' + roots.length + ' elements matched\n';
514
513
  } else {
515
514
  roots = [document.body];
@@ -7,36 +7,8 @@
7
7
  */
8
8
  (function(params) {
9
9
  const { selector, strategy } = params;
10
- let element;
11
10
 
12
- // Check if it's a ref ID first (works with any strategy)
13
- if (/^\[?(?:ref=)?(e\d+)\]?$/.test(selector)) {
14
- element = window.__MCP__.resolveRef(selector);
15
- } else if (strategy === 'text') {
16
- // Find element containing text
17
- const xpath = "//*[contains(text(), '" + selector + "')]";
18
- const result = document.evaluate(
19
- xpath,
20
- document,
21
- null,
22
- XPathResult.FIRST_ORDERED_NODE_TYPE,
23
- null
24
- );
25
- element = result.singleNodeValue;
26
- } else if (strategy === 'xpath') {
27
- // XPath selector
28
- const result = document.evaluate(
29
- selector,
30
- document,
31
- null,
32
- XPathResult.FIRST_ORDERED_NODE_TYPE,
33
- null
34
- );
35
- element = result.singleNodeValue;
36
- } else {
37
- // CSS selector (default)
38
- element = window.__MCP__.resolveRef(selector);
39
- }
11
+ var element = window.__MCP__.resolveRef(selector, strategy);
40
12
 
41
13
  if (element) {
42
14
  const outerHTML = element.outerHTML;
@@ -44,7 +16,10 @@
44
16
  const truncated = outerHTML.length > 5000
45
17
  ? outerHTML.substring(0, 5000) + '...'
46
18
  : outerHTML;
47
- return 'Found element: ' + truncated;
19
+ var msg = 'Found element: ' + truncated;
20
+ var count = window.__MCP__.countAll(selector, strategy);
21
+ if (count > 1) msg += '\n(+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
22
+ return msg;
48
23
  }
49
24
 
50
25
  return 'Element not found';
@@ -2,19 +2,23 @@
2
2
  * Focus an element
3
3
  *
4
4
  * @param {Object} params
5
- * @param {string} params.selector - CSS selector or ref ID (e.g., "ref=e3") for element to focus
5
+ * @param {string} params.selector - CSS selector, XPath, text, or ref ID (e.g., "ref=e3") for element to focus
6
+ * @param {string} params.strategy - Selector strategy: 'css', 'xpath', or 'text'
6
7
  */
7
8
  (function(params) {
8
- const { selector } = params;
9
+ const { selector, strategy } = params;
9
10
 
10
11
  function resolveElement(selectorOrRef) {
11
12
  if (!selectorOrRef) return null;
12
- var el = window.__MCP__.resolveRef(selectorOrRef);
13
+ var el = window.__MCP__.resolveRef(selectorOrRef, strategy);
13
14
  if (!el) throw new Error('Element not found: ' + selectorOrRef);
14
15
  return el;
15
16
  }
16
17
 
17
18
  const element = resolveElement(selector);
18
19
  element.focus();
19
- return `Focused element: ${selector}`;
20
+ var msg = 'Focused element: ' + selector;
21
+ var count = window.__MCP__.countAll(selector, strategy);
22
+ if (count > 1) msg += ' (+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
23
+ return msg;
20
24
  })
@@ -2,36 +2,33 @@
2
2
  * Get computed CSS styles for elements
3
3
  *
4
4
  * @param {Object} params
5
- * @param {string} params.selector - CSS selector or ref ID (e.g., "ref=e3") for element(s)
5
+ * @param {string} params.selector - CSS selector, XPath, text, or ref ID (e.g., "ref=e3") for element(s)
6
+ * @param {string} params.strategy - Selector strategy: 'css', 'xpath', or 'text'
6
7
  * @param {string[]} params.properties - Specific CSS properties to retrieve
7
8
  * @param {boolean} params.multiple - Whether to get styles for all matching elements
8
9
  */
9
10
  (function(params) {
10
- const { selector, properties, multiple } = params;
11
+ const { selector, strategy, properties, multiple } = params;
11
12
 
12
- function resolveElement(selectorOrRef) {
13
- if (!selectorOrRef) return null;
14
- var el = window.__MCP__.resolveRef(selectorOrRef);
15
- if (!el) throw new Error('Element not found: ' + selectorOrRef);
16
- return el;
17
- }
13
+ var elements;
18
14
 
19
- // Check if selector is a ref ID - if so, multiple doesn't apply
20
- const isRef = /^\[?(?:ref=)?(e\d+)\]?$/.test(selector);
21
- const elements = isRef
22
- ? [resolveElement(selector)]
23
- : (multiple ? Array.from(document.querySelectorAll(selector)) : [document.querySelector(selector)]);
15
+ if (multiple) {
16
+ elements = window.__MCP__.resolveAll(selector, strategy);
17
+ } else {
18
+ var el = window.__MCP__.resolveRef(selector, strategy);
19
+ elements = el ? [el] : [];
20
+ }
24
21
 
25
22
  if (!elements[0]) {
26
- throw new Error(`Element not found: ${selector}`);
23
+ throw new Error('Element not found: ' + selector);
27
24
  }
28
25
 
29
- const results = elements.map(element => {
26
+ const results = elements.map(function(element) {
30
27
  const styles = window.getComputedStyle(element);
31
28
 
32
29
  if (properties.length > 0) {
33
30
  const result = {};
34
- properties.forEach(prop => {
31
+ properties.forEach(function(prop) {
35
32
  result[prop] = styles.getPropertyValue(prop);
36
33
  });
37
34
  return result;
@@ -22,7 +22,11 @@ export function getHtml2CanvasSource() {
22
22
  // Resolve the path to html2canvas-pro.js (UMD build)
23
23
  // Note: We use the main entry point since the minified version isn't exported
24
24
  const html2canvasProPath = require.resolve('html2canvas-pro');
25
- html2canvasProSource = readFileSync(html2canvasProPath, 'utf-8');
25
+ html2canvasProSource = readFileSync(html2canvasProPath, 'utf-8')
26
+ // Strip sourceMappingURL to prevent the browser from trying to fetch the
27
+ // .map file relative to the page's base URL (which fails when the app is
28
+ // served under a sub-path like '/some/path/').
29
+ .replace(/\/\/[#@]\s*sourceMappingURL=.*/g, '');
26
30
  }
27
31
  return html2canvasProSource;
28
32
  }
@@ -41,14 +41,17 @@ export function buildScript(script, params) {
41
41
  /**
42
42
  * Build a script for typing text (uses the keyboard script's typeText function)
43
43
  */
44
- export function buildTypeScript(selector, text) {
44
+ export function buildTypeScript(selector, text, strategy) {
45
45
  const escapedText = text.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
46
+ const escapedSelector = selector.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
47
+ const strat = strategy || 'css';
46
48
  return `
47
49
  (function() {
48
- const selector = '${selector}';
50
+ const selector = '${escapedSelector}';
51
+ const strategy = '${strat}';
49
52
  const text = '${escapedText}';
50
53
 
51
- var element = window.__MCP__.resolveRef(selector);
54
+ var element = window.__MCP__.resolveRef(selector, strategy);
52
55
  if (!element) throw new Error('Element not found: ' + selector);
53
56
 
54
57
  element.focus();
@@ -56,7 +59,10 @@ export function buildTypeScript(selector, text) {
56
59
  element.dispatchEvent(new Event('input', { bubbles: true }));
57
60
  element.dispatchEvent(new Event('change', { bubbles: true }));
58
61
 
59
- return 'Typed "' + text + '" into ' + selector;
62
+ var msg = 'Typed "' + text + '" into ' + selector;
63
+ var count = window.__MCP__.countAll(selector, strategy);
64
+ if (count > 1) msg += ' (+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
65
+ return msg;
60
66
  })()
61
67
  `;
62
68
  }
@@ -4,7 +4,8 @@
4
4
  *
5
5
  * @param {Object} params
6
6
  * @param {string} params.action - The action to perform
7
- * @param {string|null} params.selector - CSS selector or ref ID (e.g., "ref=e3") for the element
7
+ * @param {string|null} params.selector - CSS selector, XPath, text, or ref ID (e.g., "ref=e3") for the element
8
+ * @param {string} params.strategy - Selector strategy: 'css', 'xpath', or 'text'
8
9
  * @param {number|null} params.x - X coordinate
9
10
  * @param {number|null} params.y - Y coordinate
10
11
  * @param {number} params.duration - Duration for long-press
@@ -12,15 +13,22 @@
12
13
  * @param {number} params.scrollY - Vertical scroll amount
13
14
  */
14
15
  (function(params) {
15
- const { action, selector, x, y, duration, scrollX, scrollY } = params;
16
+ const { action, selector, strategy, x, y, duration, scrollX, scrollY } = params;
16
17
 
17
18
  function resolveElement(selectorOrRef) {
18
19
  if (!selectorOrRef) return null;
19
- var el = window.__MCP__.resolveRef(selectorOrRef);
20
+ var el = window.__MCP__.resolveRef(selectorOrRef, strategy);
20
21
  if (!el) throw new Error('Element not found: ' + selectorOrRef);
21
22
  return el;
22
23
  }
23
24
 
25
+ function matchHint() {
26
+ if (!selector) return '';
27
+ var count = window.__MCP__.countAll(selector, strategy);
28
+ if (count > 1) return ' (+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
29
+ return '';
30
+ }
31
+
24
32
  let element = null;
25
33
  let targetX, targetY;
26
34
 
@@ -60,7 +68,7 @@
60
68
  element.dispatchEvent(new MouseEvent('mouseup', eventOptions));
61
69
  element.dispatchEvent(new MouseEvent('click', eventOptions));
62
70
  }
63
- return `Clicked at (${targetX}, ${targetY})`;
71
+ return `Clicked at (${targetX}, ${targetY})` + matchHint();
64
72
  }
65
73
 
66
74
  if (action === 'double-click') {
@@ -73,7 +81,7 @@
73
81
  element.dispatchEvent(new MouseEvent('click', eventOptions));
74
82
  element.dispatchEvent(new MouseEvent('dblclick', eventOptions));
75
83
  }
76
- return `Double-clicked at (${targetX}, ${targetY})`;
84
+ return `Double-clicked at (${targetX}, ${targetY})` + matchHint();
77
85
  }
78
86
 
79
87
  if (action === 'long-press') {
@@ -83,7 +91,7 @@
83
91
  element.dispatchEvent(new MouseEvent('mouseup', eventOptions));
84
92
  }, duration);
85
93
  }
86
- return `Long-pressed at (${targetX}, ${targetY}) for ${duration}ms`;
94
+ return `Long-pressed at (${targetX}, ${targetY}) for ${duration}ms` + matchHint();
87
95
  }
88
96
 
89
97
  if (action === 'scroll') {
@@ -95,7 +103,7 @@
95
103
  scrollTarget.scrollLeft += scrollX;
96
104
  scrollTarget.scrollTop += scrollY;
97
105
  }
98
- return `Scrolled by (${scrollX}, ${scrollY}) pixels`;
106
+ return `Scrolled by (${scrollX}, ${scrollY}) pixels` + matchHint();
99
107
  }
100
108
  return 'No scroll performed (scrollX and scrollY are both 0)';
101
109
  }
@@ -1,16 +1,43 @@
1
1
  /**
2
2
  * Shared ref resolver - always available via window.__MCP__.resolveRef.
3
- * Accepts a ref ID ("e3", "ref=e3", "[ref=e3]") or CSS selector.
3
+ * Accepts a ref ID ("e3", "ref=e3", "[ref=e3]"), CSS selector, XPath, or text.
4
4
  * Returns the DOM element, or null if not found.
5
5
  *
6
6
  * Reads window.__MCP__.reverseRefs dynamically at call time so it always
7
7
  * uses the latest snapshot's data.
8
+ *
9
+ * Also provides:
10
+ * - resolveAll(selector, strategy) - returns an Array of matching elements
11
+ * - countAll(selector, strategy) - returns the total match count
8
12
  */
9
13
  (function() {
10
14
  window.__MCP__ = window.__MCP__ || {};
11
- window.__MCP__.resolveRef = function(selectorOrRef) {
15
+
16
+ var REF_PATTERN = /^\[?(?:ref=)?(e\d+)\]?$/;
17
+
18
+ function xpathForText(text) {
19
+ // Escape single quotes for XPath by splitting on ' and using concat()
20
+ if (text.indexOf("'") === -1) {
21
+ return "//*[contains(text(), '" + text + "')]";
22
+ }
23
+ var parts = text.split("'");
24
+ var expr = 'concat(' + parts.map(function(p, i) {
25
+ return (i > 0 ? ",\"'\",": '') + "'" + p + "'";
26
+ }).join('') + ')';
27
+ return '//*[contains(text(), ' + expr + ')]';
28
+ }
29
+
30
+ /**
31
+ * Resolve a single element by selector and strategy.
32
+ * @param {string} selectorOrRef - Selector, ref ID, XPath, or text
33
+ * @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
34
+ * @returns {Element|null}
35
+ */
36
+ window.__MCP__.resolveRef = function(selectorOrRef, strategy) {
12
37
  if (!selectorOrRef) return null;
13
- var refMatch = selectorOrRef.match(/^\[?(?:ref=)?(e\d+)\]?$/);
38
+
39
+ // Ref IDs always take priority regardless of strategy
40
+ var refMatch = selectorOrRef.match(REF_PATTERN);
14
41
  if (refMatch) {
15
42
  var reverseRefs = window.__MCP__.reverseRefs;
16
43
  if (!reverseRefs) {
@@ -18,6 +45,68 @@
18
45
  }
19
46
  return reverseRefs.get(refMatch[1]) || null;
20
47
  }
48
+
49
+ if (strategy === 'text') {
50
+ var xpath = xpathForText(selectorOrRef);
51
+ var result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
52
+ return result.singleNodeValue;
53
+ }
54
+
55
+ if (strategy === 'xpath') {
56
+ var result = document.evaluate(selectorOrRef, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
57
+ return result.singleNodeValue;
58
+ }
59
+
60
+ // Default: CSS selector
21
61
  return document.querySelector(selectorOrRef);
22
62
  };
63
+
64
+ /**
65
+ * Resolve all matching elements as an Array.
66
+ * @param {string} selector - Selector, XPath, or text
67
+ * @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
68
+ * @returns {Element[]}
69
+ */
70
+ window.__MCP__.resolveAll = function(selector, strategy) {
71
+ if (!selector) return [];
72
+
73
+ // Ref IDs resolve to a single element
74
+ var refMatch = selector.match(REF_PATTERN);
75
+ if (refMatch) {
76
+ var el = window.__MCP__.resolveRef(selector);
77
+ return el ? [el] : [];
78
+ }
79
+
80
+ if (strategy === 'text') {
81
+ var xpath = xpathForText(selector);
82
+ var snapshot = document.evaluate(xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
83
+ var results = [];
84
+ for (var i = 0; i < snapshot.snapshotLength; i++) {
85
+ results.push(snapshot.snapshotItem(i));
86
+ }
87
+ return results;
88
+ }
89
+
90
+ if (strategy === 'xpath') {
91
+ var snapshot = document.evaluate(selector, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
92
+ var results = [];
93
+ for (var i = 0; i < snapshot.snapshotLength; i++) {
94
+ results.push(snapshot.snapshotItem(i));
95
+ }
96
+ return results;
97
+ }
98
+
99
+ // Default: CSS
100
+ return Array.from(document.querySelectorAll(selector));
101
+ };
102
+
103
+ /**
104
+ * Count all matching elements.
105
+ * @param {string} selector - Selector, XPath, or text
106
+ * @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
107
+ * @returns {number}
108
+ */
109
+ window.__MCP__.countAll = function(selector, strategy) {
110
+ return window.__MCP__.resolveAll(selector, strategy).length;
111
+ };
23
112
  })();
@@ -4,34 +4,38 @@
4
4
  * @param {Object} params
5
5
  * @param {string} params.type - What to wait for: 'selector', 'text', 'ipc-event'
6
6
  * @param {string} params.value - Selector/ref ID, text, or event name to wait for
7
+ * @param {string} params.strategy - Selector strategy (applies when type is 'selector'): 'css', 'xpath', or 'text'
7
8
  * @param {number} params.timeout - Timeout in milliseconds
8
9
  */
9
10
  (async function(params) {
10
- const { type, value, timeout } = params;
11
+ const { type, value, strategy, timeout } = params;
11
12
  const startTime = Date.now();
12
13
 
13
14
  function resolveElement(selectorOrRef) {
14
15
  if (!selectorOrRef) return null;
15
- return window.__MCP__.resolveRef(selectorOrRef);
16
+ return window.__MCP__.resolveRef(selectorOrRef, strategy);
16
17
  }
17
18
 
18
- return new Promise((resolve, reject) => {
19
+ return new Promise(function(resolve, reject) {
19
20
  function check() {
20
21
  if (Date.now() - startTime > timeout) {
21
- reject(new Error(`Timeout waiting for ${type}: ${value}`));
22
+ reject(new Error('Timeout waiting for ' + type + ': ' + value));
22
23
  return;
23
24
  }
24
25
 
25
26
  if (type === 'selector') {
26
- const element = resolveElement(value);
27
+ var element = resolveElement(value);
27
28
  if (element) {
28
- resolve(`Element found: ${value}`);
29
+ var msg = 'Element found: ' + value;
30
+ var count = window.__MCP__.countAll(value, strategy);
31
+ if (count > 1) msg += ' (+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
32
+ resolve(msg);
29
33
  return;
30
34
  }
31
35
  } else if (type === 'text') {
32
- const found = document.body.innerText.includes(value);
36
+ var found = document.body.innerText.includes(value);
33
37
  if (found) {
34
- resolve(`Text found: ${value}`);
38
+ resolve('Text found: ' + value);
35
39
  return;
36
40
  }
37
41
  } else if (type === 'ipc-event') {
@@ -15,12 +15,22 @@ export const WindowTargetSchema = z.object({
15
15
  appIdentifier: z.union([z.string(), z.number()]).optional().describe('App port or bundle ID to target. Defaults to the only connected app or the default app if multiple are connected.'),
16
16
  });
17
17
  // ============================================================================
18
+ // Shared Selector Strategy
19
+ // ============================================================================
20
+ /**
21
+ * Reusable strategy field for tools that accept a selector.
22
+ * Defaults to 'css' for backward compatibility.
23
+ */
24
+ const selectorStrategyField = z.enum(['css', 'xpath', 'text']).default('css').describe('Selector strategy: "css" (default) for CSS selectors, "xpath" for XPath expressions, ' +
25
+ '"text" to find elements containing the given text. Ref IDs (e.g., "ref=e3") work with any strategy.');
26
+ // ============================================================================
18
27
  // Schemas
19
28
  // ============================================================================
20
29
  export const InteractSchema = WindowTargetSchema.extend({
21
30
  action: z.enum(['click', 'double-click', 'long-press', 'scroll', 'swipe', 'focus'])
22
31
  .describe('Type of interaction to perform'),
23
- selector: z.string().optional().describe('CSS selector for the element to interact with'),
32
+ selector: z.string().optional().describe('Element selector: CSS selector (default), XPath expression, text content, or ref ID (e.g., "ref=e3")'),
33
+ strategy: selectorStrategyField,
24
34
  x: z.number().optional().describe('X coordinate for direct coordinate interaction'),
25
35
  y: z.number().optional().describe('Y coordinate for direct coordinate interaction'),
26
36
  duration: z.number().optional()
@@ -42,7 +52,9 @@ export const ScreenshotSchema = WindowTargetSchema.extend({
42
52
  export const KeyboardSchema = WindowTargetSchema.extend({
43
53
  action: z.enum(['type', 'press', 'down', 'up'])
44
54
  .describe('Keyboard action type: "type" for typing text into an element, "press/down/up" for key events'),
45
- selector: z.string().optional().describe('CSS selector for element to type into (required for "type" action)'),
55
+ selector: z.string().optional().describe('Element selector for element to type into (required for "type" action): ' +
56
+ 'CSS selector (default), XPath, text content, or ref ID'),
57
+ strategy: selectorStrategyField,
46
58
  text: z.string().optional().describe('Text to type (required for "type" action)'),
47
59
  key: z.string().optional().describe('Key to press (required for "press/down/up" actions, e.g., "Enter", "a", "Escape")'),
48
60
  modifiers: z.array(z.enum(['Control', 'Alt', 'Shift', 'Meta'])).optional().describe('Modifier keys to hold'),
@@ -50,10 +62,12 @@ export const KeyboardSchema = WindowTargetSchema.extend({
50
62
  export const WaitForSchema = WindowTargetSchema.extend({
51
63
  type: z.enum(['selector', 'text', 'ipc-event']).describe('What to wait for'),
52
64
  value: z.string().describe('Selector, text content, or IPC event name to wait for'),
65
+ strategy: selectorStrategyField.describe('Selector strategy (applies when type is "selector"): "css" (default), "xpath", or "text".'),
53
66
  timeout: z.number().optional().default(5000).describe('Timeout in milliseconds (default: 5000ms)'),
54
67
  });
55
68
  export const GetStylesSchema = WindowTargetSchema.extend({
56
- selector: z.string().describe('CSS selector for element(s) to get styles from'),
69
+ selector: z.string().describe('Element selector: CSS selector (default), XPath expression, text content, or ref ID'),
70
+ strategy: selectorStrategyField,
57
71
  properties: z.array(z.string()).optional().describe('Specific CSS properties to retrieve. If omitted, returns all computed styles'),
58
72
  multiple: z.boolean().optional().default(false)
59
73
  .describe('Whether to get styles for all matching elements (true) or just the first (false)'),
@@ -68,8 +82,9 @@ export const FocusElementSchema = WindowTargetSchema.extend({
68
82
  selector: z.string().describe('CSS selector for element to focus'),
69
83
  });
70
84
  export const FindElementSchema = WindowTargetSchema.extend({
71
- selector: z.string(),
72
- strategy: z.enum(['css', 'xpath', 'text']).default('css'),
85
+ selector: z.string().describe('The selector to find: CSS selector (default), XPath expression, text content, or ref ID (e.g., "ref=e3"). ' +
86
+ 'Interpretation depends on strategy.'),
87
+ strategy: selectorStrategyField,
73
88
  });
74
89
  export const GetConsoleLogsSchema = WindowTargetSchema.extend({
75
90
  filter: z.string().optional().describe('Regex or keyword to filter logs'),
@@ -77,13 +92,14 @@ export const GetConsoleLogsSchema = WindowTargetSchema.extend({
77
92
  });
78
93
  export const DomSnapshotSchema = WindowTargetSchema.extend({
79
94
  type: z.enum(['accessibility', 'structure']).describe('Snapshot type'),
80
- selector: z.string().optional().describe('CSS selector to scope the snapshot. If omitted, snapshots entire document.'),
95
+ selector: z.string().optional().describe('Selector to scope the snapshot: CSS selector (default), XPath, text content, or ref ID. If omitted, snapshots entire document.'),
96
+ strategy: selectorStrategyField,
81
97
  });
82
98
  // ============================================================================
83
99
  // Implementation Functions
84
100
  // ============================================================================
85
101
  export async function interact(options) {
86
- const { action, selector, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY, windowId, appIdentifier } = options;
102
+ const { action, selector, strategy, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY, windowId, appIdentifier } = options;
87
103
  // Handle swipe action separately since it has different logic
88
104
  if (action === 'swipe') {
89
105
  return performSwipe({ fromX, fromY, toX, toY, duration, windowId, appIdentifier });
@@ -93,11 +109,12 @@ export async function interact(options) {
93
109
  if (!selector) {
94
110
  throw new Error('Focus action requires a selector');
95
111
  }
96
- return focusElement({ selector, windowId, appIdentifier });
112
+ return focusElement({ selector, strategy, windowId, appIdentifier });
97
113
  }
98
114
  const script = buildScript(SCRIPTS.interact, {
99
115
  action,
100
116
  selector: selector ?? null,
117
+ strategy: strategy ?? 'css',
101
118
  x: x ?? null,
102
119
  y: y ?? null,
103
120
  duration: duration ?? 500,
@@ -146,7 +163,7 @@ export async function screenshot(options = {}) {
146
163
  return result;
147
164
  }
148
165
  export async function keyboard(options) {
149
- const { action, selectorOrKey, textOrModifiers, modifiers, windowId, appIdentifier } = options;
166
+ const { action, selectorOrKey, strategy, textOrModifiers, modifiers, windowId, appIdentifier } = options;
150
167
  // Handle the different parameter combinations based on action
151
168
  if (action === 'type') {
152
169
  const selector = selectorOrKey;
@@ -154,7 +171,7 @@ export async function keyboard(options) {
154
171
  if (!selector || !text) {
155
172
  throw new Error('Type action requires both selector and text parameters');
156
173
  }
157
- const script = buildTypeScript(selector, text);
174
+ const script = buildTypeScript(selector, text, strategy);
158
175
  try {
159
176
  return await executeInWebview(script, windowId, appIdentifier);
160
177
  }
@@ -179,8 +196,8 @@ export async function keyboard(options) {
179
196
  }
180
197
  }
181
198
  export async function waitFor(options) {
182
- const { type, value, timeout = 5000, windowId, appIdentifier } = options;
183
- const script = buildScript(SCRIPTS.waitFor, { type, value, timeout });
199
+ const { type, value, strategy, timeout = 5000, windowId, appIdentifier } = options;
200
+ const script = buildScript(SCRIPTS.waitFor, { type, value, strategy: strategy ?? 'css', timeout });
184
201
  try {
185
202
  return await executeInWebview(script, windowId, appIdentifier);
186
203
  }
@@ -190,9 +207,10 @@ export async function waitFor(options) {
190
207
  }
191
208
  }
192
209
  export async function getStyles(options) {
193
- const { selector, properties, multiple = false, windowId, appIdentifier } = options;
210
+ const { selector, strategy, properties, multiple = false, windowId, appIdentifier } = options;
194
211
  const script = buildScript(SCRIPTS.getStyles, {
195
212
  selector,
213
+ strategy: strategy ?? 'css',
196
214
  properties: properties || [],
197
215
  multiple,
198
216
  });
@@ -232,8 +250,8 @@ export async function executeJavaScript(options) {
232
250
  }
233
251
  }
234
252
  export async function focusElement(options) {
235
- const { selector, windowId, appIdentifier } = options;
236
- const script = buildScript(SCRIPTS.focus, { selector });
253
+ const { selector, strategy, windowId, appIdentifier } = options;
254
+ const script = buildScript(SCRIPTS.focus, { selector, strategy: strategy ?? 'css' });
237
255
  try {
238
256
  return await executeInWebview(script, windowId, appIdentifier);
239
257
  }
@@ -274,13 +292,13 @@ export async function getConsoleLogs(options = {}) {
274
292
  * Uses aria-api for comprehensive, spec-compliant accessibility computation.
275
293
  */
276
294
  export async function domSnapshot(options) {
277
- const { type, selector, windowId, appIdentifier } = options;
295
+ const { type, selector, strategy, windowId, appIdentifier } = options;
278
296
  // Only load aria-api for accessibility snapshots
279
297
  if (type === 'accessibility') {
280
298
  await ensureAriaApiLoaded(windowId);
281
299
  }
282
300
  // Then execute the snapshot script
283
- const script = buildScript(SCRIPTS.domSnapshot, { type, selector: selector ?? null });
301
+ const script = buildScript(SCRIPTS.domSnapshot, { type, selector: selector ?? null, strategy: strategy ?? 'css' });
284
302
  try {
285
303
  return await executeInWebview(script, windowId, appIdentifier);
286
304
  }
@@ -48,123 +48,27 @@ First, verify this is a Tauri v2 project:
48
48
  Examine these files and report what needs to be added or updated:
49
49
 
50
50
  ### 1. Rust Plugin Dependency
51
-
52
- Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`.
53
- It should be an **optional** dependency behind a Cargo feature
54
- so that it is completely excluded from production builds:
55
-
51
+ Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`. If missing or outdated, note that it needs:
56
52
  \`\`\`toml
57
53
  [dependencies]
58
- tauri-plugin-mcp-bridge = { version = "${PLUGIN_VERSION_CARGO}", optional = true }
59
- \`\`\`
60
-
61
- Under \`[features]\`, add a feature that enables it:
62
-
63
- \`\`\`toml
64
- [features]
65
- mcp-bridge = ["dep:tauri-plugin-mcp-bridge"]
54
+ tauri-plugin-mcp-bridge = "${PLUGIN_VERSION_CARGO}"
66
55
  \`\`\`
67
56
 
68
57
  ### 2. Plugin Registration
69
-
70
- Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin
71
- registration. It should be gated behind the \`mcp-bridge\` feature flag:
72
-
58
+ Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin registration. It should have:
73
59
  \`\`\`rust
74
- #[cfg(all(feature = "mcp-bridge", debug_assertions))]
60
+ #[cfg(debug_assertions)]
75
61
  {
76
62
  builder = builder.plugin(tauri_plugin_mcp_bridge::init());
77
63
  }
78
64
  \`\`\`
79
65
 
80
66
  ### 3. Global Tauri Setting
81
-
82
67
  Check \`src-tauri/tauri.conf.json\` for \`withGlobalTauri: true\` under the \`app\` section.
83
68
  **This is required** - without it, the MCP bridge cannot communicate with the webview.
84
69
 
85
- This setting should only be enabled for development. If the project
86
- uses a \`tauri.dev.conf.json\` overlay (applied only during
87
- \`cargo tauri dev\`), prefer placing it there:
88
-
89
- \`\`\`json
90
- {
91
- "app": {
92
- "withGlobalTauri": true
93
- }
94
- }
95
- \`\`\`
96
-
97
- ### 4. Plugin Capability (Conditional via build.rs)
98
-
99
- The \`mcp-bridge:default\` permission must **not** be added to
100
- \`src-tauri/capabilities/default.json\`. Instead, it should be
101
- conditionally generated by the build script so that it only exists
102
- when the \`mcp-bridge\` feature is active.
103
-
104
- Check \`src-tauri/build.rs\` and update it to conditionally write
105
- (or remove) a separate capability file before
106
- \`tauri_build::build()\` runs. Tauri auto-discovers all \`.json\`
107
- files in \`capabilities/\`, so this ensures the permission is only
108
- present when the feature is enabled:
109
-
110
- \`\`\`rust
111
- fn main() {
112
- let mcp_cap_path = std::path::Path::new("capabilities/mcp-bridge.json");
113
- #[cfg(all(feature = "mcp-bridge", debug_assertions))]
114
- {
115
- let cap = r#"{
116
- "identifier": "mcp-bridge",
117
- "description": "enables MCP bridge for development",
118
- "windows": [
119
- "main"
120
- ],
121
- "permissions": [
122
- "mcp-bridge:default"
123
- ]
124
- }"#;
125
- std::fs::write(mcp_cap_path, cap)
126
- .expect("failed to write mcp-bridge capability");
127
- }
128
- #[cfg(not(all(feature = "mcp-bridge", debug_assertions)))]
129
- {
130
- let _ = std::fs::remove_file(mcp_cap_path);
131
- }
132
-
133
- tauri_build::build()
134
- }
135
- \`\`\`
136
-
137
- If \`build.rs\` already has other logic, integrate the conditional
138
- block before the \`tauri_build::build()\` call.
139
-
140
- ### 5. Gitignore the Generated Capability File
141
-
142
- Since \`capabilities/mcp-bridge.json\` is generated at build time, add it to \`src-tauri/.gitignore\`:
143
-
144
- \`\`\`gitignore
145
- /capabilities/mcp-bridge.json
146
- \`\`\`
147
-
148
- ### 6. Dev Scripts (package.json)
149
-
150
- If the project uses npm scripts to run \`tauri dev\`, add
151
- \`--features mcp-bridge\` to the dev scripts so the feature is
152
- automatically enabled. For example:
153
-
154
- \`\`\`json
155
- {
156
- "scripts": {
157
- "dev": "tauri dev --features mcp-bridge",
158
- "dev:ios": "tauri ios dev --features mcp-bridge",
159
- "dev:android": "tauri android dev --features mcp-bridge"
160
- }
161
- }
162
- \`\`\`
163
-
164
- Do **not** add \`--features mcp-bridge\` to release-profile dev
165
- scripts (e.g. those using \`--release\`), as \`debug_assertions\`
166
- is false in release builds and the guard will exclude the plugin
167
- anyway.
70
+ ### 4. Plugin Permissions
71
+ Check \`src-tauri/capabilities/default.json\` (or similar) for \`"mcp-bridge:default"\` permission.
168
72
 
169
73
  ## Response Format
170
74
 
@@ -179,19 +83,13 @@ Only after the user says yes should you make any modifications.
179
83
  ## After Setup
180
84
 
181
85
  Once changes are approved and made:
182
- 1. Run the Tauri app in development mode. If npm scripts were
183
- updated, use \`npm run dev\`. Otherwise use
184
- \`cargo tauri dev --features mcp-bridge\` directly.
86
+ 1. Run the Tauri app in development mode (\`cargo tauri dev\`)
185
87
  2. Use \`driver_session\` with action "start" to connect
186
88
  3. Use \`driver_session\` with action "status" to verify
187
89
 
188
90
  ## Notes
189
91
 
190
- - The plugin is completely excluded from production builds: both
191
- \`cfg(feature = "mcp-bridge")\` and \`cfg(debug_assertions)\` must
192
- be true, so even if the feature flag is accidentally enabled in a
193
- release build, the plugin will not be included
194
- - The \`mcp-bridge\` Cargo feature must be passed explicitly — either via npm dev scripts or \`cargo tauri dev --features mcp-bridge\`
92
+ - The plugin only runs in debug builds so it won't affect production
195
93
  - The WebSocket server binds to \`0.0.0.0:9223\` by default
196
94
  - For localhost-only access, use \`Builder::new().bind_address("127.0.0.1").build()\`
197
95
  `;
@@ -271,6 +169,8 @@ export const TOOLS = [
271
169
  {
272
170
  name: 'webview_find_element',
273
171
  description: '[Tauri Apps Only] Find DOM elements in a running Tauri app\'s webview. ' +
172
+ 'Supports CSS selectors (default), XPath expressions, and text content matching via the strategy parameter. ' +
173
+ 'Returns the element\'s HTML. ' +
274
174
  'Requires active driver_session. ' +
275
175
  MULTI_APP_DESC + ' ' +
276
176
  'For browser pages or documentation sites, use Chrome DevTools MCP instead.',
@@ -314,6 +214,7 @@ export const TOOLS = [
314
214
  name: 'webview_interact',
315
215
  description: '[Tauri Apps Only] Click, scroll, swipe, focus, or perform gestures in a Tauri app webview. ' +
316
216
  'Supported actions: click, double-click, long-press, scroll, swipe, focus. ' +
217
+ 'Supports CSS selectors (default), XPath, and text content matching via the strategy parameter. ' +
317
218
  'Requires active driver_session. ' +
318
219
  'For browser interaction, use Chrome DevTools MCP instead.',
319
220
  category: TOOL_CATEGORIES.UI_AUTOMATION,
@@ -364,6 +265,8 @@ export const TOOLS = [
364
265
  {
365
266
  name: 'webview_keyboard',
366
267
  description: '[Tauri Apps Only] Type text or send keyboard events in a Tauri app. ' +
268
+ 'The selector parameter (for "type" action) supports CSS selectors (default), ' +
269
+ 'XPath, and text content matching via the strategy parameter. ' +
367
270
  'Requires active driver_session. ' +
368
271
  MULTI_APP_DESC + ' ' +
369
272
  'For browser keyboard input, use Chrome DevTools MCP instead.',
@@ -381,6 +284,7 @@ export const TOOLS = [
381
284
  return await keyboard({
382
285
  action: parsed.action,
383
286
  selectorOrKey: parsed.selector,
287
+ strategy: parsed.strategy,
384
288
  textOrModifiers: parsed.text,
385
289
  windowId: parsed.windowId,
386
290
  appIdentifier: parsed.appIdentifier,
@@ -398,6 +302,7 @@ export const TOOLS = [
398
302
  {
399
303
  name: 'webview_wait_for',
400
304
  description: '[Tauri Apps Only] Wait for elements, text, or IPC events in a Tauri app. ' +
305
+ 'When type is "selector", supports CSS (default), XPath, and text strategies via the strategy parameter. ' +
401
306
  'Requires active driver_session. ' +
402
307
  MULTI_APP_DESC + ' ' +
403
308
  'For browser waits, use Chrome DevTools MCP instead.',
@@ -413,6 +318,7 @@ export const TOOLS = [
413
318
  return await waitFor({
414
319
  type: parsed.type,
415
320
  value: parsed.value,
321
+ strategy: parsed.strategy,
416
322
  timeout: parsed.timeout,
417
323
  windowId: parsed.windowId,
418
324
  appIdentifier: parsed.appIdentifier,
@@ -422,6 +328,7 @@ export const TOOLS = [
422
328
  {
423
329
  name: 'webview_get_styles',
424
330
  description: '[Tauri Apps Only] Get computed CSS styles from elements in a Tauri app. ' +
331
+ 'Supports CSS selectors (default), XPath, and text content matching via the strategy parameter. ' +
425
332
  'Requires active driver_session. ' +
426
333
  MULTI_APP_DESC + ' ' +
427
334
  'For browser style inspection, use Chrome DevTools MCP instead.',
@@ -436,6 +343,7 @@ export const TOOLS = [
436
343
  const parsed = GetStylesSchema.parse(args);
437
344
  return await getStyles({
438
345
  selector: parsed.selector,
346
+ strategy: parsed.strategy,
439
347
  properties: parsed.properties,
440
348
  multiple: parsed.multiple,
441
349
  windowId: parsed.windowId,
@@ -480,6 +388,7 @@ export const TOOLS = [
480
388
  'with element tag names, IDs, CSS classes, and data-testid attributes (if present). ' +
481
389
  'Use this for understanding page layout, debugging CSS selectors, or locating elements by class/ID. ' +
482
390
  'Use the optional selector parameter to scope the snapshot to a subtree. ' +
391
+ 'The selector supports CSS (default), XPath, and text content matching via the strategy parameter. ' +
483
392
  'Requires active driver_session. ' +
484
393
  MULTI_APP_DESC,
485
394
  category: TOOL_CATEGORIES.UI_AUTOMATION,
@@ -494,6 +403,7 @@ export const TOOLS = [
494
403
  return await domSnapshot({
495
404
  type: parsed.type,
496
405
  selector: parsed.selector,
406
+ strategy: parsed.strategy,
497
407
  windowId: parsed.windowId,
498
408
  appIdentifier: parsed.appIdentifier,
499
409
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hypothesi/tauri-mcp-server",
3
- "version": "0.8.2",
3
+ "version": "0.8.3",
4
4
  "mcpName": "io.github.hypothesi/mcp-server-tauri",
5
5
  "description": "A Model Context Protocol server for use with Tauri v2 applications",
6
6
  "type": "module",