npm - @hypothesi/tauri-mcp-server - Versions diffs - 0.8.2 → 0.9.0 - Mend

@hypothesi/tauri-mcp-server 0.8.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/README.md +4 -2
package/dist/driver/element-picker.js +272 -0
package/dist/driver/scripts/dom-snapshot.js +13 -14
package/dist/driver/scripts/element-picker.js +395 -0
package/dist/driver/scripts/find-element.js +5 -30
package/dist/driver/scripts/focus.js +8 -4
package/dist/driver/scripts/get-styles.js +13 -16
package/dist/driver/scripts/html2canvas-loader.js +5 -1
package/dist/driver/scripts/index.js +11 -4
package/dist/driver/scripts/interact.js +15 -7
package/dist/driver/scripts/resolve-ref.js +92 -3
package/dist/driver/scripts/wait-for.js +12 -8
package/dist/driver/webview-interactions.js +35 -17
package/dist/prompts-registry.js +46 -0
package/dist/tools-registry.js +71 -110
package/package.json +1 -1

package/dist/driver/scripts/resolve-ref.js CHANGED Viewed

@@ -1,16 +1,43 @@
 /**
  * Shared ref resolver - always available via window.__MCP__.resolveRef.
- * Accepts a ref ID ("e3", "ref=e3", "[ref=e3]") or CSS selector.
+ * Accepts a ref ID ("e3", "ref=e3", "[ref=e3]"), CSS selector, XPath, or text.
  * Returns the DOM element, or null if not found.
  *
  * Reads window.__MCP__.reverseRefs dynamically at call time so it always
  * uses the latest snapshot's data.
+ *
+ * Also provides:
+ * - resolveAll(selector, strategy) - returns an Array of matching elements
+ * - countAll(selector, strategy)   - returns the total match count
  */
 (function() {
    window.__MCP__ = window.__MCP__ || {};
-   window.__MCP__.resolveRef = function(selectorOrRef) {
+   var REF_PATTERN = /^\[?(?:ref=)?(e\d+)\]?$/;
+   function xpathForText(text) {
+      // Escape single quotes for XPath by splitting on ' and using concat()
+      if (text.indexOf("'") === -1) {
+         return "//*[contains(text(), '" + text + "')]";
+      }
+      var parts = text.split("'");
+      var expr = 'concat(' + parts.map(function(p, i) {
+         return (i > 0 ? ",\"'\",": '') + "'" + p + "'";
+      }).join('') + ')';
+      return '//*[contains(text(), ' + expr + ')]';
+   }
+   /**
+    * Resolve a single element by selector and strategy.
+    * @param {string} selectorOrRef - Selector, ref ID, XPath, or text
+    * @param {string} [strategy]    - 'css' (default), 'xpath', or 'text'
+    * @returns {Element|null}
+    */
+   window.__MCP__.resolveRef = function(selectorOrRef, strategy) {
       if (!selectorOrRef) return null;
-      var refMatch = selectorOrRef.match(/^\[?(?:ref=)?(e\d+)\]?$/);
+      // Ref IDs always take priority regardless of strategy
+      var refMatch = selectorOrRef.match(REF_PATTERN);
       if (refMatch) {
          var reverseRefs = window.__MCP__.reverseRefs;
          if (!reverseRefs) {
@@ -18,6 +45,68 @@
          }
          return reverseRefs.get(refMatch[1]) || null;
       }
+      if (strategy === 'text') {
+         var xpath = xpathForText(selectorOrRef);
+         var result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
+         return result.singleNodeValue;
+      }
+      if (strategy === 'xpath') {
+         var result = document.evaluate(selectorOrRef, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
+         return result.singleNodeValue;
+      }
+      // Default: CSS selector
       return document.querySelector(selectorOrRef);
    };
+   /**
+    * Resolve all matching elements as an Array.
+    * @param {string} selector  - Selector, XPath, or text
+    * @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
+    * @returns {Element[]}
+    */
+   window.__MCP__.resolveAll = function(selector, strategy) {
+      if (!selector) return [];
+      // Ref IDs resolve to a single element
+      var refMatch = selector.match(REF_PATTERN);
+      if (refMatch) {
+         var el = window.__MCP__.resolveRef(selector);
+         return el ? [el] : [];
+      }
+      if (strategy === 'text') {
+         var xpath = xpathForText(selector);
+         var snapshot = document.evaluate(xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
+         var results = [];
+         for (var i = 0; i < snapshot.snapshotLength; i++) {
+            results.push(snapshot.snapshotItem(i));
+         }
+         return results;
+      }
+      if (strategy === 'xpath') {
+         var snapshot = document.evaluate(selector, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
+         var results = [];
+         for (var i = 0; i < snapshot.snapshotLength; i++) {
+            results.push(snapshot.snapshotItem(i));
+         }
+         return results;
+      }
+      // Default: CSS
+      return Array.from(document.querySelectorAll(selector));
+   };
+   /**
+    * Count all matching elements.
+    * @param {string} selector  - Selector, XPath, or text
+    * @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
+    * @returns {number}
+    */
+   window.__MCP__.countAll = function(selector, strategy) {
+      return window.__MCP__.resolveAll(selector, strategy).length;
+   };
 })();

package/dist/driver/scripts/wait-for.js CHANGED Viewed

@@ -4,34 +4,38 @@
  * @param {Object} params
  * @param {string} params.type - What to wait for: 'selector', 'text', 'ipc-event'
  * @param {string} params.value - Selector/ref ID, text, or event name to wait for
+ * @param {string} params.strategy - Selector strategy (applies when type is 'selector'): 'css', 'xpath', or 'text'
  * @param {number} params.timeout - Timeout in milliseconds
  */
 (async function(params) {
-   const { type, value, timeout } = params;
+   const { type, value, strategy, timeout } = params;
    const startTime = Date.now();
    function resolveElement(selectorOrRef) {
       if (!selectorOrRef) return null;
-      return window.__MCP__.resolveRef(selectorOrRef);
+      return window.__MCP__.resolveRef(selectorOrRef, strategy);
    }
-   return new Promise((resolve, reject) => {
+   return new Promise(function(resolve, reject) {
       function check() {
          if (Date.now() - startTime > timeout) {
-            reject(new Error(`Timeout waiting for ${type}: ${value}`));
+            reject(new Error('Timeout waiting for ' + type + ': ' + value));
             return;
          }
          if (type === 'selector') {
-            const element = resolveElement(value);
+            var element = resolveElement(value);
             if (element) {
-               resolve(`Element found: ${value}`);
+               var msg = 'Element found: ' + value;
+               var count = window.__MCP__.countAll(value, strategy);
+               if (count > 1) msg += ' (+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
+               resolve(msg);
                return;
             }
          } else if (type === 'text') {
-            const found = document.body.innerText.includes(value);
+            var found = document.body.innerText.includes(value);
             if (found) {
-               resolve(`Text found: ${value}`);
+               resolve('Text found: ' + value);
                return;
             }
          } else if (type === 'ipc-event') {

package/dist/driver/webview-interactions.js CHANGED Viewed

@@ -15,12 +15,22 @@ export const WindowTargetSchema = z.object({
     appIdentifier: z.union([z.string(), z.number()]).optional().describe('App port or bundle ID to target. Defaults to the only connected app or the default app if multiple are connected.'),
 });
 // ============================================================================
+// Shared Selector Strategy
+// ============================================================================
+/**
+ * Reusable strategy field for tools that accept a selector.
+ * Defaults to 'css' for backward compatibility.
+ */
+const selectorStrategyField = z.enum(['css', 'xpath', 'text']).default('css').describe('Selector strategy: "css" (default) for CSS selectors, "xpath" for XPath expressions, ' +
+    '"text" to find elements containing the given text. Ref IDs (e.g., "ref=e3") work with any strategy.');
+// ============================================================================
 // Schemas
 // ============================================================================
 export const InteractSchema = WindowTargetSchema.extend({
     action: z.enum(['click', 'double-click', 'long-press', 'scroll', 'swipe', 'focus'])
         .describe('Type of interaction to perform'),
-    selector: z.string().optional().describe('CSS selector for the element to interact with'),
+    selector: z.string().optional().describe('Element selector: CSS selector (default), XPath expression, text content, or ref ID (e.g., "ref=e3")'),
+    strategy: selectorStrategyField,
     x: z.number().optional().describe('X coordinate for direct coordinate interaction'),
     y: z.number().optional().describe('Y coordinate for direct coordinate interaction'),
     duration: z.number().optional()
@@ -42,7 +52,9 @@ export const ScreenshotSchema = WindowTargetSchema.extend({
 export const KeyboardSchema = WindowTargetSchema.extend({
     action: z.enum(['type', 'press', 'down', 'up'])
         .describe('Keyboard action type: "type" for typing text into an element, "press/down/up" for key events'),
-    selector: z.string().optional().describe('CSS selector for element to type into (required for "type" action)'),
+    selector: z.string().optional().describe('Element selector for element to type into (required for "type" action): ' +
+        'CSS selector (default), XPath, text content, or ref ID'),
+    strategy: selectorStrategyField,
     text: z.string().optional().describe('Text to type (required for "type" action)'),
     key: z.string().optional().describe('Key to press (required for "press/down/up" actions, e.g., "Enter", "a", "Escape")'),
     modifiers: z.array(z.enum(['Control', 'Alt', 'Shift', 'Meta'])).optional().describe('Modifier keys to hold'),
@@ -50,10 +62,12 @@ export const KeyboardSchema = WindowTargetSchema.extend({
 export const WaitForSchema = WindowTargetSchema.extend({
     type: z.enum(['selector', 'text', 'ipc-event']).describe('What to wait for'),
     value: z.string().describe('Selector, text content, or IPC event name to wait for'),
+    strategy: selectorStrategyField.describe('Selector strategy (applies when type is "selector"): "css" (default), "xpath", or "text".'),
     timeout: z.number().optional().default(5000).describe('Timeout in milliseconds (default: 5000ms)'),
 });
 export const GetStylesSchema = WindowTargetSchema.extend({
-    selector: z.string().describe('CSS selector for element(s) to get styles from'),
+    selector: z.string().describe('Element selector: CSS selector (default), XPath expression, text content, or ref ID'),
+    strategy: selectorStrategyField,
     properties: z.array(z.string()).optional().describe('Specific CSS properties to retrieve. If omitted, returns all computed styles'),
     multiple: z.boolean().optional().default(false)
         .describe('Whether to get styles for all matching elements (true) or just the first (false)'),
@@ -68,8 +82,9 @@ export const FocusElementSchema = WindowTargetSchema.extend({
     selector: z.string().describe('CSS selector for element to focus'),
 });
 export const FindElementSchema = WindowTargetSchema.extend({
-    selector: z.string(),
-    strategy: z.enum(['css', 'xpath', 'text']).default('css'),
+    selector: z.string().describe('The selector to find: CSS selector (default), XPath expression, text content, or ref ID (e.g., "ref=e3"). ' +
+        'Interpretation depends on strategy.'),
+    strategy: selectorStrategyField,
 });
 export const GetConsoleLogsSchema = WindowTargetSchema.extend({
     filter: z.string().optional().describe('Regex or keyword to filter logs'),
@@ -77,13 +92,14 @@ export const GetConsoleLogsSchema = WindowTargetSchema.extend({
 });
 export const DomSnapshotSchema = WindowTargetSchema.extend({
     type: z.enum(['accessibility', 'structure']).describe('Snapshot type'),
-    selector: z.string().optional().describe('CSS selector to scope the snapshot. If omitted, snapshots entire document.'),
+    selector: z.string().optional().describe('Selector to scope the snapshot: CSS selector (default), XPath, text content, or ref ID. If omitted, snapshots entire document.'),
+    strategy: selectorStrategyField,
 });
 // ============================================================================
 // Implementation Functions
 // ============================================================================
 export async function interact(options) {
-    const { action, selector, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY, windowId, appIdentifier } = options;
+    const { action, selector, strategy, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY, windowId, appIdentifier } = options;
     // Handle swipe action separately since it has different logic
     if (action === 'swipe') {
         return performSwipe({ fromX, fromY, toX, toY, duration, windowId, appIdentifier });
@@ -93,11 +109,12 @@ export async function interact(options) {
         if (!selector) {
             throw new Error('Focus action requires a selector');
         }
-        return focusElement({ selector, windowId, appIdentifier });
+        return focusElement({ selector, strategy, windowId, appIdentifier });
     }
     const script = buildScript(SCRIPTS.interact, {
         action,
         selector: selector ?? null,
+        strategy: strategy ?? 'css',
         x: x ?? null,
         y: y ?? null,
         duration: duration ?? 500,
@@ -146,7 +163,7 @@ export async function screenshot(options = {}) {
     return result;
 }
 export async function keyboard(options) {
-    const { action, selectorOrKey, textOrModifiers, modifiers, windowId, appIdentifier } = options;
+    const { action, selectorOrKey, strategy, textOrModifiers, modifiers, windowId, appIdentifier } = options;
     // Handle the different parameter combinations based on action
     if (action === 'type') {
         const selector = selectorOrKey;
@@ -154,7 +171,7 @@ export async function keyboard(options) {
         if (!selector || !text) {
             throw new Error('Type action requires both selector and text parameters');
         }
-        const script = buildTypeScript(selector, text);
+        const script = buildTypeScript(selector, text, strategy);
         try {
             return await executeInWebview(script, windowId, appIdentifier);
         }
@@ -179,8 +196,8 @@ export async function keyboard(options) {
     }
 }
 export async function waitFor(options) {
-    const { type, value, timeout = 5000, windowId, appIdentifier } = options;
-    const script = buildScript(SCRIPTS.waitFor, { type, value, timeout });
+    const { type, value, strategy, timeout = 5000, windowId, appIdentifier } = options;
+    const script = buildScript(SCRIPTS.waitFor, { type, value, strategy: strategy ?? 'css', timeout });
     try {
         return await executeInWebview(script, windowId, appIdentifier);
     }
@@ -190,9 +207,10 @@ export async function waitFor(options) {
     }
 }
 export async function getStyles(options) {
-    const { selector, properties, multiple = false, windowId, appIdentifier } = options;
+    const { selector, strategy, properties, multiple = false, windowId, appIdentifier } = options;
     const script = buildScript(SCRIPTS.getStyles, {
         selector,
+        strategy: strategy ?? 'css',
         properties: properties || [],
         multiple,
     });
@@ -232,8 +250,8 @@ export async function executeJavaScript(options) {
     }
 }
 export async function focusElement(options) {
-    const { selector, windowId, appIdentifier } = options;
-    const script = buildScript(SCRIPTS.focus, { selector });
+    const { selector, strategy, windowId, appIdentifier } = options;
+    const script = buildScript(SCRIPTS.focus, { selector, strategy: strategy ?? 'css' });
     try {
         return await executeInWebview(script, windowId, appIdentifier);
     }
@@ -274,13 +292,13 @@ export async function getConsoleLogs(options = {}) {
  * Uses aria-api for comprehensive, spec-compliant accessibility computation.
  */
 export async function domSnapshot(options) {
-    const { type, selector, windowId, appIdentifier } = options;
+    const { type, selector, strategy, windowId, appIdentifier } = options;
     // Only load aria-api for accessibility snapshots
     if (type === 'accessibility') {
         await ensureAriaApiLoaded(windowId);
     }
     // Then execute the snapshot script
-    const script = buildScript(SCRIPTS.domSnapshot, { type, selector: selector ?? null });
+    const script = buildScript(SCRIPTS.domSnapshot, { type, selector: selector ?? null, strategy: strategy ?? 'css' });
     try {
         return await executeInWebview(script, windowId, appIdentifier);
     }

package/dist/prompts-registry.js CHANGED Viewed

@@ -91,6 +91,28 @@ Once changes are approved and made:
 - The plugin only runs in debug builds so it won't affect production
 - The WebSocket server binds to \`0.0.0.0:9223\` by default
 - For localhost-only access, use \`Builder::new().bind_address("127.0.0.1").build()\``;
+const SELECT_ELEMENT_PROMPT = (message) => {
+    const lines = [
+        'The user wants to visually select an element in their running Tauri app so they can discuss it with you.',
+        '',
+        'Follow these steps:',
+        '',
+        '1. **Ensure a session is active** - Use `driver_session` with action "start" if not already connected',
+        '',
+        '2. **Activate the element picker** - Call `webview_select_element` to show the picker overlay in the app.',
+        'The user will see a blue highlight following their cursor and can click to select an element.',
+        'They can press Escape or click X to cancel.',
+        '',
+        '3. **Review the result** - You will receive the element\'s metadata (tag, id, classes, CSS selector, XPath,',
+        'bounding rect, attributes, computed styles, parent chain) and an annotated screenshot with the element highlighted.',
+        '',
+        '4. **Respond to the user** - Use the element context and screenshot to address their request.',
+    ];
+    if (message) {
+        lines.push('', '## User\'s Message About the Element', '', message);
+    }
+    return lines.join('\n');
+};
 /**
  * Complete registry of all available prompts
  */
@@ -114,6 +136,30 @@ export const PROMPTS = [
             ];
         },
     },
+    {
+        name: 'select',
+        description: 'Visually select an element in the running Tauri app. ' +
+            'Activates a picker overlay — click an element to send its metadata and an annotated screenshot to the agent. ' +
+            'Optionally include a message describing what you want to do with the element.',
+        arguments: [
+            {
+                name: 'message',
+                description: 'What you want to discuss or do with the selected element (e.g. "this button should be green instead of blue")',
+                required: false,
+            },
+        ],
+        handler: (args) => {
+            return [
+                {
+                    role: 'user',
+                    content: {
+                        type: 'text',
+                        text: SELECT_ELEMENT_PROMPT(args.message),
+                    },
+                },
+            ];
+        },
+    },
     {
         name: 'setup',
         description: 'Set up or update the MCP Bridge plugin in a Tauri project. ' +

package/dist/tools-registry.js CHANGED Viewed

@@ -8,6 +8,7 @@ import { manageDriverSession, ManageDriverSessionSchema, } from './driver/sessio
 import { readLogs, ReadLogsSchema } from './monitor/logs.js';
 import { executeIPCCommand, manageIPCMonitoring, getIPCEvents, emitTestEvent, getBackendState, manageWindow, ExecuteIPCCommandSchema, ManageIPCMonitoringSchema, GetIPCEventsSchema, EmitTestEventSchema, GetBackendStateSchema, ManageWindowSchema, } from './driver/plugin-commands.js';
 import { interact, screenshot, keyboard, waitFor, getStyles, executeJavaScript, findElement, domSnapshot, InteractSchema, ScreenshotSchema, KeyboardSchema, WaitForSchema, GetStylesSchema, ExecuteJavaScriptSchema, FindElementSchema, DomSnapshotSchema, } from './driver/webview-interactions.js';
+import { selectElement, getPointedElement, SelectElementSchema, GetPointedElementSchema, } from './driver/element-picker.js';
 import { PLUGIN_VERSION_CARGO } from './version.js';
 /**
  * Standard multi-app description for webview tools.
@@ -48,123 +49,27 @@ First, verify this is a Tauri v2 project:
 Examine these files and report what needs to be added or updated:
 ### 1. Rust Plugin Dependency
-Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`.
-It should be an **optional** dependency behind a Cargo feature
-so that it is completely excluded from production builds:
+Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`. If missing or outdated, note that it needs:
 \`\`\`toml
 [dependencies]
-tauri-plugin-mcp-bridge = { version = "${PLUGIN_VERSION_CARGO}", optional = true }
-\`\`\`
-Under \`[features]\`, add a feature that enables it:
-\`\`\`toml
-[features]
-mcp-bridge = ["dep:tauri-plugin-mcp-bridge"]
+tauri-plugin-mcp-bridge = "${PLUGIN_VERSION_CARGO}"
 \`\`\`
 ### 2. Plugin Registration
-Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin
-registration. It should be gated behind the \`mcp-bridge\` feature flag:
+Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin registration. It should have:
 \`\`\`rust
-#[cfg(all(feature = "mcp-bridge", debug_assertions))]
+#[cfg(debug_assertions)]
 {
     builder = builder.plugin(tauri_plugin_mcp_bridge::init());
 }
 \`\`\`
 ### 3. Global Tauri Setting
 Check \`src-tauri/tauri.conf.json\` for \`withGlobalTauri: true\` under the \`app\` section.
 **This is required** - without it, the MCP bridge cannot communicate with the webview.
-This setting should only be enabled for development. If the project
-uses a \`tauri.dev.conf.json\` overlay (applied only during
-\`cargo tauri dev\`), prefer placing it there:
-\`\`\`json
-{
-   "app": {
-      "withGlobalTauri": true
-   }
-}
-\`\`\`
-### 4. Plugin Capability (Conditional via build.rs)
-The \`mcp-bridge:default\` permission must **not** be added to
-\`src-tauri/capabilities/default.json\`. Instead, it should be
-conditionally generated by the build script so that it only exists
-when the \`mcp-bridge\` feature is active.
-Check \`src-tauri/build.rs\` and update it to conditionally write
-(or remove) a separate capability file before
-\`tauri_build::build()\` runs. Tauri auto-discovers all \`.json\`
-files in \`capabilities/\`, so this ensures the permission is only
-present when the feature is enabled:
-\`\`\`rust
-fn main() {
-   let mcp_cap_path = std::path::Path::new("capabilities/mcp-bridge.json");
-   #[cfg(all(feature = "mcp-bridge", debug_assertions))]
-   {
-      let cap = r#"{
-   "identifier": "mcp-bridge",
-   "description": "enables MCP bridge for development",
-   "windows": [
-      "main"
-   ],
-   "permissions": [
-      "mcp-bridge:default"
-   ]
-}"#;
-      std::fs::write(mcp_cap_path, cap)
-         .expect("failed to write mcp-bridge capability");
-   }
-   #[cfg(not(all(feature = "mcp-bridge", debug_assertions)))]
-   {
-      let _ = std::fs::remove_file(mcp_cap_path);
-   }
-   tauri_build::build()
-}
-\`\`\`
-If \`build.rs\` already has other logic, integrate the conditional
-block before the \`tauri_build::build()\` call.
-### 5. Gitignore the Generated Capability File
-Since \`capabilities/mcp-bridge.json\` is generated at build time, add it to \`src-tauri/.gitignore\`:
-\`\`\`gitignore
-/capabilities/mcp-bridge.json
-\`\`\`
-### 6. Dev Scripts (package.json)
-If the project uses npm scripts to run \`tauri dev\`, add
-\`--features mcp-bridge\` to the dev scripts so the feature is
-automatically enabled. For example:
-\`\`\`json
-{
-   "scripts": {
-      "dev": "tauri dev --features mcp-bridge",
-      "dev:ios": "tauri ios dev --features mcp-bridge",
-      "dev:android": "tauri android dev --features mcp-bridge"
-   }
-}
-\`\`\`
-Do **not** add \`--features mcp-bridge\` to release-profile dev
-scripts (e.g. those using \`--release\`), as \`debug_assertions\`
-is false in release builds and the guard will exclude the plugin
-anyway.
+### 4. Plugin Permissions
+Check \`src-tauri/capabilities/default.json\` (or similar) for \`"mcp-bridge:default"\` permission.
 ## Response Format
@@ -179,19 +84,13 @@ Only after the user says yes should you make any modifications.
 ## After Setup
 Once changes are approved and made:
-1. Run the Tauri app in development mode — if npm scripts were
-   updated, use \`npm run dev\`. Otherwise use
-   \`cargo tauri dev --features mcp-bridge\` directly.
+1. Run the Tauri app in development mode (\`cargo tauri dev\`)
 2. Use \`driver_session\` with action "start" to connect
 3. Use \`driver_session\` with action "status" to verify
 ## Notes
-- The plugin is completely excluded from production builds — both
-  \`cfg(feature = "mcp-bridge")\` and \`cfg(debug_assertions)\` must
-  be true, so even if the feature flag is accidentally enabled in a
-  release build, the plugin will not be included
-- The \`mcp-bridge\` Cargo feature must be passed explicitly — either via npm dev scripts or \`cargo tauri dev --features mcp-bridge\`
+- The plugin only runs in debug builds so it won't affect production
 - The WebSocket server binds to \`0.0.0.0:9223\` by default
 - For localhost-only access, use \`Builder::new().bind_address("127.0.0.1").build()\`
 `;
@@ -271,6 +170,8 @@ export const TOOLS = [
     {
         name: 'webview_find_element',
         description: '[Tauri Apps Only] Find DOM elements in a running Tauri app\'s webview. ' +
+            'Supports CSS selectors (default), XPath expressions, and text content matching via the strategy parameter. ' +
+            'Returns the element\'s HTML. ' +
             'Requires active driver_session. ' +
             MULTI_APP_DESC + ' ' +
             'For browser pages or documentation sites, use Chrome DevTools MCP instead.',
@@ -314,6 +215,7 @@ export const TOOLS = [
         name: 'webview_interact',
         description: '[Tauri Apps Only] Click, scroll, swipe, focus, or perform gestures in a Tauri app webview. ' +
             'Supported actions: click, double-click, long-press, scroll, swipe, focus. ' +
+            'Supports CSS selectors (default), XPath, and text content matching via the strategy parameter. ' +
             'Requires active driver_session. ' +
             'For browser interaction, use Chrome DevTools MCP instead.',
         category: TOOL_CATEGORIES.UI_AUTOMATION,
@@ -364,6 +266,8 @@ export const TOOLS = [
     {
         name: 'webview_keyboard',
         description: '[Tauri Apps Only] Type text or send keyboard events in a Tauri app. ' +
+            'The selector parameter (for "type" action) supports CSS selectors (default), ' +
+            'XPath, and text content matching via the strategy parameter. ' +
             'Requires active driver_session. ' +
             MULTI_APP_DESC + ' ' +
             'For browser keyboard input, use Chrome DevTools MCP instead.',
@@ -381,6 +285,7 @@ export const TOOLS = [
                 return await keyboard({
                     action: parsed.action,
                     selectorOrKey: parsed.selector,
+                    strategy: parsed.strategy,
                     textOrModifiers: parsed.text,
                     windowId: parsed.windowId,
                     appIdentifier: parsed.appIdentifier,
@@ -398,6 +303,7 @@ export const TOOLS = [
     {
         name: 'webview_wait_for',
         description: '[Tauri Apps Only] Wait for elements, text, or IPC events in a Tauri app. ' +
+            'When type is "selector", supports CSS (default), XPath, and text strategies via the strategy parameter. ' +
             'Requires active driver_session. ' +
             MULTI_APP_DESC + ' ' +
             'For browser waits, use Chrome DevTools MCP instead.',
@@ -413,6 +319,7 @@ export const TOOLS = [
             return await waitFor({
                 type: parsed.type,
                 value: parsed.value,
+                strategy: parsed.strategy,
                 timeout: parsed.timeout,
                 windowId: parsed.windowId,
                 appIdentifier: parsed.appIdentifier,
@@ -422,6 +329,7 @@ export const TOOLS = [
     {
         name: 'webview_get_styles',
         description: '[Tauri Apps Only] Get computed CSS styles from elements in a Tauri app. ' +
+            'Supports CSS selectors (default), XPath, and text content matching via the strategy parameter. ' +
             'Requires active driver_session. ' +
             MULTI_APP_DESC + ' ' +
             'For browser style inspection, use Chrome DevTools MCP instead.',
@@ -436,6 +344,7 @@ export const TOOLS = [
             const parsed = GetStylesSchema.parse(args);
             return await getStyles({
                 selector: parsed.selector,
+                strategy: parsed.strategy,
                 properties: parsed.properties,
                 multiple: parsed.multiple,
                 windowId: parsed.windowId,
@@ -480,6 +389,7 @@ export const TOOLS = [
             'with element tag names, IDs, CSS classes, and data-testid attributes (if present). ' +
             'Use this for understanding page layout, debugging CSS selectors, or locating elements by class/ID. ' +
             'Use the optional selector parameter to scope the snapshot to a subtree. ' +
+            'The selector supports CSS (default), XPath, and text content matching via the strategy parameter. ' +
             'Requires active driver_session. ' +
             MULTI_APP_DESC,
         category: TOOL_CATEGORIES.UI_AUTOMATION,
@@ -494,6 +404,57 @@ export const TOOLS = [
             return await domSnapshot({
                 type: parsed.type,
                 selector: parsed.selector,
+                strategy: parsed.strategy,
+                windowId: parsed.windowId,
+                appIdentifier: parsed.appIdentifier,
+            });
+        },
+    },
+    // Element Picker Tools
+    {
+        name: 'webview_select_element',
+        description: '[Tauri Apps Only] Activates an element picker overlay in the Tauri app. ' +
+            'The user visually selects an element by clicking it, and the tool returns ' +
+            'rich element metadata (tag, id, classes, attributes, text, bounding rect, ' +
+            'CSS selector, computed styles, parent chain) plus an annotated screenshot ' +
+            'with the element highlighted. ' +
+            'Requires active driver_session. ' +
+            MULTI_APP_DESC,
+        category: TOOL_CATEGORIES.UI_AUTOMATION,
+        schema: SelectElementSchema,
+        annotations: {
+            title: 'Select Element (Visual Picker)',
+            readOnlyHint: true,
+            openWorldHint: false,
+        },
+        handler: async (args) => {
+            const parsed = SelectElementSchema.parse(args);
+            return await selectElement({
+                timeout: parsed.timeout,
+                windowId: parsed.windowId,
+                appIdentifier: parsed.appIdentifier,
+            });
+        },
+    },
+    {
+        name: 'webview_get_pointed_element',
+        description: '[Tauri Apps Only] Retrieves element metadata for an element the user previously ' +
+            'pointed at via Alt+Shift+Click in the Tauri app. Returns the same rich metadata ' +
+            'as webview_select_element (tag, id, classes, attributes, text, bounding rect, ' +
+            'CSS selector, computed styles, parent chain) plus an annotated screenshot. ' +
+            'The user must Alt+Shift+Click an element first before calling this tool. ' +
+            'Requires active driver_session. ' +
+            MULTI_APP_DESC,
+        category: TOOL_CATEGORIES.UI_AUTOMATION,
+        schema: GetPointedElementSchema,
+        annotations: {
+            title: 'Get Pointed Element',
+            readOnlyHint: true,
+            openWorldHint: false,
+        },
+        handler: async (args) => {
+            const parsed = GetPointedElementSchema.parse(args);
+            return await getPointedElement({
                 windowId: parsed.windowId,
                 appIdentifier: parsed.appIdentifier,
             });