npm - @hypothesi/tauri-mcp-server - Versions diffs - 0.1.2 → 0.2.0 - Mend

@hypothesi/tauri-mcp-server 0.1.2 → 0.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

package/README.md +116 -0
package/dist/driver/plugin-commands.js +29 -0
package/dist/driver/script-manager.js +97 -0
package/dist/driver/scripts/html2canvas-loader.js +54 -42
package/dist/driver/webview-executor.js +84 -35
package/dist/driver/webview-interactions.js +58 -33
package/dist/index.js +29 -2
package/dist/prompts-registry.js +54 -0
package/dist/tools-registry.js +265 -39
package/dist/types/window.js +4 -0
package/package.json +58 -58

package/dist/driver/webview-interactions.js CHANGED Viewed

@@ -1,10 +1,20 @@
 import { z } from 'zod';
-import { executeInWebview, captureScreenshot, getConsoleLogs as getConsoleLogsFromCapture } from './webview-executor.js';
+import { executeInWebview, executeInWebviewWithContext, captureScreenshot, getConsoleLogs as getConsoleLogsFromCapture, } from './webview-executor.js';
 import { SCRIPTS, buildScript, buildTypeScript, buildKeyEventScript } from './scripts/index.js';
 // ============================================================================
+// Base Schema for Window Targeting
+// ============================================================================
+/**
+ * Base schema mixin for tools that can target a specific window.
+ * All webview tools extend this to support multi-window applications.
+ */
+export const WindowTargetSchema = z.object({
+    windowId: z.string().optional().describe('Window label to target (defaults to "main")'),
+});
+// ============================================================================
 // Schemas
 // ============================================================================
-export const InteractSchema = z.object({
+export const InteractSchema = WindowTargetSchema.extend({
     action: z.enum(['click', 'double-click', 'long-press', 'scroll', 'swipe'])
         .describe('Type of interaction to perform'),
     selector: z.string().optional().describe('CSS selector for the element to interact with'),
@@ -14,17 +24,16 @@ export const InteractSchema = z.object({
         .describe('Duration in ms for long-press or swipe (default: 500ms for long-press, 300ms for swipe)'),
     scrollX: z.number().optional().describe('Horizontal scroll amount in pixels (positive = right)'),
     scrollY: z.number().optional().describe('Vertical scroll amount in pixels (positive = down)'),
-    // Swipe-specific parameters
     fromX: z.number().optional().describe('Starting X coordinate for swipe'),
     fromY: z.number().optional().describe('Starting Y coordinate for swipe'),
     toX: z.number().optional().describe('Ending X coordinate for swipe'),
     toY: z.number().optional().describe('Ending Y coordinate for swipe'),
 });
-export const ScreenshotSchema = z.object({
+export const ScreenshotSchema = WindowTargetSchema.extend({
     format: z.enum(['png', 'jpeg']).optional().default('png').describe('Image format'),
     quality: z.number().min(0).max(100).optional().describe('JPEG quality (0-100, only for jpeg format)'),
 });
-export const KeyboardSchema = z.object({
+export const KeyboardSchema = WindowTargetSchema.extend({
     action: z.enum(['type', 'press', 'down', 'up'])
         .describe('Keyboard action type: "type" for typing text into an element, "press/down/up" for key events'),
     selector: z.string().optional().describe('CSS selector for element to type into (required for "type" action)'),
@@ -32,29 +41,29 @@ export const KeyboardSchema = z.object({
     key: z.string().optional().describe('Key to press (required for "press/down/up" actions, e.g., "Enter", "a", "Escape")'),
     modifiers: z.array(z.enum(['Control', 'Alt', 'Shift', 'Meta'])).optional().describe('Modifier keys to hold'),
 });
-export const WaitForSchema = z.object({
+export const WaitForSchema = WindowTargetSchema.extend({
     type: z.enum(['selector', 'text', 'ipc-event']).describe('What to wait for'),
     value: z.string().describe('Selector, text content, or IPC event name to wait for'),
     timeout: z.number().optional().default(5000).describe('Timeout in milliseconds (default: 5000ms)'),
 });
-export const GetStylesSchema = z.object({
+export const GetStylesSchema = WindowTargetSchema.extend({
     selector: z.string().describe('CSS selector for element(s) to get styles from'),
     properties: z.array(z.string()).optional().describe('Specific CSS properties to retrieve. If omitted, returns all computed styles'),
     multiple: z.boolean().optional().default(false)
         .describe('Whether to get styles for all matching elements (true) or just the first (false)'),
 });
-export const ExecuteJavaScriptSchema = z.object({
+export const ExecuteJavaScriptSchema = WindowTargetSchema.extend({
     script: z.string().describe('JavaScript code to execute in the webview context'),
     args: z.array(z.unknown()).optional().describe('Arguments to pass to the script'),
 });
-export const FocusElementSchema = z.object({
+export const FocusElementSchema = WindowTargetSchema.extend({
     selector: z.string().describe('CSS selector for element to focus'),
 });
-export const FindElementSchema = z.object({
+export const FindElementSchema = WindowTargetSchema.extend({
     selector: z.string(),
     strategy: z.enum(['css', 'xpath', 'text']).default('css'),
 });
-export const GetConsoleLogsSchema = z.object({
+export const GetConsoleLogsSchema = WindowTargetSchema.extend({
     filter: z.string().optional().describe('Regex or keyword to filter logs'),
     since: z.string().optional().describe('ISO timestamp to filter logs since'),
 });
@@ -62,10 +71,10 @@ export const GetConsoleLogsSchema = z.object({
 // Implementation Functions
 // ============================================================================
 export async function interact(options) {
-    const { action, selector, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY } = options;
+    const { action, selector, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY, windowId } = options;
     // Handle swipe action separately since it has different logic
     if (action === 'swipe') {
-        return performSwipe(fromX, fromY, toX, toY, duration);
+        return performSwipe({ fromX, fromY, toX, toY, duration, windowId });
     }
     const script = buildScript(SCRIPTS.interact, {
         action,
@@ -77,31 +86,34 @@ export async function interact(options) {
         scrollY: scrollY ?? 0,
     });
     try {
-        return await executeInWebview(script);
+        return await executeInWebview(script, windowId);
     }
     catch (error) {
         const message = error instanceof Error ? error.message : String(error);
         throw new Error(`Interaction failed: ${message}`);
     }
 }
-async function performSwipe(fromX, fromY, toX, toY, duration = 300) {
+async function performSwipe(options) {
+    const { fromX, fromY, toX, toY, duration = 300, windowId } = options;
     if (fromX === undefined || fromY === undefined || toX === undefined || toY === undefined) {
         throw new Error('Swipe action requires fromX, fromY, toX, and toY coordinates');
     }
     const script = buildScript(SCRIPTS.swipe, { fromX, fromY, toX, toY, duration });
     try {
-        return await executeInWebview(script);
+        return await executeInWebview(script, windowId);
     }
     catch (error) {
         const message = error instanceof Error ? error.message : String(error);
         throw new Error(`Swipe failed: ${message}`);
     }
 }
-export async function screenshot(quality, format = 'png') {
+export async function screenshot(options = {}) {
+    const { quality, format = 'png', windowId } = options;
     // Use the native screenshot function from webview-executor
-    return captureScreenshot(format, quality);
+    return captureScreenshot({ format, quality, windowId });
 }
-export async function keyboard(action, selectorOrKey, textOrModifiers, modifiers) {
+export async function keyboard(options) {
+    const { action, selectorOrKey, textOrModifiers, modifiers, windowId } = options;
     // Handle the different parameter combinations based on action
     if (action === 'type') {
         const selector = selectorOrKey;
@@ -111,7 +123,7 @@ export async function keyboard(action, selectorOrKey, textOrModifiers, modifiers
         }
         const script = buildTypeScript(selector, text);
         try {
-            return await executeInWebview(script);
+            return await executeInWebview(script, windowId);
         }
         catch (error) {
             const message = error instanceof Error ? error.message : String(error);
@@ -126,38 +138,41 @@ export async function keyboard(action, selectorOrKey, textOrModifiers, modifiers
     }
     const script = buildKeyEventScript(action, key, mods || []);
     try {
-        return await executeInWebview(script);
+        return await executeInWebview(script, windowId);
     }
     catch (error) {
         const message = error instanceof Error ? error.message : String(error);
         throw new Error(`Keyboard action failed: ${message}`);
     }
 }
-export async function waitFor(type, value, timeout = 5000) {
+export async function waitFor(options) {
+    const { type, value, timeout = 5000, windowId } = options;
     const script = buildScript(SCRIPTS.waitFor, { type, value, timeout });
     try {
-        return await executeInWebview(script);
+        return await executeInWebview(script, windowId);
     }
     catch (error) {
         const message = error instanceof Error ? error.message : String(error);
         throw new Error(`Wait failed: ${message}`);
     }
 }
-export async function getStyles(selector, properties, multiple = false) {
+export async function getStyles(options) {
+    const { selector, properties, multiple = false, windowId } = options;
     const script = buildScript(SCRIPTS.getStyles, {
         selector,
         properties: properties || [],
         multiple,
     });
     try {
-        return await executeInWebview(script);
+        return await executeInWebview(script, windowId);
     }
     catch (error) {
         const message = error instanceof Error ? error.message : String(error);
         throw new Error(`Get styles failed: ${message}`);
     }
 }
-export async function executeJavaScript(script, args) {
+export async function executeJavaScript(options) {
+    const { script, args, windowId } = options;
     // If args are provided, we need to inject them into the script context
     const wrappedScript = args && args.length > 0
         ? `
@@ -168,18 +183,26 @@ export async function executeJavaScript(script, args) {
       `
         : script;
     try {
-        const result = await executeInWebview(wrappedScript);
-        return result;
+        const { result, windowLabel, warning } = await executeInWebviewWithContext(wrappedScript, windowId);
+        // Build response with window context
+        let response = result;
+        if (warning) {
+            response = `⚠️ ${warning}\n\n${response}`;
+        }
+        // Add window info footer for clarity
+        response += `\n\n[Executed in window: ${windowLabel}]`;
+        return response;
     }
     catch (error) {
         const message = error instanceof Error ? error.message : String(error);
         throw new Error(`JavaScript execution failed: ${message}`);
     }
 }
-export async function focusElement(selector) {
+export async function focusElement(options) {
+    const { selector, windowId } = options;
     const script = buildScript(SCRIPTS.focus, { selector });
     try {
-        return await executeInWebview(script);
+        return await executeInWebview(script, windowId);
     }
     catch (error) {
         const message = error instanceof Error ? error.message : String(error);
@@ -189,10 +212,11 @@ export async function focusElement(selector) {
 /**
  * Find an element using various selector strategies.
  */
-export async function findElement(selector, strategy) {
+export async function findElement(options) {
+    const { selector, strategy, windowId } = options;
     const script = buildScript(SCRIPTS.findElement, { selector, strategy });
     try {
-        return await executeInWebview(script);
+        return await executeInWebview(script, windowId);
     }
     catch (error) {
         const message = error instanceof Error ? error.message : String(error);
@@ -202,7 +226,8 @@ export async function findElement(selector, strategy) {
 /**
  * Get console logs from the webview.
  */
-export async function getConsoleLogs(filter, since) {
+export async function getConsoleLogs(options = {}) {
+    const { filter, since } = options;
     try {
         return await getConsoleLogsFromCapture(filter, since);
     }

package/dist/index.js CHANGED Viewed

@@ -1,13 +1,14 @@
 #!/usr/bin/env node
 import { Server } from '@modelcontextprotocol/sdk/server/index.js';
 import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
-import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
+import { CallToolRequestSchema, ListToolsRequestSchema, ListPromptsRequestSchema, GetPromptRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
 import { zodToJsonSchema } from 'zod-to-json-schema';
 import { readFileSync } from 'fs';
 import { fileURLToPath } from 'url';
 import { dirname, join } from 'path';
-// Import the single source of truth for all tools
+// Import the single source of truth for all tools and prompts
 import { TOOLS, TOOL_MAP } from './tools-registry.js';
+import { PROMPTS, PROMPT_MAP } from './prompts-registry.js';
 /* eslint-disable no-process-exit */
 // Read version from package.json
 const currentDir = dirname(fileURLToPath(import.meta.url));
@@ -20,6 +21,7 @@ const server = new Server({
 }, {
     capabilities: {
         tools: {},
+        prompts: {},
     },
 });
 // Handle connection errors gracefully - don't crash on broken pipe
@@ -45,6 +47,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
                 name: tool.name,
                 description: tool.description,
                 inputSchema: zodToJsonSchema(tool.schema),
+                annotations: tool.annotations,
             };
         }),
     };
@@ -67,6 +70,30 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
         };
     }
 });
+// Prompt list handler - generated from registry
+server.setRequestHandler(ListPromptsRequestSchema, async () => {
+    return {
+        prompts: PROMPTS.map((prompt) => {
+            return {
+                name: prompt.name,
+                description: prompt.description,
+                arguments: prompt.arguments,
+            };
+        }),
+    };
+});
+// Get prompt handler - returns prompt messages for a specific prompt
+server.setRequestHandler(GetPromptRequestSchema, async (request) => {
+    const prompt = PROMPT_MAP.get(request.params.name);
+    if (!prompt) {
+        throw new Error(`Unknown prompt: ${request.params.name}`);
+    }
+    const args = (request.params.arguments || {});
+    return {
+        description: prompt.description,
+        messages: prompt.handler(args),
+    };
+});
 // Start server
 async function main() {
     const transport = new StdioServerTransport();

package/dist/prompts-registry.js ADDED Viewed

@@ -0,0 +1,54 @@
+/**
+ * Single source of truth for all MCP prompt definitions
+ * Prompts are user-controlled templates that appear as slash commands in MCP clients
+ */
+const FIX_WEBVIEW_ERRORS_PROMPT = `I need help finding and fixing JavaScript errors in my Tauri app's webview.
+Please follow these steps:
+1. **Start a session** - Use \`tauri_driver_session\` with action "start" to connect to the running Tauri app
+2. **Get console logs** - Use \`tauri_driver_get_console_logs\` to retrieve any JavaScript errors or warnings from the webview console
+3. **Analyze the errors** - Look at the error messages, stack traces, and identify:
+   - What type of error it is (TypeError, ReferenceError, SyntaxError, etc.)
+   - Which file and line number the error originates from
+   - What the root cause might be
+4. **Find the source code** - Use code search or file reading tools to locate the problematic code in my project
+5. **Propose a fix** - Explain what's wrong and suggest a concrete fix for each error found
+6. **Stop the session** - Use \`tauri_driver_session\` with action "stop" to clean up
+If no errors are found, let me know the app is running cleanly.
+If the session fails to start, help me troubleshoot the connection (is the app running? is the MCP bridge plugin installed?).`;
+/**
+ * Complete registry of all available prompts
+ */
+export const PROMPTS = [
+    {
+        name: 'fix-webview-errors',
+        description: '[Tauri Apps Only] Find and fix JavaScript errors in a running Tauri app. ' +
+            'Use ONLY for Tauri projects (with src-tauri/ and tauri.conf.json). ' +
+            'For browser debugging, use Chrome DevTools MCP instead. ' +
+            'For Electron apps, this prompt will NOT work.',
+        arguments: [],
+        handler: () => {
+            return [
+                {
+                    role: 'user',
+                    content: {
+                        type: 'text',
+                        text: FIX_WEBVIEW_ERRORS_PROMPT,
+                    },
+                },
+            ];
+        },
+    },
+];
+/**
+ * Create a Map for fast prompt lookup by name
+ */
+export const PROMPT_MAP = new Map(PROMPTS.map((prompt) => { return [prompt.name, prompt]; }));