@hypothesi/tauri-mcp-server 0.1.2 → 0.2.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,20 @@
1
1
  import { z } from 'zod';
2
- import { executeInWebview, captureScreenshot, getConsoleLogs as getConsoleLogsFromCapture } from './webview-executor.js';
2
+ import { executeInWebview, executeInWebviewWithContext, captureScreenshot, getConsoleLogs as getConsoleLogsFromCapture, } from './webview-executor.js';
3
3
  import { SCRIPTS, buildScript, buildTypeScript, buildKeyEventScript } from './scripts/index.js';
4
4
  // ============================================================================
5
+ // Base Schema for Window Targeting
6
+ // ============================================================================
7
+ /**
8
+ * Base schema mixin for tools that can target a specific window.
9
+ * All webview tools extend this to support multi-window applications.
10
+ */
11
+ export const WindowTargetSchema = z.object({
12
+ windowId: z.string().optional().describe('Window label to target (defaults to "main")'),
13
+ });
14
+ // ============================================================================
5
15
  // Schemas
6
16
  // ============================================================================
7
- export const InteractSchema = z.object({
17
+ export const InteractSchema = WindowTargetSchema.extend({
8
18
  action: z.enum(['click', 'double-click', 'long-press', 'scroll', 'swipe'])
9
19
  .describe('Type of interaction to perform'),
10
20
  selector: z.string().optional().describe('CSS selector for the element to interact with'),
@@ -14,17 +24,16 @@ export const InteractSchema = z.object({
14
24
  .describe('Duration in ms for long-press or swipe (default: 500ms for long-press, 300ms for swipe)'),
15
25
  scrollX: z.number().optional().describe('Horizontal scroll amount in pixels (positive = right)'),
16
26
  scrollY: z.number().optional().describe('Vertical scroll amount in pixels (positive = down)'),
17
- // Swipe-specific parameters
18
27
  fromX: z.number().optional().describe('Starting X coordinate for swipe'),
19
28
  fromY: z.number().optional().describe('Starting Y coordinate for swipe'),
20
29
  toX: z.number().optional().describe('Ending X coordinate for swipe'),
21
30
  toY: z.number().optional().describe('Ending Y coordinate for swipe'),
22
31
  });
23
- export const ScreenshotSchema = z.object({
32
+ export const ScreenshotSchema = WindowTargetSchema.extend({
24
33
  format: z.enum(['png', 'jpeg']).optional().default('png').describe('Image format'),
25
34
  quality: z.number().min(0).max(100).optional().describe('JPEG quality (0-100, only for jpeg format)'),
26
35
  });
27
- export const KeyboardSchema = z.object({
36
+ export const KeyboardSchema = WindowTargetSchema.extend({
28
37
  action: z.enum(['type', 'press', 'down', 'up'])
29
38
  .describe('Keyboard action type: "type" for typing text into an element, "press/down/up" for key events'),
30
39
  selector: z.string().optional().describe('CSS selector for element to type into (required for "type" action)'),
@@ -32,29 +41,29 @@ export const KeyboardSchema = z.object({
32
41
  key: z.string().optional().describe('Key to press (required for "press/down/up" actions, e.g., "Enter", "a", "Escape")'),
33
42
  modifiers: z.array(z.enum(['Control', 'Alt', 'Shift', 'Meta'])).optional().describe('Modifier keys to hold'),
34
43
  });
35
- export const WaitForSchema = z.object({
44
+ export const WaitForSchema = WindowTargetSchema.extend({
36
45
  type: z.enum(['selector', 'text', 'ipc-event']).describe('What to wait for'),
37
46
  value: z.string().describe('Selector, text content, or IPC event name to wait for'),
38
47
  timeout: z.number().optional().default(5000).describe('Timeout in milliseconds (default: 5000ms)'),
39
48
  });
40
- export const GetStylesSchema = z.object({
49
+ export const GetStylesSchema = WindowTargetSchema.extend({
41
50
  selector: z.string().describe('CSS selector for element(s) to get styles from'),
42
51
  properties: z.array(z.string()).optional().describe('Specific CSS properties to retrieve. If omitted, returns all computed styles'),
43
52
  multiple: z.boolean().optional().default(false)
44
53
  .describe('Whether to get styles for all matching elements (true) or just the first (false)'),
45
54
  });
46
- export const ExecuteJavaScriptSchema = z.object({
55
+ export const ExecuteJavaScriptSchema = WindowTargetSchema.extend({
47
56
  script: z.string().describe('JavaScript code to execute in the webview context'),
48
57
  args: z.array(z.unknown()).optional().describe('Arguments to pass to the script'),
49
58
  });
50
- export const FocusElementSchema = z.object({
59
+ export const FocusElementSchema = WindowTargetSchema.extend({
51
60
  selector: z.string().describe('CSS selector for element to focus'),
52
61
  });
53
- export const FindElementSchema = z.object({
62
+ export const FindElementSchema = WindowTargetSchema.extend({
54
63
  selector: z.string(),
55
64
  strategy: z.enum(['css', 'xpath', 'text']).default('css'),
56
65
  });
57
- export const GetConsoleLogsSchema = z.object({
66
+ export const GetConsoleLogsSchema = WindowTargetSchema.extend({
58
67
  filter: z.string().optional().describe('Regex or keyword to filter logs'),
59
68
  since: z.string().optional().describe('ISO timestamp to filter logs since'),
60
69
  });
@@ -62,10 +71,10 @@ export const GetConsoleLogsSchema = z.object({
62
71
  // Implementation Functions
63
72
  // ============================================================================
64
73
  export async function interact(options) {
65
- const { action, selector, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY } = options;
74
+ const { action, selector, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY, windowId } = options;
66
75
  // Handle swipe action separately since it has different logic
67
76
  if (action === 'swipe') {
68
- return performSwipe(fromX, fromY, toX, toY, duration);
77
+ return performSwipe({ fromX, fromY, toX, toY, duration, windowId });
69
78
  }
70
79
  const script = buildScript(SCRIPTS.interact, {
71
80
  action,
@@ -77,31 +86,34 @@ export async function interact(options) {
77
86
  scrollY: scrollY ?? 0,
78
87
  });
79
88
  try {
80
- return await executeInWebview(script);
89
+ return await executeInWebview(script, windowId);
81
90
  }
82
91
  catch (error) {
83
92
  const message = error instanceof Error ? error.message : String(error);
84
93
  throw new Error(`Interaction failed: ${message}`);
85
94
  }
86
95
  }
87
- async function performSwipe(fromX, fromY, toX, toY, duration = 300) {
96
+ async function performSwipe(options) {
97
+ const { fromX, fromY, toX, toY, duration = 300, windowId } = options;
88
98
  if (fromX === undefined || fromY === undefined || toX === undefined || toY === undefined) {
89
99
  throw new Error('Swipe action requires fromX, fromY, toX, and toY coordinates');
90
100
  }
91
101
  const script = buildScript(SCRIPTS.swipe, { fromX, fromY, toX, toY, duration });
92
102
  try {
93
- return await executeInWebview(script);
103
+ return await executeInWebview(script, windowId);
94
104
  }
95
105
  catch (error) {
96
106
  const message = error instanceof Error ? error.message : String(error);
97
107
  throw new Error(`Swipe failed: ${message}`);
98
108
  }
99
109
  }
100
- export async function screenshot(quality, format = 'png') {
110
+ export async function screenshot(options = {}) {
111
+ const { quality, format = 'png', windowId } = options;
101
112
  // Use the native screenshot function from webview-executor
102
- return captureScreenshot(format, quality);
113
+ return captureScreenshot({ format, quality, windowId });
103
114
  }
104
- export async function keyboard(action, selectorOrKey, textOrModifiers, modifiers) {
115
+ export async function keyboard(options) {
116
+ const { action, selectorOrKey, textOrModifiers, modifiers, windowId } = options;
105
117
  // Handle the different parameter combinations based on action
106
118
  if (action === 'type') {
107
119
  const selector = selectorOrKey;
@@ -111,7 +123,7 @@ export async function keyboard(action, selectorOrKey, textOrModifiers, modifiers
111
123
  }
112
124
  const script = buildTypeScript(selector, text);
113
125
  try {
114
- return await executeInWebview(script);
126
+ return await executeInWebview(script, windowId);
115
127
  }
116
128
  catch (error) {
117
129
  const message = error instanceof Error ? error.message : String(error);
@@ -126,38 +138,41 @@ export async function keyboard(action, selectorOrKey, textOrModifiers, modifiers
126
138
  }
127
139
  const script = buildKeyEventScript(action, key, mods || []);
128
140
  try {
129
- return await executeInWebview(script);
141
+ return await executeInWebview(script, windowId);
130
142
  }
131
143
  catch (error) {
132
144
  const message = error instanceof Error ? error.message : String(error);
133
145
  throw new Error(`Keyboard action failed: ${message}`);
134
146
  }
135
147
  }
136
- export async function waitFor(type, value, timeout = 5000) {
148
+ export async function waitFor(options) {
149
+ const { type, value, timeout = 5000, windowId } = options;
137
150
  const script = buildScript(SCRIPTS.waitFor, { type, value, timeout });
138
151
  try {
139
- return await executeInWebview(script);
152
+ return await executeInWebview(script, windowId);
140
153
  }
141
154
  catch (error) {
142
155
  const message = error instanceof Error ? error.message : String(error);
143
156
  throw new Error(`Wait failed: ${message}`);
144
157
  }
145
158
  }
146
- export async function getStyles(selector, properties, multiple = false) {
159
+ export async function getStyles(options) {
160
+ const { selector, properties, multiple = false, windowId } = options;
147
161
  const script = buildScript(SCRIPTS.getStyles, {
148
162
  selector,
149
163
  properties: properties || [],
150
164
  multiple,
151
165
  });
152
166
  try {
153
- return await executeInWebview(script);
167
+ return await executeInWebview(script, windowId);
154
168
  }
155
169
  catch (error) {
156
170
  const message = error instanceof Error ? error.message : String(error);
157
171
  throw new Error(`Get styles failed: ${message}`);
158
172
  }
159
173
  }
160
- export async function executeJavaScript(script, args) {
174
+ export async function executeJavaScript(options) {
175
+ const { script, args, windowId } = options;
161
176
  // If args are provided, we need to inject them into the script context
162
177
  const wrappedScript = args && args.length > 0
163
178
  ? `
@@ -168,18 +183,26 @@ export async function executeJavaScript(script, args) {
168
183
  `
169
184
  : script;
170
185
  try {
171
- const result = await executeInWebview(wrappedScript);
172
- return result;
186
+ const { result, windowLabel, warning } = await executeInWebviewWithContext(wrappedScript, windowId);
187
+ // Build response with window context
188
+ let response = result;
189
+ if (warning) {
190
+ response = `⚠️ ${warning}\n\n${response}`;
191
+ }
192
+ // Add window info footer for clarity
193
+ response += `\n\n[Executed in window: ${windowLabel}]`;
194
+ return response;
173
195
  }
174
196
  catch (error) {
175
197
  const message = error instanceof Error ? error.message : String(error);
176
198
  throw new Error(`JavaScript execution failed: ${message}`);
177
199
  }
178
200
  }
179
- export async function focusElement(selector) {
201
+ export async function focusElement(options) {
202
+ const { selector, windowId } = options;
180
203
  const script = buildScript(SCRIPTS.focus, { selector });
181
204
  try {
182
- return await executeInWebview(script);
205
+ return await executeInWebview(script, windowId);
183
206
  }
184
207
  catch (error) {
185
208
  const message = error instanceof Error ? error.message : String(error);
@@ -189,10 +212,11 @@ export async function focusElement(selector) {
189
212
  /**
190
213
  * Find an element using various selector strategies.
191
214
  */
192
- export async function findElement(selector, strategy) {
215
+ export async function findElement(options) {
216
+ const { selector, strategy, windowId } = options;
193
217
  const script = buildScript(SCRIPTS.findElement, { selector, strategy });
194
218
  try {
195
- return await executeInWebview(script);
219
+ return await executeInWebview(script, windowId);
196
220
  }
197
221
  catch (error) {
198
222
  const message = error instanceof Error ? error.message : String(error);
@@ -202,7 +226,8 @@ export async function findElement(selector, strategy) {
202
226
  /**
203
227
  * Get console logs from the webview.
204
228
  */
205
- export async function getConsoleLogs(filter, since) {
229
+ export async function getConsoleLogs(options = {}) {
230
+ const { filter, since } = options;
206
231
  try {
207
232
  return await getConsoleLogsFromCapture(filter, since);
208
233
  }
package/dist/index.js CHANGED
@@ -1,13 +1,14 @@
1
1
  #!/usr/bin/env node
2
2
  import { Server } from '@modelcontextprotocol/sdk/server/index.js';
3
3
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
- import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
4
+ import { CallToolRequestSchema, ListToolsRequestSchema, ListPromptsRequestSchema, GetPromptRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
5
5
  import { zodToJsonSchema } from 'zod-to-json-schema';
6
6
  import { readFileSync } from 'fs';
7
7
  import { fileURLToPath } from 'url';
8
8
  import { dirname, join } from 'path';
9
- // Import the single source of truth for all tools
9
+ // Import the single source of truth for all tools and prompts
10
10
  import { TOOLS, TOOL_MAP } from './tools-registry.js';
11
+ import { PROMPTS, PROMPT_MAP } from './prompts-registry.js';
11
12
  /* eslint-disable no-process-exit */
12
13
  // Read version from package.json
13
14
  const currentDir = dirname(fileURLToPath(import.meta.url));
@@ -20,6 +21,7 @@ const server = new Server({
20
21
  }, {
21
22
  capabilities: {
22
23
  tools: {},
24
+ prompts: {},
23
25
  },
24
26
  });
25
27
  // Handle connection errors gracefully - don't crash on broken pipe
@@ -45,6 +47,7 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
45
47
  name: tool.name,
46
48
  description: tool.description,
47
49
  inputSchema: zodToJsonSchema(tool.schema),
50
+ annotations: tool.annotations,
48
51
  };
49
52
  }),
50
53
  };
@@ -67,6 +70,30 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
67
70
  };
68
71
  }
69
72
  });
73
+ // Prompt list handler - generated from registry
74
+ server.setRequestHandler(ListPromptsRequestSchema, async () => {
75
+ return {
76
+ prompts: PROMPTS.map((prompt) => {
77
+ return {
78
+ name: prompt.name,
79
+ description: prompt.description,
80
+ arguments: prompt.arguments,
81
+ };
82
+ }),
83
+ };
84
+ });
85
+ // Get prompt handler - returns prompt messages for a specific prompt
86
+ server.setRequestHandler(GetPromptRequestSchema, async (request) => {
87
+ const prompt = PROMPT_MAP.get(request.params.name);
88
+ if (!prompt) {
89
+ throw new Error(`Unknown prompt: ${request.params.name}`);
90
+ }
91
+ const args = (request.params.arguments || {});
92
+ return {
93
+ description: prompt.description,
94
+ messages: prompt.handler(args),
95
+ };
96
+ });
70
97
  // Start server
71
98
  async function main() {
72
99
  const transport = new StdioServerTransport();
package/dist/prompts-registry.js ADDED
@@ -0,0 +1,54 @@
1
+ /**
2
+ * Single source of truth for all MCP prompt definitions
3
+ * Prompts are user-controlled templates that appear as slash commands in MCP clients
4
+ */
5
+ const FIX_WEBVIEW_ERRORS_PROMPT = `I need help finding and fixing JavaScript errors in my Tauri app's webview.
6
+
7
+ Please follow these steps:
8
+
9
+ 1. **Start a session** - Use \`tauri_driver_session\` with action "start" to connect to the running Tauri app
10
+
11
+ 2. **Get console logs** - Use \`tauri_driver_get_console_logs\` to retrieve any JavaScript errors or warnings from the webview console
12
+
13
+ 3. **Analyze the errors** - Look at the error messages, stack traces, and identify:
14
+ - What type of error it is (TypeError, ReferenceError, SyntaxError, etc.)
15
+ - Which file and line number the error originates from
16
+ - What the root cause might be
17
+
18
+ 4. **Find the source code** - Use code search or file reading tools to locate the problematic code in my project
19
+
20
+ 5. **Propose a fix** - Explain what's wrong and suggest a concrete fix for each error found
21
+
22
+ 6. **Stop the session** - Use \`tauri_driver_session\` with action "stop" to clean up
23
+
24
+ If no errors are found, let me know the app is running cleanly.
25
+
26
+ If the session fails to start, help me troubleshoot the connection (is the app running? is the MCP bridge plugin installed?).`;
27
+ /**
28
+ * Complete registry of all available prompts
29
+ */
30
+ export const PROMPTS = [
31
+ {
32
+ name: 'fix-webview-errors',
33
+ description: '[Tauri Apps Only] Find and fix JavaScript errors in a running Tauri app. ' +
34
+ 'Use ONLY for Tauri projects (with src-tauri/ and tauri.conf.json). ' +
35
+ 'For browser debugging, use Chrome DevTools MCP instead. ' +
36
+ 'For Electron apps, this prompt will NOT work.',
37
+ arguments: [],
38
+ handler: () => {
39
+ return [
40
+ {
41
+ role: 'user',
42
+ content: {
43
+ type: 'text',
44
+ text: FIX_WEBVIEW_ERRORS_PROMPT,
45
+ },
46
+ },
47
+ ];
48
+ },
49
+ },
50
+ ];
51
+ /**
52
+ * Create a Map for fast prompt lookup by name
53
+ */
54
+ export const PROMPT_MAP = new Map(PROMPTS.map((prompt) => { return [prompt.name, prompt]; }));