@hypothesi/tauri-mcp-server 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@
4
4
  *
5
5
  * @param {Object} params
6
6
  * @param {string} params.action - The action to perform
7
- * @param {string|null} params.selector - CSS selector for the element
7
+ * @param {string|null} params.selector - CSS selector or ref ID (e.g., "ref=e3") for the element
8
8
  * @param {number|null} params.x - X coordinate
9
9
  * @param {number|null} params.y - Y coordinate
10
10
  * @param {number} params.duration - Duration for long-press
@@ -14,24 +14,35 @@
14
14
  (function(params) {
15
15
  const { action, selector, x, y, duration, scrollX, scrollY } = params;
16
16
 
17
+ // Resolve element from CSS selector or ref ID (e.g., "ref=e3" or "e3")
18
+ function resolveElement(selectorOrRef) {
19
+ if (!selectorOrRef) return null;
20
+ var refMatch = selectorOrRef.match(/^(?:ref=)?(e\d+)$/);
21
+ if (refMatch) {
22
+ var refId = refMatch[1],
23
+ refMap = window.__MCP_ARIA_REFS_REVERSE__;
24
+ if (!refMap) throw new Error('Ref "' + refId + '" not found. Run webview_dom_snapshot first to index elements.');
25
+ var el = refMap.get(refId);
26
+ if (!el) throw new Error('Ref "' + refId + '" not found. The DOM may have changed since the snapshot.');
27
+ return el;
28
+ }
29
+ var el = document.querySelector(selectorOrRef);
30
+ if (!el) throw new Error('Element not found: ' + selectorOrRef);
31
+ return el;
32
+ }
33
+
17
34
  let element = null;
18
35
  let targetX, targetY;
19
36
 
20
37
  // For scroll action, we don't necessarily need a selector or coordinates
21
38
  if (action === 'scroll') {
22
39
  if (selector) {
23
- element = document.querySelector(selector);
24
- if (!element) {
25
- throw new Error(`Element not found: ${selector}`);
26
- }
40
+ element = resolveElement(selector);
27
41
  }
28
42
  } else {
29
43
  // For other actions, we need either selector or coordinates
30
44
  if (selector) {
31
- element = document.querySelector(selector);
32
- if (!element) {
33
- throw new Error(`Element not found: ${selector}`);
34
- }
45
+ element = resolveElement(selector);
35
46
  const rect = element.getBoundingClientRect();
36
47
  targetX = rect.left + rect.width / 2;
37
48
  targetY = rect.top + rect.height / 2;
@@ -3,13 +3,26 @@
3
3
  *
4
4
  * @param {Object} params
5
5
  * @param {string} params.type - What to wait for: 'selector', 'text', 'ipc-event'
6
- * @param {string} params.value - Selector, text, or event name to wait for
6
+ * @param {string} params.value - Selector/ref ID, text, or event name to wait for
7
7
  * @param {number} params.timeout - Timeout in milliseconds
8
8
  */
9
9
  (async function(params) {
10
10
  const { type, value, timeout } = params;
11
11
  const startTime = Date.now();
12
12
 
13
+ // Resolve element from CSS selector or ref ID (e.g., "ref=e3" or "e3")
14
+ function resolveElement(selectorOrRef) {
15
+ if (!selectorOrRef) return null;
16
+ var refMatch = selectorOrRef.match(/^(?:ref=)?(e\d+)$/);
17
+ if (refMatch) {
18
+ var refId = refMatch[1],
19
+ refMap = window.__MCP_ARIA_REFS_REVERSE__;
20
+ if (!refMap) return null; // For wait-for, return null instead of throwing
21
+ return refMap.get(refId) || null;
22
+ }
23
+ return document.querySelector(selectorOrRef);
24
+ }
25
+
13
26
  return new Promise((resolve, reject) => {
14
27
  function check() {
15
28
  if (Date.now() - startTime > timeout) {
@@ -18,7 +31,7 @@
18
31
  }
19
32
 
20
33
  if (type === 'selector') {
21
- const element = document.querySelector(value);
34
+ const element = resolveElement(value);
22
35
  if (element) {
23
36
  resolve(`Element found: ${value}`);
24
37
  return;
@@ -75,7 +75,7 @@ function getAppDiscovery(host) {
75
75
  */
76
76
  export function resolveTargetApp(portOrIdentifier) {
77
77
  if (activeSessions.size === 0) {
78
- throw new Error('No active session. Call tauri_driver_session with action "start" first to connect to a Tauri app.');
78
+ throw new Error('No active session. Call driver_session with action "start" first to connect to a Tauri app.');
79
79
  }
80
80
  // Single app - return it
81
81
  if (activeSessions.size === 1) {
@@ -127,7 +127,7 @@ function formatAppNotFoundError(identifier) {
127
127
  .join('\n');
128
128
  return (`App "${identifier}" not found.\n\n` +
129
129
  `Connected apps:\n${appList}\n\n` +
130
- 'Use tauri_driver_session with action "status" to list all connected apps.');
130
+ 'Use driver_session with action "status" to list all connected apps.');
131
131
  }
132
132
  /**
133
133
  * Promote the next default app when the current default is removed.
@@ -23,13 +23,13 @@ const driverLogger = createMcpLogger('DRIVER');
23
23
  * This is called automatically by all tool functions.
24
24
  *
25
25
  * Initialization includes:
26
- * - Verifying an active session exists (via tauri_driver_session)
26
+ * - Verifying an active session exists (via driver_session)
27
27
  * - Connecting to the plugin WebSocket using session config
28
28
  * - Console capture is already initialized by bridge.js in the Tauri app
29
29
  *
30
30
  * This function is idempotent - calling it multiple times is safe.
31
31
  *
32
- * @throws Error if no session is active (tauri_driver_session must be called first)
32
+ * @throws Error if no session is active (driver_session must be called first)
33
33
  */
34
34
  export async function ensureReady() {
35
35
  if (isInitialized) {
@@ -37,7 +37,7 @@ export async function ensureReady() {
37
37
  }
38
38
  // Require an active session to prevent connecting to wrong app
39
39
  if (!hasActiveSession()) {
40
- throw new Error('No active session. Call tauri_driver_session with action "start" first to connect to a Tauri app.');
40
+ throw new Error('No active session. Call driver_session with action "start" first to connect to a Tauri app.');
41
41
  }
42
42
  // Get default session for initial connection
43
43
  const session = getDefaultSession();
@@ -289,7 +289,7 @@ async function prepareHtml2canvasScript(format, quality) {
289
289
  * @returns Screenshot result with image content
290
290
  */
291
291
  export async function captureScreenshot(options = {}) {
292
- const { format = 'png', quality = 90, windowId, appIdentifier, maxWidth } = options;
292
+ const { format = 'jpeg', quality = 80, windowId, appIdentifier, maxWidth } = options;
293
293
  // Primary implementation: Use native platform-specific APIs
294
294
  // - macOS: WKWebView takeSnapshot
295
295
  // - Windows: WebView2 CapturePreview
@@ -425,6 +425,6 @@ export const GetConsoleLogsSchema = z.object({
425
425
  since: z.string().optional().describe('ISO timestamp to filter logs since'),
426
426
  });
427
427
  export const CaptureScreenshotSchema = z.object({
428
- format: z.enum(['png', 'jpeg']).optional().default('png').describe('Image format'),
428
+ format: z.enum(['png', 'jpeg']).optional().default('jpeg').describe('Image format'),
429
429
  quality: z.number().min(0).max(100).optional().describe('JPEG quality (0-100)'),
430
430
  });
@@ -33,8 +33,8 @@ export const InteractSchema = WindowTargetSchema.extend({
33
33
  toY: z.number().optional().describe('Ending Y coordinate for swipe'),
34
34
  });
35
35
  export const ScreenshotSchema = WindowTargetSchema.extend({
36
- format: z.enum(['png', 'jpeg']).optional().default('png').describe('Image format'),
37
- quality: z.number().min(0).max(100).optional().describe('JPEG quality (0-100, only for jpeg format)'),
36
+ format: z.enum(['png', 'jpeg']).optional().default('jpeg').describe('Image format'),
37
+ quality: z.number().min(0).max(100).optional().default(80).describe('JPEG quality (0-100, only for jpeg format)'),
38
38
  filePath: z.string().optional().describe('File path to save the screenshot to instead of returning as base64'),
39
39
  maxWidth: z.number().int().positive().optional().describe('Maximum width in pixels. Images wider than this will be scaled down proportionally. ' +
40
40
  'Can also be set via TAURI_MCP_SCREENSHOT_MAX_WIDTH environment variable.'),
@@ -75,6 +75,10 @@ export const GetConsoleLogsSchema = WindowTargetSchema.extend({
75
75
  filter: z.string().optional().describe('Regex or keyword to filter logs'),
76
76
  since: z.string().optional().describe('ISO timestamp to filter logs since'),
77
77
  });
78
+ export const DomSnapshotSchema = WindowTargetSchema.extend({
79
+ type: z.enum(['accessibility', 'structure']).describe('Snapshot type'),
80
+ selector: z.string().optional().describe('CSS selector to scope the snapshot. If omitted, snapshots entire document.'),
81
+ });
78
82
  // ============================================================================
79
83
  // Implementation Functions
80
84
  // ============================================================================
@@ -123,7 +127,7 @@ async function performSwipe(options) {
123
127
  }
124
128
  }
125
129
  export async function screenshot(options = {}) {
126
- const { quality, format = 'png', windowId, filePath, appIdentifier, maxWidth } = options;
130
+ const { quality, format = 'jpeg', windowId, filePath, appIdentifier, maxWidth } = options;
127
131
  // Use the native screenshot function from webview-executor
128
132
  const result = await captureScreenshot({ format, quality, windowId, appIdentifier, maxWidth });
129
133
  // If filePath is provided, write to file instead of returning base64
@@ -265,3 +269,35 @@ export async function getConsoleLogs(options = {}) {
265
269
  throw new Error(`Failed to get console logs: ${message}`);
266
270
  }
267
271
  }
272
+ /**
273
+ * Generate a structured DOM snapshot for AI consumption.
274
+ * Uses aria-api for comprehensive, spec-compliant accessibility computation.
275
+ */
276
+ export async function domSnapshot(options) {
277
+ const { type, selector, windowId, appIdentifier } = options;
278
+ // Only load aria-api for accessibility snapshots
279
+ if (type === 'accessibility') {
280
+ await ensureAriaApiLoaded(windowId);
281
+ }
282
+ // Then execute the snapshot script
283
+ const script = buildScript(SCRIPTS.domSnapshot, { type, selector: selector ?? null });
284
+ try {
285
+ return await executeInWebview(script, windowId, appIdentifier);
286
+ }
287
+ catch (error) {
288
+ const message = error instanceof Error ? error.message : String(error);
289
+ throw new Error(`DOM snapshot failed: ${message}`);
290
+ }
291
+ }
292
+ /**
293
+ * Ensure aria-api library is loaded in the webview.
294
+ * Uses the script manager to inject the library if not already present.
295
+ */
296
+ async function ensureAriaApiLoaded(windowId) {
297
+ const { getAriaApiSource, ARIA_API_SCRIPT_ID: ariaApiScriptId } = await import('./scripts/aria-api-loader.js');
298
+ const { registerScript, isScriptRegistered } = await import('./script-manager.js');
299
+ if (await isScriptRegistered(ariaApiScriptId)) {
300
+ return;
301
+ }
302
+ await registerScript(ariaApiScriptId, 'inline', getAriaApiSource(), windowId);
303
+ }
@@ -7,9 +7,9 @@ const FIX_WEBVIEW_ERRORS_PROMPT = `I need help finding and fixing JavaScript err
7
7
 
8
8
  Please follow these steps:
9
9
 
10
- 1. **Start a session** - Use \`tauri_driver_session\` with action "start" to connect to the running Tauri app
10
+ 1. **Start a session** - Use \`driver_session\` with action "start" to connect to the running Tauri app
11
11
 
12
- 2. **Get console logs** - Use \`tauri_read_logs\` with source "console" to retrieve JavaScript errors or warnings
12
+ 2. **Get console logs** - Use \`read_logs\` with source "console" to retrieve JavaScript errors or warnings
13
13
 
14
14
  3. **Analyze the errors** - Look at the error messages, stack traces, and identify:
15
15
  - What type of error it is (TypeError, ReferenceError, SyntaxError, etc.)
@@ -20,7 +20,7 @@ Please follow these steps:
20
20
 
21
21
  5. **Propose a fix** - Explain what's wrong and suggest a concrete fix for each error found
22
22
 
23
- 6. **Stop the session** - Use \`tauri_driver_session\` with action "stop" to clean up
23
+ 6. **Stop the session** - Use \`driver_session\` with action "stop" to clean up
24
24
 
25
25
  If no errors are found, let me know the app is running cleanly.
26
26
 
@@ -83,8 +83,8 @@ Only after I say yes should you make any modifications.
83
83
 
84
84
  Once changes are approved and made:
85
85
  1. Run the Tauri app in development mode (\`cargo tauri dev\`)
86
- 2. Use \`tauri_driver_session\` with action "start" to connect
87
- 3. Use \`tauri_driver_session\` with action "status" to verify
86
+ 2. Use \`driver_session\` with action "start" to connect
87
+ 3. Use \`driver_session\` with action "status" to verify
88
88
 
89
89
  ## Notes
90
90
 
@@ -7,7 +7,7 @@ import { listDevices, ListDevicesSchema } from './manager/mobile.js';
7
7
  import { manageDriverSession, ManageDriverSessionSchema, } from './driver/session-manager.js';
8
8
  import { readLogs, ReadLogsSchema } from './monitor/logs.js';
9
9
  import { executeIPCCommand, manageIPCMonitoring, getIPCEvents, emitTestEvent, getBackendState, manageWindow, ExecuteIPCCommandSchema, ManageIPCMonitoringSchema, GetIPCEventsSchema, EmitTestEventSchema, GetBackendStateSchema, ManageWindowSchema, } from './driver/plugin-commands.js';
10
- import { interact, screenshot, keyboard, waitFor, getStyles, executeJavaScript, findElement, InteractSchema, ScreenshotSchema, KeyboardSchema, WaitForSchema, GetStylesSchema, ExecuteJavaScriptSchema, FindElementSchema, } from './driver/webview-interactions.js';
10
+ import { interact, screenshot, keyboard, waitFor, getStyles, executeJavaScript, findElement, domSnapshot, InteractSchema, ScreenshotSchema, KeyboardSchema, WaitForSchema, GetStylesSchema, ExecuteJavaScriptSchema, FindElementSchema, DomSnapshotSchema, } from './driver/webview-interactions.js';
11
11
  import { PLUGIN_VERSION_CARGO } from './version.js';
12
12
  /**
13
13
  * Standard multi-app description for webview tools.
@@ -84,8 +84,8 @@ Only after the user says yes should you make any modifications.
84
84
 
85
85
  Once changes are approved and made:
86
86
  1. Run the Tauri app in development mode (\`cargo tauri dev\`)
87
- 2. Use \`tauri_driver_session\` with action "start" to connect
88
- 3. Use \`tauri_driver_session\` with action "status" to verify
87
+ 2. Use \`driver_session\` with action "start" to connect
88
+ 3. Use \`driver_session\` with action "status" to verify
89
89
 
90
90
  ## Notes
91
91
 
@@ -100,9 +100,9 @@ Once changes are approved and made:
100
100
  export const TOOLS = [
101
101
  // Setup & Configuration Tools
102
102
  {
103
- name: 'tauri_get_setup_instructions',
103
+ name: 'get_setup_instructions',
104
104
  description: 'Get instructions for setting up or updating the MCP Bridge plugin in a Tauri project. ' +
105
- 'Call this tool when: (1) tauri_driver_session fails to connect, (2) you detect the plugin ' +
105
+ 'Call this tool when: (1) driver_session fails to connect, (2) you detect the plugin ' +
106
106
  'is not installed or outdated, or (3) the user asks about setup. ' +
107
107
  'Returns step-by-step guidance that you should follow to help the user configure their project. ' +
108
108
  'IMPORTANT: The instructions require you to examine the project first and ask for permission ' +
@@ -122,7 +122,7 @@ export const TOOLS = [
122
122
  },
123
123
  // Mobile Development Tools
124
124
  {
125
- name: 'tauri_list_devices',
125
+ name: 'list_devices',
126
126
  description: '[Tauri Mobile Apps Only] List Android emulators/devices and iOS simulators. ' +
127
127
  'Use for Tauri mobile development (tauri android dev, tauri ios dev). ' +
128
128
  'Not needed for desktop-only Tauri apps or web projects.',
@@ -140,7 +140,7 @@ export const TOOLS = [
140
140
  },
141
141
  // UI Automation Tools
142
142
  {
143
- name: 'tauri_driver_session',
143
+ name: 'driver_session',
144
144
  description: '[Tauri Apps Only] Start/stop automation session to connect to a RUNNING Tauri app. ' +
145
145
  'Supports multiple concurrent app connections - each app runs on a unique port. ' +
146
146
  'The most recently connected app becomes the "default" app used when no appIdentifier is specified. ' +
@@ -148,7 +148,7 @@ export const TOOLS = [
148
148
  'or array format with "isDefault" indicator when multiple apps connected. ' +
149
149
  'Action "stop" without appIdentifier stops ALL sessions; with appIdentifier stops only that app. ' +
150
150
  'The identifier field (e.g., "com.example.myapp") uniquely identifies each app. ' +
151
- 'REQUIRED before using other tauri_webview_* or tauri_plugin_* tools. ' +
151
+ 'REQUIRED before using other webview_* or ipc_* tools. ' +
152
152
  'Connects via WebSocket to the MCP Bridge plugin in the Tauri app. ' +
153
153
  'For browser automation, use Chrome DevTools MCP instead. ' +
154
154
  'For Electron apps, this tool will NOT work.',
@@ -167,9 +167,9 @@ export const TOOLS = [
167
167
  },
168
168
  },
169
169
  {
170
- name: 'tauri_webview_find_element',
170
+ name: 'webview_find_element',
171
171
  description: '[Tauri Apps Only] Find DOM elements in a running Tauri app\'s webview. ' +
172
- 'Requires active tauri_driver_session. ' +
172
+ 'Requires active driver_session. ' +
173
173
  MULTI_APP_DESC + ' ' +
174
174
  'For browser pages or documentation sites, use Chrome DevTools MCP instead.',
175
175
  category: TOOL_CATEGORIES.UI_AUTOMATION,
@@ -190,10 +190,10 @@ export const TOOLS = [
190
190
  },
191
191
  },
192
192
  {
193
- name: 'tauri_read_logs',
193
+ name: 'read_logs',
194
194
  description: '[Tauri Apps Only] Read logs from various sources: "console" for webview JS logs, ' +
195
195
  '"android" for logcat, "ios" for simulator logs, "system" for desktop logs. ' +
196
- 'Requires active tauri_driver_session for console logs. ' +
196
+ 'Requires active driver_session for console logs. ' +
197
197
  'Use for debugging Tauri app issues at any level.',
198
198
  category: TOOL_CATEGORIES.UI_AUTOMATION,
199
199
  schema: ReadLogsSchema,
@@ -209,10 +209,10 @@ export const TOOLS = [
209
209
  },
210
210
  // WebView Interaction Tools
211
211
  {
212
- name: 'tauri_webview_interact',
212
+ name: 'webview_interact',
213
213
  description: '[Tauri Apps Only] Click, scroll, swipe, focus, or perform gestures in a Tauri app webview. ' +
214
214
  'Supported actions: click, double-click, long-press, scroll, swipe, focus. ' +
215
- 'Requires active tauri_driver_session. ' +
215
+ 'Requires active driver_session. ' +
216
216
  'For browser interaction, use Chrome DevTools MCP instead.',
217
217
  category: TOOL_CATEGORIES.UI_AUTOMATION,
218
218
  schema: InteractSchema,
@@ -228,9 +228,9 @@ export const TOOLS = [
228
228
  },
229
229
  },
230
230
  {
231
- name: 'tauri_webview_screenshot',
231
+ name: 'webview_screenshot',
232
232
  description: '[Tauri Apps Only] Screenshot a running Tauri app\'s webview. ' +
233
- 'Requires active tauri_driver_session. Captures only visible viewport. ' +
233
+ 'Requires active driver_session. Captures only visible viewport. ' +
234
234
  MULTI_APP_DESC + ' ' +
235
235
  'For browser screenshots, use Chrome DevTools MCP instead. ' +
236
236
  'For Electron apps, this will NOT work.',
@@ -260,9 +260,9 @@ export const TOOLS = [
260
260
  },
261
261
  },
262
262
  {
263
- name: 'tauri_webview_keyboard',
263
+ name: 'webview_keyboard',
264
264
  description: '[Tauri Apps Only] Type text or send keyboard events in a Tauri app. ' +
265
- 'Requires active tauri_driver_session. ' +
265
+ 'Requires active driver_session. ' +
266
266
  MULTI_APP_DESC + ' ' +
267
267
  'For browser keyboard input, use Chrome DevTools MCP instead.',
268
268
  category: TOOL_CATEGORIES.UI_AUTOMATION,
@@ -294,9 +294,9 @@ export const TOOLS = [
294
294
  },
295
295
  },
296
296
  {
297
- name: 'tauri_webview_wait_for',
297
+ name: 'webview_wait_for',
298
298
  description: '[Tauri Apps Only] Wait for elements, text, or IPC events in a Tauri app. ' +
299
- 'Requires active tauri_driver_session. ' +
299
+ 'Requires active driver_session. ' +
300
300
  MULTI_APP_DESC + ' ' +
301
301
  'For browser waits, use Chrome DevTools MCP instead.',
302
302
  category: TOOL_CATEGORIES.UI_AUTOMATION,
@@ -318,9 +318,9 @@ export const TOOLS = [
318
318
  },
319
319
  },
320
320
  {
321
- name: 'tauri_webview_get_styles',
321
+ name: 'webview_get_styles',
322
322
  description: '[Tauri Apps Only] Get computed CSS styles from elements in a Tauri app. ' +
323
- 'Requires active tauri_driver_session. ' +
323
+ 'Requires active driver_session. ' +
324
324
  MULTI_APP_DESC + ' ' +
325
325
  'For browser style inspection, use Chrome DevTools MCP instead.',
326
326
  category: TOOL_CATEGORIES.UI_AUTOMATION,
@@ -342,9 +342,9 @@ export const TOOLS = [
342
342
  },
343
343
  },
344
344
  {
345
- name: 'tauri_webview_execute_js',
345
+ name: 'webview_execute_js',
346
346
  description: '[Tauri Apps Only] Execute JavaScript in a Tauri app\'s webview context. ' +
347
- 'Requires active tauri_driver_session. Has access to window.__TAURI__. ' +
347
+ 'Requires active driver_session. Has access to window.__TAURI__. ' +
348
348
  'If you need a return value, it must be JSON-serializable. ' +
349
349
  'For functions that return values, use an IIFE: "(() => { return 5; })()" not "() => { return 5; }". ' +
350
350
  MULTI_APP_DESC + ' ' +
@@ -367,11 +367,41 @@ export const TOOLS = [
367
367
  });
368
368
  },
369
369
  },
370
+ {
371
+ name: 'webview_dom_snapshot',
372
+ description: '[Tauri Apps Only] Get a structured DOM snapshot of a Tauri app\'s webview. ' +
373
+ 'Supports different snapshot types for AI consumption. ' +
374
+ 'The "accessibility" type returns a YAML representation of the accessibility tree ' +
375
+ 'similar to Playwright\'s aria snapshots, including roles, names, states, and element refs. ' +
376
+ 'Use this for understanding UI semantics, finding interactive elements, or accessibility testing. ' +
377
+ 'The "structure" type returns a YAML representation of the DOM hierarchy ' +
378
+ 'with element tag names, IDs, CSS classes, and data-testid attributes (if present). ' +
379
+ 'Use this for understanding page layout, debugging CSS selectors, or locating elements by class/ID. ' +
380
+ 'Use the optional selector parameter to scope the snapshot to a subtree. ' +
381
+ 'Requires active driver_session. ' +
382
+ MULTI_APP_DESC,
383
+ category: TOOL_CATEGORIES.UI_AUTOMATION,
384
+ schema: DomSnapshotSchema,
385
+ annotations: {
386
+ title: 'DOM Snapshot',
387
+ readOnlyHint: true,
388
+ openWorldHint: false,
389
+ },
390
+ handler: async (args) => {
391
+ const parsed = DomSnapshotSchema.parse(args);
392
+ return await domSnapshot({
393
+ type: parsed.type,
394
+ selector: parsed.selector,
395
+ windowId: parsed.windowId,
396
+ appIdentifier: parsed.appIdentifier,
397
+ });
398
+ },
399
+ },
370
400
  // IPC & Plugin Tools
371
401
  {
372
- name: 'tauri_ipc_execute_command',
402
+ name: 'ipc_execute_command',
373
403
  description: '[Tauri Apps Only] Execute Tauri IPC commands (invoke Rust backend functions). ' +
374
- 'Requires active tauri_driver_session. This is Tauri-specific IPC, not browser APIs. ' +
404
+ 'Requires active driver_session. This is Tauri-specific IPC, not browser APIs. ' +
375
405
  'For Electron IPC or browser APIs, use appropriate tools for those frameworks.',
376
406
  category: TOOL_CATEGORIES.IPC_PLUGIN,
377
407
  schema: ExecuteIPCCommandSchema,
@@ -391,9 +421,9 @@ export const TOOLS = [
391
421
  },
392
422
  },
393
423
  {
394
- name: 'tauri_ipc_monitor',
424
+ name: 'ipc_monitor',
395
425
  description: '[Tauri Apps Only] Monitor Tauri IPC calls between frontend and Rust backend. ' +
396
- 'Requires active tauri_driver_session. Captures invoke() calls and responses. ' +
426
+ 'Requires active driver_session. Captures invoke() calls and responses. ' +
397
427
  'This is Tauri-specific; for browser network monitoring, use Chrome DevTools MCP.',
398
428
  category: TOOL_CATEGORIES.IPC_PLUGIN,
399
429
  schema: ManageIPCMonitoringSchema,
@@ -410,7 +440,7 @@ export const TOOLS = [
410
440
  },
411
441
  },
412
442
  {
413
- name: 'tauri_ipc_get_captured',
443
+ name: 'ipc_get_captured',
414
444
  description: '[Tauri Apps Only] Get captured Tauri IPC traffic (requires ipc_monitor started). ' +
415
445
  'Shows captured commands (invoke calls) and events with arguments and responses. ' +
416
446
  'For browser network requests, use Chrome DevTools MCP instead.',
@@ -427,9 +457,9 @@ export const TOOLS = [
427
457
  },
428
458
  },
429
459
  {
430
- name: 'tauri_ipc_emit_event',
460
+ name: 'ipc_emit_event',
431
461
  description: '[Tauri Apps Only] Emit a Tauri event to test event handlers. ' +
432
- 'Requires active tauri_driver_session. Events are Tauri-specific (not DOM events). ' +
462
+ 'Requires active driver_session. Events are Tauri-specific (not DOM events). ' +
433
463
  'For browser DOM events, use Chrome DevTools MCP instead.',
434
464
  category: TOOL_CATEGORIES.IPC_PLUGIN,
435
465
  schema: EmitTestEventSchema,
@@ -445,9 +475,9 @@ export const TOOLS = [
445
475
  },
446
476
  },
447
477
  {
448
- name: 'tauri_ipc_get_backend_state',
478
+ name: 'ipc_get_backend_state',
449
479
  description: '[Tauri Apps Only] Get Tauri backend state: app metadata, Tauri version, environment. ' +
450
- 'Requires active tauri_driver_session. ' +
480
+ 'Requires active driver_session. ' +
451
481
  'Use to verify you\'re connected to a Tauri app and get app info.',
452
482
  category: TOOL_CATEGORIES.IPC_PLUGIN,
453
483
  schema: GetBackendStateSchema,
@@ -463,12 +493,12 @@ export const TOOLS = [
463
493
  },
464
494
  // Window Management Tools
465
495
  {
466
- name: 'tauri_manage_window',
496
+ name: 'manage_window',
467
497
  description: '[Tauri Apps Only] Manage Tauri windows. Actions: ' +
468
498
  '"list" - List all windows with labels, titles, URLs, and state. ' +
469
499
  '"info" - Get detailed info for a window (size, position, title, focus, visibility). ' +
470
500
  '"resize" - Resize a window (requires width/height, uses logical pixels by default). ' +
471
- 'Requires active tauri_driver_session. ' +
501
+ 'Requires active driver_session. ' +
472
502
  'For browser windows, use Chrome DevTools MCP instead.',
473
503
  category: TOOL_CATEGORIES.UI_AUTOMATION,
474
504
  schema: ManageWindowSchema,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hypothesi/tauri-mcp-server",
3
- "version": "0.7.0",
3
+ "version": "0.8.0",
4
4
  "mcpName": "io.github.hypothesi/mcp-server-tauri",
5
5
  "description": "A Model Context Protocol server for use with Tauri v2 applications",
6
6
  "type": "module",
@@ -8,6 +8,7 @@
8
8
  "mcp-server-tauri": "./dist/index.js"
9
9
  },
10
10
  "scripts": {
11
+ "prebuild": "tsx scripts/bundle-aria-api.ts",
11
12
  "build": "tsc && cp -r src/driver/scripts/*.js dist/driver/scripts/ && chmod +x dist/index.js",
12
13
  "start": "node dist/index.js",
13
14
  "test": "vitest run",
@@ -35,14 +36,18 @@
35
36
  "model-context-protocol",
36
37
  "tauri",
37
38
  "automation",
39
+ "screenshot",
40
+ "verification",
38
41
  "testing",
39
42
  "debugging",
40
43
  "cursor",
41
44
  "windsurf",
42
- "vscode"
45
+ "vscode",
46
+ "claude-code"
43
47
  ],
44
48
  "dependencies": {
45
49
  "@modelcontextprotocol/sdk": "0.6.1",
50
+ "aria-api": "0.8.0",
46
51
  "execa": "9.6.0",
47
52
  "html2canvas": "1.4.1",
48
53
  "ws": "8.18.3",
@@ -53,6 +58,7 @@
53
58
  "@types/html2canvas": "0.5.35",
54
59
  "@types/node": "22.19.1",
55
60
  "@types/ws": "8.18.1",
61
+ "esbuild": "0.25.12",
56
62
  "vitest": "4.0.13"
57
63
  },
58
64
  "publishConfig": {