@hypothesi/tauri-mcp-server 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
+ /**
2
+ * html2canvas library loader
3
+ *
4
+ * Loads the html2canvas library from node_modules and provides it as a string
5
+ * that can be injected into the webview.
6
+ */
7
+ import { readFileSync } from 'fs';
8
+ import { createRequire } from 'module';
9
// createRequire gives this ES module a CommonJS-style resolver, which is used
// below to locate the html2canvas bundle inside node_modules.
const require = createRequire(import.meta.url);

// Cached bundle text; stays null until the first successful read.
let html2canvasSource = null;

/**
 * Return the source text of the minified html2canvas bundle.
 *
 * The bundle is resolved and read synchronously on the first call only;
 * every later call returns the cached string.
 *
 * @returns {string} Contents of html2canvas/dist/html2canvas.min.js.
 * @throws Whatever require.resolve or readFileSync throw when the bundle
 *   cannot be located or read; nothing is cached in that case.
 */
export function getHtml2CanvasSource() {
  if (html2canvasSource !== null) {
    return html2canvasSource;
  }

  const bundlePath = require.resolve('html2canvas/dist/html2canvas.min.js');
  html2canvasSource = readFileSync(bundlePath, 'utf-8');
  return html2canvasSource;
}
24
/**
 * Build a script that injects html2canvas and captures a screenshot.
 *
 * The returned text is a bare async function body (it uses `await` and
 * `return` at its top level), so it must be wrapped by the caller before it
 * is evaluated in the webview.
 *
 * @param {string} format - 'jpeg' selects image/jpeg; any other value
 *   selects image/png.
 * @param {number} quality - Encoder quality as a percentage; it is divided
 *   by 100 before being handed to canvas.toDataURL.
 * @returns {string} Script source that evaluates to a data URL.
 */
export function buildScreenshotScript(format, quality) {
  const html2canvas = getHtml2CanvasSource();

  // Decide the MIME type here instead of splicing the raw `format` value into
  // a quoted literal inside the script: a format containing a quote or a
  // backslash would otherwise break (or inject code into) the generated text.
  const mimeType = format === 'jpeg' ? 'image/jpeg' : 'image/png';

  // A missing or non-numeric quality becomes `undefined`, which lets
  // toDataURL apply its own default instead of receiving a literal NaN.
  const qualityRatio = quality / 100;
  const qualityArg = Number.isFinite(qualityRatio) ? qualityRatio : undefined;

  // Note: This script is wrapped by executeAsyncInWebview, so we don't need an IIFE
  // The wrapper adds: (async () => { const scriptPromise = (async () => { ...script... })(); ... })()
  return `
  try {
    // Inject html2canvas if not already present
    // The library uses UMD and may set on globalThis, self, or window
    if (typeof html2canvas === 'undefined') {
      ${html2canvas}
      // After loading, html2canvas should be on globalThis/self/window
    }

    // Get the html2canvas function (may be on window, self, or globalThis)
    const html2canvasFn = typeof html2canvas !== 'undefined' ? html2canvas :
      (typeof window !== 'undefined' && window.html2canvas) ? window.html2canvas :
      (typeof self !== 'undefined' && self.html2canvas) ? self.html2canvas :
      (typeof globalThis !== 'undefined' && globalThis.html2canvas) ? globalThis.html2canvas : null;

    if (!html2canvasFn) {
      throw new Error('html2canvas failed to load - function not found on any global');
    }

    // Capture the entire document
    const element = document.documentElement;
    if (!element) {
      throw new Error('document.documentElement is null');
    }

    // Configure html2canvas options
    const options = {
      backgroundColor: null,
      scale: window.devicePixelRatio || 1,
      logging: false,
      useCORS: true,
      allowTaint: false,
      imageTimeout: 5000,
    };

    // Capture the webview
    const canvas = await html2canvasFn(element, options);
    if (!canvas) {
      throw new Error('html2canvas returned null canvas');
    }

    // Convert to data URL
    const mimeType = ${JSON.stringify(mimeType)};
    const dataUrl = canvas.toDataURL(mimeType, ${qualityArg});

    if (!dataUrl || !dataUrl.startsWith('data:image/')) {
      throw new Error('canvas.toDataURL returned invalid result: ' + (dataUrl ? dataUrl.substring(0, 50) : 'null'));
    }

    return dataUrl;
  } catch (screenshotError) {
    // Re-throw with more context
    throw new Error('Screenshot capture failed: ' + (screenshotError.message || String(screenshotError)));
  }
  `;
}
@@ -0,0 +1,94 @@
1
+ /**
2
+ * Script loader for webview injection scripts
3
+ *
4
+ * These scripts are read from disk once, when this module is first imported, and are injected into the webview at runtime.
5
+ * Each script is an IIFE that accepts a params object.
6
+ */
7
+ import { readFileSync } from 'fs';
8
+ import { dirname, join } from 'path';
9
+ import { fileURLToPath } from 'url';
10
// Directory that contains this module; script files are resolved against it.
const currentDir = dirname(fileURLToPath(import.meta.url));

/**
 * Read one injection script from disk.
 *
 * @param {string} name - Script base name, without the `.js` extension.
 * @returns {string} The file's contents decoded as UTF-8.
 * @throws Whatever readFileSync throws when the file cannot be read.
 */
function loadScript(name) {
  const scriptPath = join(currentDir, `${name}.js`);
  return readFileSync(scriptPath, 'utf-8');
}
14
/**
 * Source text of every injection script, keyed by the name callers use.
 *
 * Each entry pairs the exported key with the on-disk base name (the two differ
 * for the kebab-case files). All files are read synchronously, in the listed
 * order, when this module is evaluated.
 */
export const SCRIPTS = Object.fromEntries(
  [
    ['interact', 'interact'],
    ['swipe', 'swipe'],
    ['keyboard', 'keyboard'],
    ['waitFor', 'wait-for'],
    ['getStyles', 'get-styles'],
    ['focus', 'focus'],
    ['findElement', 'find-element'],
  ].map(([key, fileName]) => [key, loadScript(fileName)]),
);
24
/**
 * Turn a function-expression script into a call expression.
 *
 * The script text is wrapped in parentheses and immediately called with the
 * JSON-serialised params as its single argument.
 *
 * @param {string} script - Source of a function expression taking one params object.
 * @param {*} params - Value serialised with JSON.stringify and passed as the argument.
 * @returns {string} Source text of the form `(<script>)(<json>)`.
 */
export function buildScript(script, params) {
  const argument = JSON.stringify(params);
  const invocation = `(${script})(${argument})`;
  return invocation;
}
31
/**
 * Build a self-invoking script that types text into an element.
 *
 * The generated script looks the element up with document.querySelector,
 * focuses it, assigns `value`, and dispatches bubbling `input` and `change`
 * events.
 *
 * Both arguments are embedded with JSON.stringify, which yields a valid
 * JavaScript string literal for any input. Hand-rolled single-quote escaping
 * broke on selectors containing quotes (e.g. input[name='q']) and on text
 * containing newlines or other line terminators.
 *
 * @param {string} selector - CSS selector of the target element.
 * @param {string} text - Text to place in the element.
 * @returns {string} Script source; evaluating it returns a confirmation
 *   message or throws if the element is not found.
 * @throws {TypeError} If `text` is not a string.
 */
export function buildTypeScript(selector, text) {
  if (typeof text !== 'string') {
    throw new TypeError('text must be a string');
  }

  const selectorLiteral = JSON.stringify(String(selector));
  const textLiteral = JSON.stringify(text);

  return `
  (function() {
    const selector = ${selectorLiteral};
    const text = ${textLiteral};

    const element = document.querySelector(selector);
    if (!element) {
      throw new Error('Element not found: ' + selector);
    }

    element.focus();
    element.value = text;
    element.dispatchEvent(new Event('input', { bubbles: true }));
    element.dispatchEvent(new Event('change', { bubbles: true }));

    return 'Typed "' + text + '" into ' + selector;
  })()
  `;
}
55
/**
 * Build a self-invoking script that dispatches keyboard events
 * (press, down, up) on the currently focused element.
 *
 * `action` and `key` are embedded with JSON.stringify so that keys such as
 * `'` or `\` produce a valid string literal instead of a syntax error in the
 * generated script. A null `modifiers` is treated as an empty list.
 *
 * @param {string} action - 'press', 'down', or 'up'.
 * @param {string} key - Key value; used for both `key` and `code` of the event.
 * @param {string[]} [modifiers=[]] - Any of 'Control', 'Alt', 'Shift', 'Meta'.
 * @returns {string} Script source; evaluating it returns a confirmation
 *   message or throws on an unknown action.
 */
export function buildKeyEventScript(action, key, modifiers = []) {
  const actionLiteral = JSON.stringify(String(action));
  const keyLiteral = JSON.stringify(String(key));
  const modifiersLiteral = JSON.stringify(modifiers ?? []);

  return `
  (function() {
    const action = ${actionLiteral};
    const key = ${keyLiteral};
    const modifiers = ${modifiersLiteral};

    const eventOptions = {
      key: key,
      code: key,
      bubbles: true,
      cancelable: true,
      ctrlKey: modifiers.includes('Control'),
      altKey: modifiers.includes('Alt'),
      shiftKey: modifiers.includes('Shift'),
      metaKey: modifiers.includes('Meta'),
    };

    const activeElement = document.activeElement || document.body;

    if (action === 'press') {
      activeElement.dispatchEvent(new KeyboardEvent('keydown', eventOptions));
      activeElement.dispatchEvent(new KeyboardEvent('keypress', eventOptions));
      activeElement.dispatchEvent(new KeyboardEvent('keyup', eventOptions));
      return 'Pressed key: ' + key + (modifiers.length ? ' with ' + modifiers.join('+') : '');
    } else if (action === 'down') {
      activeElement.dispatchEvent(new KeyboardEvent('keydown', eventOptions));
      return 'Key down: ' + key + (modifiers.length ? ' with ' + modifiers.join('+') : '');
    } else if (action === 'up') {
      activeElement.dispatchEvent(new KeyboardEvent('keyup', eventOptions));
      return 'Key up: ' + key + (modifiers.length ? ' with ' + modifiers.join('+') : '');
    }

    throw new Error('Unknown action: ' + action);
  })()
  `;
}
@@ -0,0 +1,103 @@
1
/**
 * Webview interaction script - handles click, double-click, long-press, and scroll actions
 * This script is injected into the webview and executed with parameters.
 *
 * The function expression is deliberately left uninvoked and unterminated:
 * the caller is expected to wrap it and call it with the params object.
 *
 * @param {Object} params
 * @param {string} params.action - 'click', 'double-click', 'long-press', or 'scroll'
 * @param {string|null} params.selector - CSS selector for the element; takes precedence over x/y
 * @param {number|null} params.x - X coordinate (used when no selector is given)
 * @param {number|null} params.y - Y coordinate (used when no selector is given)
 * @param {number} params.duration - Delay in ms between mousedown and mouseup for long-press
 * @param {number} params.scrollX - Horizontal scroll amount
 * @param {number} params.scrollY - Vertical scroll amount
 * @returns {string} Human-readable description of the action performed
 * @throws {Error} If the selector matches nothing, if a non-scroll action has
 *   neither selector nor coordinates, or if the action is unknown
 */
(function(params) {
  const { action, selector, x, y, duration, scrollX, scrollY } = params;
  const isScroll = action === 'scroll';

  let element = null;
  let targetX;
  let targetY;

  if (selector) {
    element = document.querySelector(selector);
    if (!element) {
      throw new Error(`Element not found: ${selector}`);
    }
    if (!isScroll) {
      // Pointer actions aim at the centre of the element's bounding box.
      const rect = element.getBoundingClientRect();
      targetX = rect.left + rect.width / 2;
      targetY = rect.top + rect.height / 2;
    }
  } else if (!isScroll) {
    // `!= null` rejects both null and undefined. A key dropped during JSON
    // serialisation arrives as undefined, which must produce the error below
    // rather than being passed on to elementFromPoint.
    if (x != null && y != null) {
      targetX = x;
      targetY = y;
      element = document.elementFromPoint(x, y);
    } else {
      throw new Error('Either selector or coordinates (x, y) must be provided');
    }
  }

  // Shared init for every synthetic mouse event below.
  const eventOptions = {
    bubbles: true,
    cancelable: true,
    view: window,
    clientX: targetX,
    clientY: targetY,
  };

  const fire = (type) => element.dispatchEvent(new MouseEvent(type, eventOptions));

  if (action === 'click') {
    // elementFromPoint may have returned null; the message is still reported.
    if (element) {
      fire('mousedown');
      fire('mouseup');
      fire('click');
    }
    return `Clicked at (${targetX}, ${targetY})`;
  }

  if (action === 'double-click') {
    if (element) {
      fire('mousedown');
      fire('mouseup');
      fire('click');
      fire('mousedown');
      fire('mouseup');
      fire('click');
      fire('dblclick');
    }
    return `Double-clicked at (${targetX}, ${targetY})`;
  }

  if (action === 'long-press') {
    if (element) {
      fire('mousedown');
      // The release is scheduled; this function returns before it fires.
      setTimeout(() => {
        fire('mouseup');
      }, duration);
    }
    return `Long-pressed at (${targetX}, ${targetY}) for ${duration}ms`;
  }

  if (isScroll) {
    const scrollTarget = element || window;
    if (scrollX !== 0 || scrollY !== 0) {
      if (scrollTarget === window) {
        window.scrollBy(scrollX, scrollY);
      } else {
        scrollTarget.scrollLeft += scrollX;
        scrollTarget.scrollTop += scrollY;
      }
      return `Scrolled by (${scrollX}, ${scrollY}) pixels`;
    }
    return 'No scroll performed (scrollX and scrollY are both 0)';
  }

  throw new Error(`Unknown action: ${action}`);
})
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Keyboard interaction scripts
3
+ */
4
+
5
/**
 * Put text into the element matched by a selector.
 *
 * The element is focused, its `value` is overwritten, and bubbling `input`
 * and `change` events are dispatched so that listeners observe the update.
 *
 * @param {Object} params
 * @param {string} params.selector - CSS selector for the element
 * @param {string} params.text - Text to type
 * @returns {string} Confirmation message
 * @throws {Error} If no element matches the selector
 */
function typeText(params) {
  const { selector, text } = params;
  const target = document.querySelector(selector);

  if (!target) {
    throw new Error(`Element not found: ${selector}`);
  }

  target.focus();
  target.value = text;

  // Fire the notifications in the order a real edit produces them.
  for (const eventName of ['input', 'change']) {
    target.dispatchEvent(new Event(eventName, { bubbles: true }));
  }

  return `Typed "${text}" into ${selector}`;
}
31
+
32
/**
 * Send keyboard events (press, down, up) to the focused element.
 *
 * Events go to document.activeElement, falling back to document.body.
 * A 'press' dispatches keydown, keypress, and keyup in that order.
 *
 * @param {Object} params
 * @param {string} params.action - 'press', 'down', or 'up'
 * @param {string} params.key - Key to press; used for both `key` and `code`
 * @param {string[]} [params.modifiers] - Modifier keys ('Control', 'Alt',
 *   'Shift', 'Meta'); omitted or null means no modifiers
 * @returns {string} Confirmation message
 * @throws {Error} If the action is not one of the three supported values
 */
function keyEvent(params) {
  const { action, key } = params;
  // Treat a missing or null list as "no modifiers" instead of crashing on
  // `.includes`, matching the default used by the script builder.
  const modifiers = params.modifiers == null ? [] : params.modifiers;

  const eventOptions = {
    key: key,
    code: key,
    bubbles: true,
    cancelable: true,
    ctrlKey: modifiers.includes('Control'),
    altKey: modifiers.includes('Alt'),
    shiftKey: modifiers.includes('Shift'),
    metaKey: modifiers.includes('Meta'),
  };

  const activeElement = document.activeElement || document.body;
  const fire = (type) => activeElement.dispatchEvent(new KeyboardEvent(type, eventOptions));
  const suffix = modifiers.length ? ' with ' + modifiers.join('+') : '';

  if (action === 'press') {
    fire('keydown');
    fire('keypress');
    fire('keyup');
    return `Pressed key: ${key}${suffix}`;
  }

  if (action === 'down') {
    fire('keydown');
    return `Key down: ${key}${suffix}`;
  }

  if (action === 'up') {
    fire('keyup');
    return `Key up: ${key}${suffix}`;
  }

  throw new Error(`Unknown action: ${action}`);
}
74
+
75
+ // Export for use: the file's final expression is an object exposing typeText and keyEvent (presumably read by whatever evaluates this script; the consumer is not visible here - TODO confirm)
76
+ ({ typeText, keyEvent })
@@ -0,0 +1,88 @@
1
/**
 * Swipe gesture script - simulates a swipe between two points.
 *
 * Touch events are attempted when the TouchEvent constructor exists; if
 * building or dispatching them throws, or TouchEvent is absent, the gesture
 * is simulated with mouse events instead. In both paths the start event is
 * dispatched immediately and the move/end events after `duration` ms, so the
 * function returns before the gesture has finished.
 *
 * @param {Object} params
 * @param {number} params.fromX - Starting X coordinate
 * @param {number} params.fromY - Starting Y coordinate
 * @param {number} params.toX - Ending X coordinate
 * @param {number} params.toY - Ending Y coordinate
 * @param {number} params.duration - Duration of swipe in ms
 * @returns {string} Description of the swipe that was started
 */
(function(params) {
  const { fromX, fromY, toX, toY, duration } = params;

  // Events are dispatched on whatever sits under the start point.
  const target = document.elementFromPoint(fromX, fromY) || document.body;

  const mouseAt = (type, clientX, clientY) =>
    new MouseEvent(type, { clientX, clientY, bubbles: true, cancelable: true });

  const touchListAt = (clientX, clientY) => [{ clientX, clientY, target }];

  function swipeWithMouse() {
    const down = mouseAt('mousedown', fromX, fromY);
    const move = mouseAt('mousemove', toX, toY);
    const up = mouseAt('mouseup', toX, toY);

    target.dispatchEvent(down);
    setTimeout(() => {
      target.dispatchEvent(move);
      target.dispatchEvent(up);
    }, duration);
  }

  function swipeWithTouch() {
    // NOTE(review): the touch entries are plain objects, not Touch instances.
    // Engines that require real Touch objects will presumably throw here,
    // which routes the gesture to the mouse fallback - confirm per platform.
    const start = new TouchEvent('touchstart', { touches: touchListAt(fromX, fromY) });
    const move = new TouchEvent('touchmove', { touches: touchListAt(toX, toY) });
    const end = new TouchEvent('touchend', { changedTouches: touchListAt(toX, toY) });

    target.dispatchEvent(start);
    setTimeout(() => {
      target.dispatchEvent(move);
      target.dispatchEvent(end);
    }, duration);
  }

  if (typeof TouchEvent === 'undefined') {
    // Use mouse events for desktop
    swipeWithMouse();
  } else {
    try {
      swipeWithTouch();
    } catch (e) {
      // Fallback to mouse events if TouchEvent construction fails
      swipeWithMouse();
    }
  }

  return `Swiped from (${fromX}, ${fromY}) to (${toX}, ${toY}) in ${duration}ms`;
})
@@ -0,0 +1,44 @@
1
/**
 * Wait for conditions script - polls every 100 ms until a selector matches
 * or a piece of text appears in the page, or until the timeout elapses.
 *
 * @param {Object} params
 * @param {string} params.type - What to wait for: 'selector', 'text', 'ipc-event'
 * @param {string} params.value - Selector, text, or event name to wait for
 * @param {number} params.timeout - Timeout in milliseconds
 * @returns {Promise<string>} Resolves with a confirmation message once the
 *   condition holds. Rejects on timeout, for 'ipc-event' (not implemented),
 *   for an unknown type, or if evaluating the condition throws.
 */
(async function(params) {
  const { type, value, timeout } = params;
  const startTime = Date.now();

  // True once the awaited condition holds. A body that does not exist yet
  // simply counts as "text not found" rather than throwing.
  function conditionMet() {
    if (type === 'selector') {
      return Boolean(document.querySelector(value));
    }
    return Boolean(document.body) && document.body.innerText.includes(value);
  }

  return new Promise((resolve, reject) => {
    if (type === 'ipc-event') {
      // For IPC events, we'd need to set up a listener
      // This is a simplified version
      reject(new Error('IPC event waiting not yet implemented in this context'));
      return;
    }

    // Fail fast on an unsupported type instead of polling until the timeout
    // and then reporting a misleading timeout error.
    if (type !== 'selector' && type !== 'text') {
      reject(new Error(`Unknown wait type: ${type}`));
      return;
    }

    function check() {
      try {
        if (Date.now() - startTime > timeout) {
          reject(new Error(`Timeout waiting for ${type}: ${value}`));
          return;
        }

        if (conditionMet()) {
          resolve(type === 'selector' ? `Element found: ${value}` : `Text found: ${value}`);
          return;
        }
      } catch (error) {
        // An exception inside a timer callback would otherwise leave the
        // promise pending forever.
        reject(error);
        return;
      }

      setTimeout(check, 100);
    }

    check();
  });
})
@@ -0,0 +1,121 @@
1
+ import { z } from 'zod';
2
+ import { getDefaultHost, getDefaultPort } from '../config.js';
3
+ import { AppDiscovery } from './app-discovery.js';
4
+ import { resetPluginClient } from './plugin-client.js';
5
+ import { resetInitialization } from './webview-executor.js';
6
+ /**
7
+ * Session Manager - Native IPC-based session management
8
+ *
9
+ * This module provides lightweight native session management using Tauri IPC.
10
+ * The "session" concept is maintained for API compatibility.
11
+ *
12
+ * Connection Strategy:
13
+ * 1. Try localhost first (most reliable for simulators/emulators/desktop)
14
+ * 2. If localhost fails and a remote host is configured, try that host
15
+ * 3. If every connection attempt fails, return a status message saying that no Tauri app was found (no error is thrown)
16
+ */
17
+ // ============================================================================
18
+ // Schemas
19
+ // ============================================================================
20
/**
 * Validation schema for the arguments of the session-management call:
 * a required `action` plus optional `host` and `port` overrides.
 */
export const ManageDriverSessionSchema = z.object({
  action: z
    .enum(['start', 'stop'])
    .describe('Action to perform: start or stop the session'),
  host: z
    .string()
    .optional()
    .describe('Host address to connect to (e.g., 192.168.1.100). Falls back to MCP_BRIDGE_HOST or TAURI_DEV_HOST env vars'),
  port: z
    .number()
    .optional()
    .describe('Port to connect to (default: 9223)'),
});
25
+ // ============================================================================
26
+ // Module State
27
+ // ============================================================================
28
// Cached AppDiscovery instance; replaced whenever a different host is requested.
let appDiscovery = null;

/**
 * Return the AppDiscovery bound to `host`, reusing the cached instance when
 * its host matches and creating (and caching) a new one otherwise.
 *
 * NOTE(review): a replaced instance is dropped without being disconnected;
 * confirm whether AppDiscovery holds connections that need explicit cleanup.
 *
 * @param {string} host - Host the discovery instance should target.
 * @returns {AppDiscovery} The cached or newly created instance.
 */
function getAppDiscovery(host) {
  const reusable = appDiscovery !== null && appDiscovery.host === host;
  if (!reusable) {
    appDiscovery = new AppDiscovery(host);
  }
  return appDiscovery;
}
36
+ // ============================================================================
37
+ // Session Management
38
+ // ============================================================================
39
/**
 * Connect to one host/port pair through the discovery instance for that host.
 *
 * @param {string} host - Host to connect to.
 * @param {number} port - Port to connect to.
 * @returns {Promise<{name: *, host: *, port: *}>} The name, host, and port
 *   reported by the established session.
 * @throws Whatever connectToPort rejects with; failures are not handled here.
 */
async function tryConnect(host, port) {
  const {
    name,
    host: connectedHost,
    port: connectedPort,
  } = await getAppDiscovery(host).connectToPort(port, undefined, host);

  return { name, host: connectedHost, port: connectedPort };
}
52
/**
 * Attempt a single connection, turning any failure into `null` so the caller
 * can fall through to its next strategy.
 *
 * @param {string} host - Host to connect to.
 * @param {number} port - Port to connect to.
 * @param {boolean} resetFirst - Reset the plugin client before connecting.
 * @returns {Promise<{name: *, host: *, port: *}|null>} Session info, or null on failure.
 */
async function attemptConnect(host, port, resetFirst) {
  try {
    if (resetFirst) {
      resetPluginClient();
    }
    return await tryConnect(host, port);
  }
  catch {
    // Deliberate best-effort: a failed attempt is an expected outcome here.
    return null;
  }
}

/**
 * Manage session lifecycle (start or stop).
 *
 * Connection strategy for 'start':
 * 1. If the configured host is not localhost/127.0.0.1, try localhost:{port} first
 * 2. Try {host}:{port} for the configured/provided host
 * 3. Auto-discover an app on localhost and connect to its port
 * 4. Reset the plugin client and retry {host}:{port} as a last resort
 *
 * A failed attempt never throws: if every strategy fails, a status message
 * stating that no Tauri app was found is returned instead. A failure of the
 * auto-discovery scan itself is treated as "nothing found", so the last-resort
 * attempt still runs.
 *
 * Any action other than 'start' is handled as 'stop': the current discovery
 * instance (if any) is disconnected and the plugin client and initialization
 * state are reset.
 *
 * @param action - 'start' or 'stop'
 * @param host - Optional host address (defaults to getDefaultHost())
 * @param port - Optional port number (defaults to getDefaultPort())
 * @returns A human-readable status message
 */
export async function manageDriverSession(action, host, port) {
  if (action !== 'start') {
    // Stop action - disconnect all apps and reset initialization state
    if (appDiscovery) {
      await appDiscovery.disconnectAll();
    }
    resetPluginClient();
    resetInitialization();
    return 'Session stopped';
  }

  // Reset any existing plugin client to ensure fresh connection
  resetPluginClient();
  const configuredHost = host ?? getDefaultHost();
  const configuredPort = port ?? getDefaultPort();
  const hostIsLocal = configuredHost === 'localhost' || configuredHost === '127.0.0.1';

  // Strategy 1: Try localhost first (skipped when the configured host already is local)
  if (!hostIsLocal) {
    const local = await attemptConnect('localhost', configuredPort, false);
    if (local) {
      return `Session started with app: ${local.name} (localhost:${local.port})`;
    }
  }

  // Strategy 2: Try the configured/provided host
  const direct = await attemptConnect(configuredHost, configuredPort, false);
  if (direct) {
    return `Session started with app: ${direct.name} (${direct.host}:${direct.port})`;
  }

  // Strategy 3: Auto-discover on localhost
  let firstApp = null;
  try {
    firstApp = await getAppDiscovery('localhost').getFirstAvailableApp();
  }
  catch {
    // A failed scan must not abort the remaining strategy.
    firstApp = null;
  }
  if (firstApp) {
    // Reset client again to connect to discovered port
    const discovered = await attemptConnect('localhost', firstApp.port, true);
    if (discovered) {
      return `Session started with app: ${discovered.name} (localhost:${discovered.port})`;
    }
  }

  // Strategy 4: Try default port on configured host as last resort
  const lastResort = await attemptConnect(configuredHost, configuredPort, true);
  if (lastResort) {
    return `Session started with app: ${lastResort.name} (${lastResort.host}:${lastResort.port})`;
  }

  // All attempts failed
  return `Session started (native IPC mode - no Tauri app found at localhost or ${configuredHost}:${configuredPort})`;
}