@hypothesi/tauri-mcp-server 0.9.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Script Manager - Manages persistent script injection across page navigations.
3
+ *
4
+ * This module provides functions to register, remove, and manage scripts that
5
+ * should be automatically re-injected when pages load or navigate.
6
+ *
7
+ * @internal This module is for internal use only and is not exposed as MCP tools.
8
+ */
9
+ /**
10
+ * Type of script to inject.
11
+ */
12
+ export type ScriptType = 'inline' | 'url';
13
+ /**
14
+ * A script entry in the registry.
15
+ */
16
+ export interface ScriptEntry {
17
+ /** Unique identifier for this script. */
18
+ id: string;
19
+ /** Type of script (inline code or external URL). */
20
+ type: ScriptType;
21
+ /** The script content (JavaScript code) or URL. */
22
+ content: string;
23
+ }
24
+ /**
25
+ * Response from script registration.
26
+ */
27
+ interface RegisterScriptResponse {
28
+ registered: boolean;
29
+ scriptId: string;
30
+ }
31
+ /**
32
+ * Response from script removal.
33
+ */
34
+ interface RemoveScriptResponse {
35
+ removed: boolean;
36
+ scriptId: string;
37
+ }
38
+ /**
39
+ * Response from clearing scripts.
40
+ */
41
+ interface ClearScriptsResponse {
42
+ cleared: number;
43
+ }
44
+ /**
45
+ * Response from getting scripts.
46
+ */
47
+ interface GetScriptsResponse {
48
+ scripts: ScriptEntry[];
49
+ }
50
+ /**
51
+ * Registers a script to be injected into the webview.
52
+ *
53
+ * The script will be immediately injected if the page is loaded, and will be
54
+ * automatically re-injected on subsequent page loads/navigations.
55
+ *
56
+ * @param id - Unique identifier for the script
57
+ * @param type - Type of script ('inline' for code, 'url' for external script)
58
+ * @param content - The script content (JavaScript code) or URL
59
+ * @param windowLabel - Optional window label to target
60
+ * @returns Promise resolving to registration result
61
+ */
62
+ export declare function registerScript(id: string, type: ScriptType, content: string, windowLabel?: string, appIdentifier?: string | number): Promise<RegisterScriptResponse>;
63
+ /**
64
+ * Removes a script from the registry and DOM.
65
+ *
66
+ * @param id - The script ID to remove
67
+ * @param windowLabel - Optional window label to target
68
+ * @returns Promise resolving to removal result
69
+ */
70
+ export declare function removeScript(id: string, windowLabel?: string, appIdentifier?: string | number): Promise<RemoveScriptResponse>;
71
+ /**
72
+ * Clears all registered scripts from the registry and DOM.
73
+ *
74
+ * @param windowLabel - Optional window label to target
75
+ * @returns Promise resolving to the number of scripts cleared
76
+ */
77
+ export declare function clearScripts(windowLabel?: string, appIdentifier?: string | number): Promise<ClearScriptsResponse>;
78
+ /**
79
+ * Gets all registered scripts.
80
+ *
81
+ * @returns Promise resolving to the list of registered scripts
82
+ */
83
+ export declare function getScripts(appIdentifier?: string | number): Promise<GetScriptsResponse>;
84
+ /**
85
+ * Checks if a script with the given ID is registered.
86
+ *
87
+ * @param id - The script ID to check
88
+ * @returns Promise resolving to true if the script is registered
89
+ */
90
+ export declare function isScriptRegistered(id: string, appIdentifier?: string | number): Promise<boolean>;
91
+ export {};
@@ -19,8 +19,8 @@ import { ensureSessionAndConnect } from './plugin-client.js';
19
19
  * @param windowLabel - Optional window label to target
20
20
  * @returns Promise resolving to registration result
21
21
  */
22
- export async function registerScript(id, type, content, windowLabel) {
23
- const client = await ensureSessionAndConnect();
22
+ export async function registerScript(id, type, content, windowLabel, appIdentifier) {
23
+ const client = await ensureSessionAndConnect(appIdentifier);
24
24
  const response = await client.sendCommand({
25
25
  command: 'register_script',
26
26
  args: { id, type, content, windowLabel },
@@ -37,8 +37,8 @@ export async function registerScript(id, type, content, windowLabel) {
37
37
  * @param windowLabel - Optional window label to target
38
38
  * @returns Promise resolving to removal result
39
39
  */
40
- export async function removeScript(id, windowLabel) {
41
- const client = await ensureSessionAndConnect();
40
+ export async function removeScript(id, windowLabel, appIdentifier) {
41
+ const client = await ensureSessionAndConnect(appIdentifier);
42
42
  const response = await client.sendCommand({
43
43
  command: 'remove_script',
44
44
  args: { id, windowLabel },
@@ -54,8 +54,8 @@ export async function removeScript(id, windowLabel) {
54
54
  * @param windowLabel - Optional window label to target
55
55
  * @returns Promise resolving to the number of scripts cleared
56
56
  */
57
- export async function clearScripts(windowLabel) {
58
- const client = await ensureSessionAndConnect();
57
+ export async function clearScripts(windowLabel, appIdentifier) {
58
+ const client = await ensureSessionAndConnect(appIdentifier);
59
59
  const response = await client.sendCommand({
60
60
  command: 'clear_scripts',
61
61
  args: { windowLabel },
@@ -70,8 +70,8 @@ export async function clearScripts(windowLabel) {
70
70
  *
71
71
  * @returns Promise resolving to the list of registered scripts
72
72
  */
73
- export async function getScripts() {
74
- const client = await ensureSessionAndConnect();
73
+ export async function getScripts(appIdentifier) {
74
+ const client = await ensureSessionAndConnect(appIdentifier);
75
75
  const response = await client.sendCommand({
76
76
  command: 'get_scripts',
77
77
  args: {},
@@ -87,7 +87,7 @@ export async function getScripts() {
87
87
  * @param id - The script ID to check
88
88
  * @returns Promise resolving to true if the script is registered
89
89
  */
90
- export async function isScriptRegistered(id) {
91
- const { scripts } = await getScripts();
90
+ export async function isScriptRegistered(id, appIdentifier) {
91
+ const { scripts } = await getScripts(appIdentifier);
92
92
  return scripts.some((s) => { return s.id === id; });
93
93
  }
@@ -0,0 +1,17 @@
1
+ /**
2
+ * aria-api library loader
3
+ *
4
+ * Bundles aria-api for browser injection. Provides comprehensive W3C-compliant
5
+ * accessibility computation including:
6
+ * - WAI-ARIA 1.3 role computation
7
+ * - HTML-AAM 1.0 implicit role mappings
8
+ * - Accessible Name and Description Computation 1.2
9
+ * - aria-owns relationship handling
10
+ */
11
+ /** Script ID used for the aria-api library in the script registry. */
12
+ export declare const ARIA_API_SCRIPT_ID = "__mcp_aria_api__";
13
+ /**
14
+ * Get the aria-api library source code.
15
+ * Loaded lazily and cached.
16
+ */
17
+ export declare function getAriaApiSource(): string;
@@ -0,0 +1,25 @@
1
+ /**
2
+ * html2canvas-pro library loader
3
+ *
4
+ * Loads the html2canvas-pro library from node_modules and provides it as a string
5
+ * that can be injected into the webview. html2canvas-pro is a fork of html2canvas
6
+ * that adds support for modern CSS color functions like oklch(), oklab(), lab(),
7
+ * lch(), and color().
8
+ */
9
+ /** Script ID used for the html2canvas library in the script registry. */
10
+ export declare const HTML2CANVAS_SCRIPT_ID = "__mcp_html2canvas__";
11
+ /**
12
+ * Get the html2canvas-pro library source code.
13
+ * Loaded lazily and cached.
14
+ */
15
+ export declare function getHtml2CanvasSource(): string;
16
+ /**
17
+ * Build a script that captures a screenshot using html2canvas.
18
+ * Assumes html2canvas is already loaded (either via script manager or inline).
19
+ */
20
+ export declare function buildScreenshotCaptureScript(format: 'png' | 'jpeg', quality: number): string;
21
+ /**
22
+ * Build a script that injects html2canvas and captures a screenshot.
23
+ * This is the legacy function that inlines the library - kept for fallback.
24
+ */
25
+ export declare function buildScreenshotScript(format: 'png' | 'jpeg', quality: number): string;
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Script loader for webview injection scripts
3
+ *
4
+ * These scripts are loaded at build time and injected into the webview at runtime.
5
+ * Each script is an IIFE that accepts a params object.
6
+ */
7
+ export declare const SCRIPTS: {
8
+ readonly resolveRef: string;
9
+ readonly interact: string;
10
+ readonly swipe: string;
11
+ readonly keyboard: string;
12
+ readonly waitFor: string;
13
+ readonly getStyles: string;
14
+ readonly focus: string;
15
+ readonly findElement: string;
16
+ readonly domSnapshot: string;
17
+ readonly elementPicker: string;
18
+ };
19
+ /** Script ID used for resolve-ref in the script registry. */
20
+ export declare const RESOLVE_REF_SCRIPT_ID = "__mcp_resolve_ref__";
21
+ /**
22
+ * Get the resolve-ref script source code.
23
+ */
24
+ export declare function getResolveRefSource(): string;
25
+ /**
26
+ * Build a script invocation with parameters
27
+ * The script should be an IIFE that accepts a params object
28
+ */
29
+ export declare function buildScript(script: string, params: Record<string, unknown>): string;
30
+ /**
31
+ * Build a script for typing text (uses the keyboard script's typeText function)
32
+ */
33
+ export declare function buildTypeScript(selector: string, text: string, strategy?: string): string;
34
+ /**
35
+ * Build a script for key events (press, down, up)
36
+ */
37
+ export declare function buildKeyEventScript(action: string, key: string, modifiers?: string[]): string;
@@ -56,8 +56,24 @@ export function buildTypeScript(selector, text, strategy) {
56
56
  if (!element) throw new Error('Element not found: ' + selector);
57
57
 
58
58
  element.focus();
59
- element.value = text;
60
- element.dispatchEvent(new Event('input', { bubbles: true }));
59
+
60
+ // Use native prototype setter to bypass React's value tracker
61
+ var proto = element.tagName === 'TEXTAREA'
62
+ ? HTMLTextAreaElement.prototype
63
+ : HTMLInputElement.prototype;
64
+ var descriptor = Object.getOwnPropertyDescriptor(proto, 'value');
65
+
66
+ if (descriptor && descriptor.set) {
67
+ descriptor.set.call(element, text);
68
+ } else {
69
+ element.value = text;
70
+ }
71
+
72
+ // Reset React's internal value tracker so it detects the change
73
+ if (element._valueTracker) element._valueTracker.setValue('');
74
+
75
+ // Dispatch proper InputEvent (not generic Event) for React compatibility
76
+ element.dispatchEvent(new InputEvent('input', { bubbles: true, inputType: 'insertText', data: text }));
61
77
  element.dispatchEvent(new Event('change', { bubbles: true }));
62
78
 
63
79
  var msg = 'Typed "' + text + '" into ' + selector;
@@ -47,9 +47,22 @@
47
47
  }
48
48
 
49
49
  if (strategy === 'text') {
50
+ // First try: match element text content
50
51
  var xpath = xpathForText(selectorOrRef);
51
52
  var result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
52
- return result.singleNodeValue;
53
+ if (result.singleNodeValue) return result.singleNodeValue;
54
+
55
+ // Fallback: search placeholder, aria-label, and title attributes
56
+ var attrSelectors = [
57
+ '[placeholder*="' + selectorOrRef.replace(/"/g, '\\"') + '"]',
58
+ '[aria-label*="' + selectorOrRef.replace(/"/g, '\\"') + '"]',
59
+ '[title*="' + selectorOrRef.replace(/"/g, '\\"') + '"]',
60
+ ];
61
+ for (var i = 0; i < attrSelectors.length; i++) {
62
+ var el = document.querySelector(attrSelectors[i]);
63
+ if (el) return el;
64
+ }
65
+ return null;
53
66
  }
54
67
 
55
68
  if (strategy === 'xpath') {
@@ -78,12 +91,25 @@
78
91
  }
79
92
 
80
93
  if (strategy === 'text') {
94
+ // First try: match element text content
81
95
  var xpath = xpathForText(selector);
82
96
  var snapshot = document.evaluate(xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
83
97
  var results = [];
84
98
  for (var i = 0; i < snapshot.snapshotLength; i++) {
85
99
  results.push(snapshot.snapshotItem(i));
86
100
  }
101
+ if (results.length > 0) return results;
102
+
103
+ // Fallback: search placeholder, aria-label, and title attributes
104
+ var attrSelectors = [
105
+ '[placeholder*="' + selector.replace(/"/g, '\\"') + '"]',
106
+ '[aria-label*="' + selector.replace(/"/g, '\\"') + '"]',
107
+ '[title*="' + selector.replace(/"/g, '\\"') + '"]',
108
+ ];
109
+ for (var i = 0; i < attrSelectors.length; i++) {
110
+ var found = Array.from(document.querySelectorAll(attrSelectors[i]));
111
+ if (found.length > 0) return results.concat(found);
112
+ }
87
113
  return results;
88
114
  }
89
115
 
@@ -0,0 +1,76 @@
1
+ import { z } from 'zod';
2
+ import { PluginClient } from './plugin-client.js';
3
+ /**
4
+ * Session Manager - Native IPC-based session management
5
+ *
6
+ * This module provides lightweight native session management using Tauri IPC.
7
+ * The "session" concept is maintained for API compatibility.
8
+ *
9
+ * Connection Strategy:
10
+ * 1. Try localhost first (most reliable for simulators/emulators/desktop)
11
+ * 2. If localhost fails and a remote host is configured, try that host
12
+ * 3. Return error if all connection attempts fail
13
+ */
14
+ export declare const ManageDriverSessionSchema: z.ZodObject<{
15
+ action: z.ZodEnum<["start", "stop", "status"]>;
16
+ host: z.ZodOptional<z.ZodString>;
17
+ port: z.ZodOptional<z.ZodNumber>;
18
+ appIdentifier: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
19
+ }, "strip", z.ZodTypeAny, {
20
+ action: "status" | "start" | "stop";
21
+ host?: string | undefined;
22
+ port?: number | undefined;
23
+ appIdentifier?: string | number | undefined;
24
+ }, {
25
+ action: "status" | "start" | "stop";
26
+ host?: string | undefined;
27
+ port?: number | undefined;
28
+ appIdentifier?: string | number | undefined;
29
+ }>;
30
+ export interface SessionInfo {
31
+ name: string;
32
+ identifier: string | null;
33
+ host: string;
34
+ port: number;
35
+ client: PluginClient;
36
+ connected: boolean;
37
+ }
38
+ /**
39
+ * Check if any session is currently active.
40
+ * @returns true if at least one session exists
41
+ */
42
+ export declare function hasActiveSession(): boolean;
43
+ /**
44
+ * Get a specific session by port.
45
+ */
46
+ export declare function getSession(port: number): SessionInfo | null;
47
+ /**
48
+ * Get the default session (most recently connected).
49
+ */
50
+ export declare function getDefaultSession(): SessionInfo | null;
51
+ /**
52
+ * Get all active sessions.
53
+ */
54
+ export declare function getAllSessions(): SessionInfo[];
55
+ /**
56
+ * Resolve target app from port or identifier.
57
+ * Returns the appropriate session based on the routing logic.
58
+ */
59
+ export declare function resolveTargetApp(portOrIdentifier?: string | number): SessionInfo;
60
+ /**
61
+ * Manage session lifecycle (start, stop, or status).
62
+ *
63
+ * Connection strategy for 'start':
64
+ * 1. Try localhost:{port} first (most reliable for simulators/emulators/desktop)
65
+ * 2. If localhost fails AND a different host is configured, try {host}:{port}
66
+ * 3. If both fail, try auto-discovery on localhost
67
+ * 4. Return error if all attempts fail
68
+ *
69
+ * @param action - 'start', 'stop', or 'status'
70
+ * @param host - Optional host address (defaults to env var or localhost)
71
+ * @param port - Optional port number (defaults to 9223)
72
+ * @param appIdentifier - Optional app identifier for 'stop' action (port or bundle ID)
73
+ * @returns For 'start'/'stop': A message string describing the result.
74
+ * For 'status': A JSON string with connection details
75
+ */
76
+ export declare function manageDriverSession(action: 'start' | 'stop' | 'status', host?: string, port?: number, appIdentifier?: string | number): Promise<string>;
@@ -0,0 +1,122 @@
1
+ import { z } from 'zod';
2
+ /**
3
+ * Ensures the MCP server is fully initialized and ready to use.
4
+ * This is called automatically by all tool functions.
5
+ *
6
+ * Initialization includes:
7
+ * - Verifying an active session exists, auto-starting one (as driver_session "start" would) when none is active
8
+ * - Connecting to the plugin WebSocket using session config
9
+ * - Console capture is already initialized by bridge.js in the Tauri app
10
+ *
11
+ * This function is idempotent - calling it multiple times is safe.
12
+ *
13
+ * @throws Error if no session is active and the automatic connection attempt fails
14
+ */
15
+ export declare function ensureReady(windowId?: string, appIdentifier?: string | number): Promise<void>;
16
+ /**
17
+ * Reset initialization state (useful for testing or reconnecting).
18
+ */
19
+ export declare function resetInitialization(): void;
20
+ export interface ExecuteInWebviewResult {
21
+ result: string;
22
+ windowLabel: string;
23
+ warning?: string;
24
+ }
25
+ /**
26
+ * Execute JavaScript in the Tauri webview using native IPC via WebSocket.
27
+ *
28
+ * @param script - JavaScript code to execute in the webview context
29
+ * @param windowId - Optional window label to target (defaults to "main")
30
+ * @param appIdentifier - Optional app identifier to target specific app
31
+ * @returns Result of the script execution as a string
32
+ */
33
+ export declare function executeInWebview(script: string, windowId?: string, appIdentifier?: string | number): Promise<string>;
34
+ /**
35
+ * Execute JavaScript in the Tauri webview and return window context.
36
+ *
37
+ * @param script - JavaScript code to execute in the webview context
38
+ * @param windowId - Optional window label to target (defaults to "main")
39
+ * @param appIdentifier - Optional app identifier to target specific app
40
+ * @returns Result of the script execution with window context
41
+ */
42
+ export declare function executeInWebviewWithContext(script: string, windowId?: string, appIdentifier?: string | number): Promise<ExecuteInWebviewResult>;
43
+ /**
44
+ * Execute async JavaScript in the webview with timeout support.
45
+ *
46
+ * @param script - JavaScript code to execute (can use await)
47
+ * @param windowId - Optional window label to target (defaults to "main")
48
+ * @param timeout - Timeout in milliseconds (default: 5000)
49
+ * @returns Result of the script execution
50
+ */
51
+ export declare function executeAsyncInWebview(script: string, windowId?: string, timeout?: number, appIdentifier?: string | number): Promise<string>;
52
+ /**
53
+ * Initialize console log capture in the webview.
54
+ * This intercepts console methods and stores logs in memory.
55
+ *
56
+ * NOTE: Console capture is now automatically initialized by bridge.js when the
57
+ * Tauri app starts. This function is kept for backwards compatibility and will
58
+ * simply return early if capture is already initialized.
59
+ */
60
+ export declare function initializeConsoleCapture(): Promise<string>;
61
+ /**
62
+ * Retrieve captured console logs with optional filtering.
63
+ *
64
+ * @param filter - Optional regex pattern to filter log messages
65
+ * @param since - Optional ISO timestamp to filter logs after this time
66
+ * @param windowId - Optional window label to target (defaults to "main")
67
+ * @param appIdentifier - Optional app identifier to target specific app
68
+ * @returns Formatted console logs as string
69
+ */
70
+ export declare function getConsoleLogs(filter?: string, since?: string, windowId?: string, appIdentifier?: string | number): Promise<string>;
71
+ /**
72
+ * Clear all captured console logs.
73
+ */
74
+ export declare function clearConsoleLogs(): Promise<string>;
75
+ import type { ToolContent } from '../tools-registry.js';
76
+ /**
77
+ * Result of a screenshot capture, containing both image data and optional context.
78
+ */
79
+ export interface ScreenshotResult {
80
+ content: ToolContent[];
81
+ }
82
+ export interface CaptureScreenshotOptions {
83
+ format?: 'png' | 'jpeg';
84
+ quality?: number;
85
+ windowId?: string;
86
+ appIdentifier?: string | number;
87
+ maxWidth?: number;
88
+ }
89
+ /**
90
+ * Capture a screenshot of the entire webview.
91
+ *
92
+ * @param options - Screenshot options (format, quality, windowId, appIdentifier, etc.)
93
+ * @returns Screenshot result with image content
94
+ */
95
+ export declare function captureScreenshot(options?: CaptureScreenshotOptions): Promise<ScreenshotResult>;
96
+ export declare const ExecuteScriptSchema: z.ZodObject<{
97
+ script: z.ZodString;
98
+ }, "strip", z.ZodTypeAny, {
99
+ script: string;
100
+ }, {
101
+ script: string;
102
+ }>;
103
+ export declare const GetConsoleLogsSchema: z.ZodObject<{
104
+ filter: z.ZodOptional<z.ZodString>;
105
+ since: z.ZodOptional<z.ZodString>;
106
+ }, "strip", z.ZodTypeAny, {
107
+ filter?: string | undefined;
108
+ since?: string | undefined;
109
+ }, {
110
+ filter?: string | undefined;
111
+ since?: string | undefined;
112
+ }>;
113
+ export declare const CaptureScreenshotSchema: z.ZodObject<{
114
+ format: z.ZodDefault<z.ZodOptional<z.ZodEnum<["png", "jpeg"]>>>;
115
+ quality: z.ZodOptional<z.ZodNumber>;
116
+ }, "strip", z.ZodTypeAny, {
117
+ format: "png" | "jpeg";
118
+ quality?: number | undefined;
119
+ }, {
120
+ format?: "png" | "jpeg" | undefined;
121
+ quality?: number | undefined;
122
+ }>;
@@ -1,6 +1,5 @@
1
1
  import { z } from 'zod';
2
- import { connectPlugin } from './plugin-client.js';
3
- import { hasActiveSession, getDefaultSession, resolveTargetApp } from './session-manager.js';
2
+ import { hasActiveSession, resolveTargetApp, manageDriverSession } from './session-manager.js';
4
3
  import { createMcpLogger } from '../logger.js';
5
4
  import { buildScreenshotScript, buildScreenshotCaptureScript, getHtml2CanvasSource, HTML2CANVAS_SCRIPT_ID, } from './scripts/html2canvas-loader.js';
6
5
  import { registerScript, isScriptRegistered } from './script-manager.js';
@@ -17,7 +16,7 @@ import { getResolveRefSource, RESOLVE_REF_SCRIPT_ID } from './scripts/index.js';
17
16
  // ============================================================================
18
17
  // Auto-Initialization System
19
18
  // ============================================================================
20
- let isInitialized = false;
19
+ const initializedTargets = new Set();
21
20
  const driverLogger = createMcpLogger('DRIVER');
22
21
  /**
23
22
  * Ensures the MCP server is fully initialized and ready to use.
@@ -32,28 +31,52 @@ const driverLogger = createMcpLogger('DRIVER');
32
31
  *
33
32
  * @throws Error if no session is active and the automatic connection attempt fails
34
33
  */
35
- export async function ensureReady() {
36
- if (isInitialized) {
37
- return;
38
- }
39
- // Require an active session to prevent connecting to wrong app
34
+ export async function ensureReady(windowId, appIdentifier) {
35
+ // Auto-connect if no active session
40
36
  if (!hasActiveSession()) {
41
- throw new Error('No active session. Call driver_session with action "start" first to connect to a Tauri app.');
37
+ const result = await manageDriverSession('start');
38
+ if (!hasActiveSession()) {
39
+ throw new Error('Auto-connect failed: ' + result + '. Call driver_session with action "start" to connect manually.');
40
+ }
42
41
  }
43
- // Get default session for initial connection
44
- const session = getDefaultSession();
45
- if (session) {
46
- await connectPlugin(session.host, session.port);
42
+ const session = resolveTargetApp(appIdentifier);
43
+ if (!session.client.isConnected()) {
44
+ await session.client.connect();
47
45
  }
48
- // Register the resolve-ref helper so ref-based selectors work in all tools
49
- await registerScript(RESOLVE_REF_SCRIPT_ID, 'inline', getResolveRefSource());
50
- isInitialized = true;
46
+ const targetKey = `${session.host}:${session.port}:${windowId ?? 'main'}`;
47
+ if (initializedTargets.has(targetKey)) {
48
+ return;
49
+ }
50
+ // Register the resolve-ref helper in the target window
51
+ // so ref-based selectors work there.
52
+ await registerScript(RESOLVE_REF_SCRIPT_ID, 'inline', getResolveRefSource(), windowId, appIdentifier);
53
+ await waitForResolveRefHelper(session, windowId);
54
+ initializedTargets.add(targetKey);
51
55
  }
52
56
  /**
53
57
  * Reset initialization state (useful for testing or reconnecting).
54
58
  */
55
59
  export function resetInitialization() {
56
- isInitialized = false;
60
+ initializedTargets.clear();
61
+ }
62
+ async function waitForResolveRefHelper(session, windowId) {
63
+ if (!session) {
64
+ throw new Error('No active session available while registering resolve-ref helper.');
65
+ }
66
+ for (let attempt = 0; attempt < 20; attempt++) {
67
+ const response = await session.client.sendCommand({
68
+ command: 'execute_js',
69
+ args: {
70
+ script: 'return !!(window.__MCP__ && typeof window.__MCP__.resolveRef === "function")',
71
+ windowLabel: windowId,
72
+ },
73
+ }, 2000);
74
+ if (response.success && response.data === true) {
75
+ return;
76
+ }
77
+ await new Promise((resolve) => { return setTimeout(resolve, 50); });
78
+ }
79
+ throw new Error('Resolve-ref helper was not available in the webview after registration.');
57
80
  }
58
81
  /**
59
82
  * Execute JavaScript in the Tauri webview using native IPC via WebSocket.
@@ -78,7 +101,7 @@ export async function executeInWebview(script, windowId, appIdentifier) {
78
101
  export async function executeInWebviewWithContext(script, windowId, appIdentifier) {
79
102
  try {
80
103
  // Ensure we're fully initialized
81
- await ensureReady();
104
+ await ensureReady(windowId, appIdentifier);
82
105
  // Resolve target session
83
106
  const session = resolveTargetApp(appIdentifier);
84
107
  const client = session.client;
@@ -124,11 +147,12 @@ export async function executeInWebviewWithContext(script, windowId, appIdentifie
124
147
  * @param timeout - Timeout in milliseconds (default: 5000)
125
148
  * @returns Result of the script execution
126
149
  */
127
- export async function executeAsyncInWebview(script, windowId, timeout = 5000) {
150
+ export async function executeAsyncInWebview(script, windowId, timeout, appIdentifier) {
151
+ const resolvedTimeout = timeout ?? 5000;
128
152
  const wrappedScript = `
129
153
  return (async () => {
130
154
  const timeoutPromise = new Promise((_, reject) => {
131
- setTimeout(() => reject(new Error('Script execution timeout')), ${timeout});
155
+ setTimeout(() => reject(new Error('Script execution timeout')), ${resolvedTimeout});
132
156
  });
133
157
 
134
158
  const scriptPromise = (async () => {
@@ -138,7 +162,7 @@ export async function executeAsyncInWebview(script, windowId, timeout = 5000) {
138
162
  return await Promise.race([scriptPromise, timeoutPromise]);
139
163
  })();
140
164
  `;
141
- return executeInWebview(wrappedScript, windowId);
165
+ return executeInWebview(wrappedScript, windowId, appIdentifier);
142
166
  }
143
167
  // ============================================================================
144
168
  // Console Log Capture System
@@ -268,14 +292,14 @@ function buildScreenshotResult(dataUrl, method, windowContext) {
268
292
  * Prepares the html2canvas script for screenshot capture.
269
293
  * Tries to use the script manager for persistence, falls back to inline injection.
270
294
  */
271
- async function prepareHtml2canvasScript(format, quality) {
295
+ async function prepareHtml2canvasScript(format, quality, windowId, appIdentifier) {
272
296
  try {
273
297
  // Check if html2canvas is already registered
274
- const isRegistered = await isScriptRegistered(HTML2CANVAS_SCRIPT_ID);
298
+ const isRegistered = await isScriptRegistered(HTML2CANVAS_SCRIPT_ID, appIdentifier);
275
299
  if (!isRegistered) {
276
300
  // Register html2canvas via script manager for persistence across navigations
277
301
  const html2canvasSource = getHtml2CanvasSource();
278
- await registerScript(HTML2CANVAS_SCRIPT_ID, 'inline', html2canvasSource);
302
+ await registerScript(HTML2CANVAS_SCRIPT_ID, 'inline', html2canvasSource, windowId, appIdentifier);
279
303
  }
280
304
  // Use the capture-only script since html2canvas is now registered
281
305
  return buildScreenshotCaptureScript(format, quality);
@@ -299,7 +323,7 @@ export async function captureScreenshot(options = {}) {
299
323
  // - Linux: Chromium/WebKit screenshot APIs
300
324
  try {
301
325
  // Ensure we're fully initialized
302
- await ensureReady();
326
+ await ensureReady(windowId, appIdentifier);
303
327
  // Resolve target session
304
328
  const session = resolveTargetApp(appIdentifier);
305
329
  const client = session.client;
@@ -331,7 +355,7 @@ export async function captureScreenshot(options = {}) {
331
355
  }
332
356
  // Fallback 1: Use html2canvas library for high-quality DOM rendering
333
357
  // Try to use the script manager to register html2canvas for persistence
334
- const html2canvasScript = await prepareHtml2canvasScript(format, quality);
358
+ const html2canvasScript = await prepareHtml2canvasScript(format, quality, windowId, appIdentifier);
335
359
  // Fallback: Try Screen Capture API if available
336
360
  // Note: This script is wrapped by executeAsyncInWebview, so we don't need an IIFE
337
361
  const screenCaptureScript = `
@@ -390,7 +414,7 @@ export async function captureScreenshot(options = {}) {
390
414
  `;
391
415
  try {
392
416
  // Try html2canvas second (after native APIs)
393
- const result = await executeAsyncInWebview(html2canvasScript, undefined, 10000); // Longer timeout for library loading
417
+ const result = await executeAsyncInWebview(html2canvasScript, windowId, 10000, appIdentifier);
394
418
  // Validate that we got a real data URL, not 'null' or empty
395
419
  if (result && result !== 'null' && result.startsWith('data:image/')) {
396
420
  return buildScreenshotResult(result, 'html2canvas');
@@ -400,7 +424,7 @@ export async function captureScreenshot(options = {}) {
400
424
  catch (html2canvasError) {
401
425
  try {
402
426
  // Fallback to Screen Capture API
403
- const result = await executeAsyncInWebview(screenCaptureScript);
427
+ const result = await executeAsyncInWebview(screenCaptureScript, windowId, 5000, appIdentifier);
404
428
  // Validate that we got a real data URL
405
429
  if (result && result.startsWith('data:image/')) {
406
430
  return buildScreenshotResult(result, 'Screen Capture API');