@hypothesi/tauri-mcp-server 0.8.2 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/dist/driver/element-picker.js +272 -0
- package/dist/driver/scripts/dom-snapshot.js +13 -14
- package/dist/driver/scripts/element-picker.js +395 -0
- package/dist/driver/scripts/find-element.js +5 -30
- package/dist/driver/scripts/focus.js +8 -4
- package/dist/driver/scripts/get-styles.js +13 -16
- package/dist/driver/scripts/html2canvas-loader.js +5 -1
- package/dist/driver/scripts/index.js +11 -4
- package/dist/driver/scripts/interact.js +15 -7
- package/dist/driver/scripts/resolve-ref.js +92 -3
- package/dist/driver/scripts/wait-for.js +12 -8
- package/dist/driver/webview-interactions.js +35 -17
- package/dist/prompts-registry.js +46 -0
- package/dist/tools-registry.js +71 -110
- package/package.json +1 -1
|
@@ -1,16 +1,43 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Shared ref resolver - always available via window.__MCP__.resolveRef.
|
|
3
|
-
* Accepts a ref ID ("e3", "ref=e3", "[ref=e3]")
|
|
3
|
+
* Accepts a ref ID ("e3", "ref=e3", "[ref=e3]"), CSS selector, XPath, or text.
|
|
4
4
|
* Returns the DOM element, or null if not found.
|
|
5
5
|
*
|
|
6
6
|
* Reads window.__MCP__.reverseRefs dynamically at call time so it always
|
|
7
7
|
* uses the latest snapshot's data.
|
|
8
|
+
*
|
|
9
|
+
* Also provides:
|
|
10
|
+
* - resolveAll(selector, strategy) - returns an Array of matching elements
|
|
11
|
+
* - countAll(selector, strategy) - returns the total match count
|
|
8
12
|
*/
|
|
9
13
|
(function() {
|
|
10
14
|
window.__MCP__ = window.__MCP__ || {};
|
|
11
|
-
|
|
15
|
+
|
|
16
|
+
var REF_PATTERN = /^\[?(?:ref=)?(e\d+)\]?$/;
|
|
17
|
+
|
|
18
|
+
function xpathForText(text) {
|
|
19
|
+
// Escape single quotes for XPath by splitting on ' and using concat()
|
|
20
|
+
if (text.indexOf("'") === -1) {
|
|
21
|
+
return "//*[contains(text(), '" + text + "')]";
|
|
22
|
+
}
|
|
23
|
+
var parts = text.split("'");
|
|
24
|
+
var expr = 'concat(' + parts.map(function(p, i) {
|
|
25
|
+
return (i > 0 ? ",\"'\",": '') + "'" + p + "'";
|
|
26
|
+
}).join('') + ')';
|
|
27
|
+
return '//*[contains(text(), ' + expr + ')]';
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
|
|
31
|
+
* Resolve a single element by selector and strategy.
|
|
32
|
+
* @param {string} selectorOrRef - Selector, ref ID, XPath, or text
|
|
33
|
+
* @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
|
|
34
|
+
* @returns {Element|null}
|
|
35
|
+
*/
|
|
36
|
+
window.__MCP__.resolveRef = function(selectorOrRef, strategy) {
|
|
12
37
|
if (!selectorOrRef) return null;
|
|
13
|
-
|
|
38
|
+
|
|
39
|
+
// Ref IDs always take priority regardless of strategy
|
|
40
|
+
var refMatch = selectorOrRef.match(REF_PATTERN);
|
|
14
41
|
if (refMatch) {
|
|
15
42
|
var reverseRefs = window.__MCP__.reverseRefs;
|
|
16
43
|
if (!reverseRefs) {
|
|
@@ -18,6 +45,68 @@
|
|
|
18
45
|
}
|
|
19
46
|
return reverseRefs.get(refMatch[1]) || null;
|
|
20
47
|
}
|
|
48
|
+
|
|
49
|
+
if (strategy === 'text') {
|
|
50
|
+
var xpath = xpathForText(selectorOrRef);
|
|
51
|
+
var result = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
|
52
|
+
return result.singleNodeValue;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
if (strategy === 'xpath') {
|
|
56
|
+
var result = document.evaluate(selectorOrRef, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
|
57
|
+
return result.singleNodeValue;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Default: CSS selector
|
|
21
61
|
return document.querySelector(selectorOrRef);
|
|
22
62
|
};
|
|
63
|
+
|
|
64
|
+
/**
|
|
65
|
+
* Resolve all matching elements as an Array.
|
|
66
|
+
* @param {string} selector - Selector, XPath, or text
|
|
67
|
+
* @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
|
|
68
|
+
* @returns {Element[]}
|
|
69
|
+
*/
|
|
70
|
+
window.__MCP__.resolveAll = function(selector, strategy) {
|
|
71
|
+
if (!selector) return [];
|
|
72
|
+
|
|
73
|
+
// Ref IDs resolve to a single element
|
|
74
|
+
var refMatch = selector.match(REF_PATTERN);
|
|
75
|
+
if (refMatch) {
|
|
76
|
+
var el = window.__MCP__.resolveRef(selector);
|
|
77
|
+
return el ? [el] : [];
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
if (strategy === 'text') {
|
|
81
|
+
var xpath = xpathForText(selector);
|
|
82
|
+
var snapshot = document.evaluate(xpath, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
|
83
|
+
var results = [];
|
|
84
|
+
for (var i = 0; i < snapshot.snapshotLength; i++) {
|
|
85
|
+
results.push(snapshot.snapshotItem(i));
|
|
86
|
+
}
|
|
87
|
+
return results;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
if (strategy === 'xpath') {
|
|
91
|
+
var snapshot = document.evaluate(selector, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
|
92
|
+
var results = [];
|
|
93
|
+
for (var i = 0; i < snapshot.snapshotLength; i++) {
|
|
94
|
+
results.push(snapshot.snapshotItem(i));
|
|
95
|
+
}
|
|
96
|
+
return results;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// Default: CSS
|
|
100
|
+
return Array.from(document.querySelectorAll(selector));
|
|
101
|
+
};
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Count all matching elements.
|
|
105
|
+
* @param {string} selector - Selector, XPath, or text
|
|
106
|
+
* @param {string} [strategy] - 'css' (default), 'xpath', or 'text'
|
|
107
|
+
* @returns {number}
|
|
108
|
+
*/
|
|
109
|
+
window.__MCP__.countAll = function(selector, strategy) {
|
|
110
|
+
return window.__MCP__.resolveAll(selector, strategy).length;
|
|
111
|
+
};
|
|
23
112
|
})();
|
|
@@ -4,34 +4,38 @@
|
|
|
4
4
|
* @param {Object} params
|
|
5
5
|
* @param {string} params.type - What to wait for: 'selector', 'text', 'ipc-event'
|
|
6
6
|
* @param {string} params.value - Selector/ref ID, text, or event name to wait for
|
|
7
|
+
* @param {string} params.strategy - Selector strategy (applies when type is 'selector'): 'css', 'xpath', or 'text'
|
|
7
8
|
* @param {number} params.timeout - Timeout in milliseconds
|
|
8
9
|
*/
|
|
9
10
|
(async function(params) {
|
|
10
|
-
const { type, value, timeout } = params;
|
|
11
|
+
const { type, value, strategy, timeout } = params;
|
|
11
12
|
const startTime = Date.now();
|
|
12
13
|
|
|
13
14
|
function resolveElement(selectorOrRef) {
|
|
14
15
|
if (!selectorOrRef) return null;
|
|
15
|
-
return window.__MCP__.resolveRef(selectorOrRef);
|
|
16
|
+
return window.__MCP__.resolveRef(selectorOrRef, strategy);
|
|
16
17
|
}
|
|
17
18
|
|
|
18
|
-
return new Promise((resolve, reject)
|
|
19
|
+
return new Promise(function(resolve, reject) {
|
|
19
20
|
function check() {
|
|
20
21
|
if (Date.now() - startTime > timeout) {
|
|
21
|
-
reject(new Error(
|
|
22
|
+
reject(new Error('Timeout waiting for ' + type + ': ' + value));
|
|
22
23
|
return;
|
|
23
24
|
}
|
|
24
25
|
|
|
25
26
|
if (type === 'selector') {
|
|
26
|
-
|
|
27
|
+
var element = resolveElement(value);
|
|
27
28
|
if (element) {
|
|
28
|
-
|
|
29
|
+
var msg = 'Element found: ' + value;
|
|
30
|
+
var count = window.__MCP__.countAll(value, strategy);
|
|
31
|
+
if (count > 1) msg += ' (+' + (count - 1) + ' more match' + (count - 1 === 1 ? '' : 'es') + ')';
|
|
32
|
+
resolve(msg);
|
|
29
33
|
return;
|
|
30
34
|
}
|
|
31
35
|
} else if (type === 'text') {
|
|
32
|
-
|
|
36
|
+
var found = document.body.innerText.includes(value);
|
|
33
37
|
if (found) {
|
|
34
|
-
resolve(
|
|
38
|
+
resolve('Text found: ' + value);
|
|
35
39
|
return;
|
|
36
40
|
}
|
|
37
41
|
} else if (type === 'ipc-event') {
|
|
@@ -15,12 +15,22 @@ export const WindowTargetSchema = z.object({
|
|
|
15
15
|
appIdentifier: z.union([z.string(), z.number()]).optional().describe('App port or bundle ID to target. Defaults to the only connected app or the default app if multiple are connected.'),
|
|
16
16
|
});
|
|
17
17
|
// ============================================================================
|
|
18
|
+
// Shared Selector Strategy
|
|
19
|
+
// ============================================================================
|
|
20
|
+
/**
|
|
21
|
+
* Reusable strategy field for tools that accept a selector.
|
|
22
|
+
* Defaults to 'css' for backward compatibility.
|
|
23
|
+
*/
|
|
24
|
+
const selectorStrategyField = z.enum(['css', 'xpath', 'text']).default('css').describe('Selector strategy: "css" (default) for CSS selectors, "xpath" for XPath expressions, ' +
|
|
25
|
+
'"text" to find elements containing the given text. Ref IDs (e.g., "ref=e3") work with any strategy.');
|
|
26
|
+
// ============================================================================
|
|
18
27
|
// Schemas
|
|
19
28
|
// ============================================================================
|
|
20
29
|
export const InteractSchema = WindowTargetSchema.extend({
|
|
21
30
|
action: z.enum(['click', 'double-click', 'long-press', 'scroll', 'swipe', 'focus'])
|
|
22
31
|
.describe('Type of interaction to perform'),
|
|
23
|
-
selector: z.string().optional().describe('CSS selector
|
|
32
|
+
selector: z.string().optional().describe('Element selector: CSS selector (default), XPath expression, text content, or ref ID (e.g., "ref=e3")'),
|
|
33
|
+
strategy: selectorStrategyField,
|
|
24
34
|
x: z.number().optional().describe('X coordinate for direct coordinate interaction'),
|
|
25
35
|
y: z.number().optional().describe('Y coordinate for direct coordinate interaction'),
|
|
26
36
|
duration: z.number().optional()
|
|
@@ -42,7 +52,9 @@ export const ScreenshotSchema = WindowTargetSchema.extend({
|
|
|
42
52
|
export const KeyboardSchema = WindowTargetSchema.extend({
|
|
43
53
|
action: z.enum(['type', 'press', 'down', 'up'])
|
|
44
54
|
.describe('Keyboard action type: "type" for typing text into an element, "press/down/up" for key events'),
|
|
45
|
-
selector: z.string().optional().describe('
|
|
55
|
+
selector: z.string().optional().describe('Element selector for element to type into (required for "type" action): ' +
|
|
56
|
+
'CSS selector (default), XPath, text content, or ref ID'),
|
|
57
|
+
strategy: selectorStrategyField,
|
|
46
58
|
text: z.string().optional().describe('Text to type (required for "type" action)'),
|
|
47
59
|
key: z.string().optional().describe('Key to press (required for "press/down/up" actions, e.g., "Enter", "a", "Escape")'),
|
|
48
60
|
modifiers: z.array(z.enum(['Control', 'Alt', 'Shift', 'Meta'])).optional().describe('Modifier keys to hold'),
|
|
@@ -50,10 +62,12 @@ export const KeyboardSchema = WindowTargetSchema.extend({
|
|
|
50
62
|
export const WaitForSchema = WindowTargetSchema.extend({
|
|
51
63
|
type: z.enum(['selector', 'text', 'ipc-event']).describe('What to wait for'),
|
|
52
64
|
value: z.string().describe('Selector, text content, or IPC event name to wait for'),
|
|
65
|
+
strategy: selectorStrategyField.describe('Selector strategy (applies when type is "selector"): "css" (default), "xpath", or "text".'),
|
|
53
66
|
timeout: z.number().optional().default(5000).describe('Timeout in milliseconds (default: 5000ms)'),
|
|
54
67
|
});
|
|
55
68
|
export const GetStylesSchema = WindowTargetSchema.extend({
|
|
56
|
-
selector: z.string().describe('CSS selector
|
|
69
|
+
selector: z.string().describe('Element selector: CSS selector (default), XPath expression, text content, or ref ID'),
|
|
70
|
+
strategy: selectorStrategyField,
|
|
57
71
|
properties: z.array(z.string()).optional().describe('Specific CSS properties to retrieve. If omitted, returns all computed styles'),
|
|
58
72
|
multiple: z.boolean().optional().default(false)
|
|
59
73
|
.describe('Whether to get styles for all matching elements (true) or just the first (false)'),
|
|
@@ -68,8 +82,9 @@ export const FocusElementSchema = WindowTargetSchema.extend({
|
|
|
68
82
|
selector: z.string().describe('CSS selector for element to focus'),
|
|
69
83
|
});
|
|
70
84
|
export const FindElementSchema = WindowTargetSchema.extend({
|
|
71
|
-
selector: z.string(),
|
|
72
|
-
|
|
85
|
+
selector: z.string().describe('The selector to find: CSS selector (default), XPath expression, text content, or ref ID (e.g., "ref=e3"). ' +
|
|
86
|
+
'Interpretation depends on strategy.'),
|
|
87
|
+
strategy: selectorStrategyField,
|
|
73
88
|
});
|
|
74
89
|
export const GetConsoleLogsSchema = WindowTargetSchema.extend({
|
|
75
90
|
filter: z.string().optional().describe('Regex or keyword to filter logs'),
|
|
@@ -77,13 +92,14 @@ export const GetConsoleLogsSchema = WindowTargetSchema.extend({
|
|
|
77
92
|
});
|
|
78
93
|
export const DomSnapshotSchema = WindowTargetSchema.extend({
|
|
79
94
|
type: z.enum(['accessibility', 'structure']).describe('Snapshot type'),
|
|
80
|
-
selector: z.string().optional().describe('
|
|
95
|
+
selector: z.string().optional().describe('Selector to scope the snapshot: CSS selector (default), XPath, text content, or ref ID. If omitted, snapshots entire document.'),
|
|
96
|
+
strategy: selectorStrategyField,
|
|
81
97
|
});
|
|
82
98
|
// ============================================================================
|
|
83
99
|
// Implementation Functions
|
|
84
100
|
// ============================================================================
|
|
85
101
|
export async function interact(options) {
|
|
86
|
-
const { action, selector, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY, windowId, appIdentifier } = options;
|
|
102
|
+
const { action, selector, strategy, x, y, duration, scrollX, scrollY, fromX, fromY, toX, toY, windowId, appIdentifier } = options;
|
|
87
103
|
// Handle swipe action separately since it has different logic
|
|
88
104
|
if (action === 'swipe') {
|
|
89
105
|
return performSwipe({ fromX, fromY, toX, toY, duration, windowId, appIdentifier });
|
|
@@ -93,11 +109,12 @@ export async function interact(options) {
|
|
|
93
109
|
if (!selector) {
|
|
94
110
|
throw new Error('Focus action requires a selector');
|
|
95
111
|
}
|
|
96
|
-
return focusElement({ selector, windowId, appIdentifier });
|
|
112
|
+
return focusElement({ selector, strategy, windowId, appIdentifier });
|
|
97
113
|
}
|
|
98
114
|
const script = buildScript(SCRIPTS.interact, {
|
|
99
115
|
action,
|
|
100
116
|
selector: selector ?? null,
|
|
117
|
+
strategy: strategy ?? 'css',
|
|
101
118
|
x: x ?? null,
|
|
102
119
|
y: y ?? null,
|
|
103
120
|
duration: duration ?? 500,
|
|
@@ -146,7 +163,7 @@ export async function screenshot(options = {}) {
|
|
|
146
163
|
return result;
|
|
147
164
|
}
|
|
148
165
|
export async function keyboard(options) {
|
|
149
|
-
const { action, selectorOrKey, textOrModifiers, modifiers, windowId, appIdentifier } = options;
|
|
166
|
+
const { action, selectorOrKey, strategy, textOrModifiers, modifiers, windowId, appIdentifier } = options;
|
|
150
167
|
// Handle the different parameter combinations based on action
|
|
151
168
|
if (action === 'type') {
|
|
152
169
|
const selector = selectorOrKey;
|
|
@@ -154,7 +171,7 @@ export async function keyboard(options) {
|
|
|
154
171
|
if (!selector || !text) {
|
|
155
172
|
throw new Error('Type action requires both selector and text parameters');
|
|
156
173
|
}
|
|
157
|
-
const script = buildTypeScript(selector, text);
|
|
174
|
+
const script = buildTypeScript(selector, text, strategy);
|
|
158
175
|
try {
|
|
159
176
|
return await executeInWebview(script, windowId, appIdentifier);
|
|
160
177
|
}
|
|
@@ -179,8 +196,8 @@ export async function keyboard(options) {
|
|
|
179
196
|
}
|
|
180
197
|
}
|
|
181
198
|
export async function waitFor(options) {
|
|
182
|
-
const { type, value, timeout = 5000, windowId, appIdentifier } = options;
|
|
183
|
-
const script = buildScript(SCRIPTS.waitFor, { type, value, timeout });
|
|
199
|
+
const { type, value, strategy, timeout = 5000, windowId, appIdentifier } = options;
|
|
200
|
+
const script = buildScript(SCRIPTS.waitFor, { type, value, strategy: strategy ?? 'css', timeout });
|
|
184
201
|
try {
|
|
185
202
|
return await executeInWebview(script, windowId, appIdentifier);
|
|
186
203
|
}
|
|
@@ -190,9 +207,10 @@ export async function waitFor(options) {
|
|
|
190
207
|
}
|
|
191
208
|
}
|
|
192
209
|
export async function getStyles(options) {
|
|
193
|
-
const { selector, properties, multiple = false, windowId, appIdentifier } = options;
|
|
210
|
+
const { selector, strategy, properties, multiple = false, windowId, appIdentifier } = options;
|
|
194
211
|
const script = buildScript(SCRIPTS.getStyles, {
|
|
195
212
|
selector,
|
|
213
|
+
strategy: strategy ?? 'css',
|
|
196
214
|
properties: properties || [],
|
|
197
215
|
multiple,
|
|
198
216
|
});
|
|
@@ -232,8 +250,8 @@ export async function executeJavaScript(options) {
|
|
|
232
250
|
}
|
|
233
251
|
}
|
|
234
252
|
export async function focusElement(options) {
|
|
235
|
-
const { selector, windowId, appIdentifier } = options;
|
|
236
|
-
const script = buildScript(SCRIPTS.focus, { selector });
|
|
253
|
+
const { selector, strategy, windowId, appIdentifier } = options;
|
|
254
|
+
const script = buildScript(SCRIPTS.focus, { selector, strategy: strategy ?? 'css' });
|
|
237
255
|
try {
|
|
238
256
|
return await executeInWebview(script, windowId, appIdentifier);
|
|
239
257
|
}
|
|
@@ -274,13 +292,13 @@ export async function getConsoleLogs(options = {}) {
|
|
|
274
292
|
* Uses aria-api for comprehensive, spec-compliant accessibility computation.
|
|
275
293
|
*/
|
|
276
294
|
export async function domSnapshot(options) {
|
|
277
|
-
const { type, selector, windowId, appIdentifier } = options;
|
|
295
|
+
const { type, selector, strategy, windowId, appIdentifier } = options;
|
|
278
296
|
// Only load aria-api for accessibility snapshots
|
|
279
297
|
if (type === 'accessibility') {
|
|
280
298
|
await ensureAriaApiLoaded(windowId);
|
|
281
299
|
}
|
|
282
300
|
// Then execute the snapshot script
|
|
283
|
-
const script = buildScript(SCRIPTS.domSnapshot, { type, selector: selector ?? null });
|
|
301
|
+
const script = buildScript(SCRIPTS.domSnapshot, { type, selector: selector ?? null, strategy: strategy ?? 'css' });
|
|
284
302
|
try {
|
|
285
303
|
return await executeInWebview(script, windowId, appIdentifier);
|
|
286
304
|
}
|
package/dist/prompts-registry.js
CHANGED
|
@@ -91,6 +91,28 @@ Once changes are approved and made:
|
|
|
91
91
|
- The plugin only runs in debug builds so it won't affect production
|
|
92
92
|
- The WebSocket server binds to \`0.0.0.0:9223\` by default
|
|
93
93
|
- For localhost-only access, use \`Builder::new().bind_address("127.0.0.1").build()\``;
|
|
94
|
+
const SELECT_ELEMENT_PROMPT = (message) => {
|
|
95
|
+
const lines = [
|
|
96
|
+
'The user wants to visually select an element in their running Tauri app so they can discuss it with you.',
|
|
97
|
+
'',
|
|
98
|
+
'Follow these steps:',
|
|
99
|
+
'',
|
|
100
|
+
'1. **Ensure a session is active** - Use `driver_session` with action "start" if not already connected',
|
|
101
|
+
'',
|
|
102
|
+
'2. **Activate the element picker** - Call `webview_select_element` to show the picker overlay in the app.',
|
|
103
|
+
'The user will see a blue highlight following their cursor and can click to select an element.',
|
|
104
|
+
'They can press Escape or click X to cancel.',
|
|
105
|
+
'',
|
|
106
|
+
'3. **Review the result** - You will receive the element\'s metadata (tag, id, classes, CSS selector, XPath,',
|
|
107
|
+
'bounding rect, attributes, computed styles, parent chain) and an annotated screenshot with the element highlighted.',
|
|
108
|
+
'',
|
|
109
|
+
'4. **Respond to the user** - Use the element context and screenshot to address their request.',
|
|
110
|
+
];
|
|
111
|
+
if (message) {
|
|
112
|
+
lines.push('', '## User\'s Message About the Element', '', message);
|
|
113
|
+
}
|
|
114
|
+
return lines.join('\n');
|
|
115
|
+
};
|
|
94
116
|
/**
|
|
95
117
|
* Complete registry of all available prompts
|
|
96
118
|
*/
|
|
@@ -114,6 +136,30 @@ export const PROMPTS = [
|
|
|
114
136
|
];
|
|
115
137
|
},
|
|
116
138
|
},
|
|
139
|
+
{
|
|
140
|
+
name: 'select',
|
|
141
|
+
description: 'Visually select an element in the running Tauri app. ' +
|
|
142
|
+
'Activates a picker overlay — click an element to send its metadata and an annotated screenshot to the agent. ' +
|
|
143
|
+
'Optionally include a message describing what you want to do with the element.',
|
|
144
|
+
arguments: [
|
|
145
|
+
{
|
|
146
|
+
name: 'message',
|
|
147
|
+
description: 'What you want to discuss or do with the selected element (e.g. "this button should be green instead of blue")',
|
|
148
|
+
required: false,
|
|
149
|
+
},
|
|
150
|
+
],
|
|
151
|
+
handler: (args) => {
|
|
152
|
+
return [
|
|
153
|
+
{
|
|
154
|
+
role: 'user',
|
|
155
|
+
content: {
|
|
156
|
+
type: 'text',
|
|
157
|
+
text: SELECT_ELEMENT_PROMPT(args.message),
|
|
158
|
+
},
|
|
159
|
+
},
|
|
160
|
+
];
|
|
161
|
+
},
|
|
162
|
+
},
|
|
117
163
|
{
|
|
118
164
|
name: 'setup',
|
|
119
165
|
description: 'Set up or update the MCP Bridge plugin in a Tauri project. ' +
|
package/dist/tools-registry.js
CHANGED
|
@@ -8,6 +8,7 @@ import { manageDriverSession, ManageDriverSessionSchema, } from './driver/sessio
|
|
|
8
8
|
import { readLogs, ReadLogsSchema } from './monitor/logs.js';
|
|
9
9
|
import { executeIPCCommand, manageIPCMonitoring, getIPCEvents, emitTestEvent, getBackendState, manageWindow, ExecuteIPCCommandSchema, ManageIPCMonitoringSchema, GetIPCEventsSchema, EmitTestEventSchema, GetBackendStateSchema, ManageWindowSchema, } from './driver/plugin-commands.js';
|
|
10
10
|
import { interact, screenshot, keyboard, waitFor, getStyles, executeJavaScript, findElement, domSnapshot, InteractSchema, ScreenshotSchema, KeyboardSchema, WaitForSchema, GetStylesSchema, ExecuteJavaScriptSchema, FindElementSchema, DomSnapshotSchema, } from './driver/webview-interactions.js';
|
|
11
|
+
import { selectElement, getPointedElement, SelectElementSchema, GetPointedElementSchema, } from './driver/element-picker.js';
|
|
11
12
|
import { PLUGIN_VERSION_CARGO } from './version.js';
|
|
12
13
|
/**
|
|
13
14
|
* Standard multi-app description for webview tools.
|
|
@@ -48,123 +49,27 @@ First, verify this is a Tauri v2 project:
|
|
|
48
49
|
Examine these files and report what needs to be added or updated:
|
|
49
50
|
|
|
50
51
|
### 1. Rust Plugin Dependency
|
|
51
|
-
|
|
52
|
-
Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`.
|
|
53
|
-
It should be an **optional** dependency behind a Cargo feature
|
|
54
|
-
so that it is completely excluded from production builds:
|
|
55
|
-
|
|
52
|
+
Check \`src-tauri/Cargo.toml\` for \`tauri-plugin-mcp-bridge\`. If missing or outdated, note that it needs:
|
|
56
53
|
\`\`\`toml
|
|
57
54
|
[dependencies]
|
|
58
|
-
tauri-plugin-mcp-bridge =
|
|
59
|
-
\`\`\`
|
|
60
|
-
|
|
61
|
-
Under \`[features]\`, add a feature that enables it:
|
|
62
|
-
|
|
63
|
-
\`\`\`toml
|
|
64
|
-
[features]
|
|
65
|
-
mcp-bridge = ["dep:tauri-plugin-mcp-bridge"]
|
|
55
|
+
tauri-plugin-mcp-bridge = "${PLUGIN_VERSION_CARGO}"
|
|
66
56
|
\`\`\`
|
|
67
57
|
|
|
68
58
|
### 2. Plugin Registration
|
|
69
|
-
|
|
70
|
-
Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin
|
|
71
|
-
registration. It should be gated behind the \`mcp-bridge\` feature flag:
|
|
72
|
-
|
|
59
|
+
Check \`src-tauri/src/lib.rs\` or \`src-tauri/src/main.rs\` for plugin registration. It should have:
|
|
73
60
|
\`\`\`rust
|
|
74
|
-
#[cfg(
|
|
61
|
+
#[cfg(debug_assertions)]
|
|
75
62
|
{
|
|
76
63
|
builder = builder.plugin(tauri_plugin_mcp_bridge::init());
|
|
77
64
|
}
|
|
78
65
|
\`\`\`
|
|
79
66
|
|
|
80
67
|
### 3. Global Tauri Setting
|
|
81
|
-
|
|
82
68
|
Check \`src-tauri/tauri.conf.json\` for \`withGlobalTauri: true\` under the \`app\` section.
|
|
83
69
|
**This is required** - without it, the MCP bridge cannot communicate with the webview.
|
|
84
70
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
\`cargo tauri dev\`), prefer placing it there:
|
|
88
|
-
|
|
89
|
-
\`\`\`json
|
|
90
|
-
{
|
|
91
|
-
"app": {
|
|
92
|
-
"withGlobalTauri": true
|
|
93
|
-
}
|
|
94
|
-
}
|
|
95
|
-
\`\`\`
|
|
96
|
-
|
|
97
|
-
### 4. Plugin Capability (Conditional via build.rs)
|
|
98
|
-
|
|
99
|
-
The \`mcp-bridge:default\` permission must **not** be added to
|
|
100
|
-
\`src-tauri/capabilities/default.json\`. Instead, it should be
|
|
101
|
-
conditionally generated by the build script so that it only exists
|
|
102
|
-
when the \`mcp-bridge\` feature is active.
|
|
103
|
-
|
|
104
|
-
Check \`src-tauri/build.rs\` and update it to conditionally write
|
|
105
|
-
(or remove) a separate capability file before
|
|
106
|
-
\`tauri_build::build()\` runs. Tauri auto-discovers all \`.json\`
|
|
107
|
-
files in \`capabilities/\`, so this ensures the permission is only
|
|
108
|
-
present when the feature is enabled:
|
|
109
|
-
|
|
110
|
-
\`\`\`rust
|
|
111
|
-
fn main() {
|
|
112
|
-
let mcp_cap_path = std::path::Path::new("capabilities/mcp-bridge.json");
|
|
113
|
-
#[cfg(all(feature = "mcp-bridge", debug_assertions))]
|
|
114
|
-
{
|
|
115
|
-
let cap = r#"{
|
|
116
|
-
"identifier": "mcp-bridge",
|
|
117
|
-
"description": "enables MCP bridge for development",
|
|
118
|
-
"windows": [
|
|
119
|
-
"main"
|
|
120
|
-
],
|
|
121
|
-
"permissions": [
|
|
122
|
-
"mcp-bridge:default"
|
|
123
|
-
]
|
|
124
|
-
}"#;
|
|
125
|
-
std::fs::write(mcp_cap_path, cap)
|
|
126
|
-
.expect("failed to write mcp-bridge capability");
|
|
127
|
-
}
|
|
128
|
-
#[cfg(not(all(feature = "mcp-bridge", debug_assertions)))]
|
|
129
|
-
{
|
|
130
|
-
let _ = std::fs::remove_file(mcp_cap_path);
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
tauri_build::build()
|
|
134
|
-
}
|
|
135
|
-
\`\`\`
|
|
136
|
-
|
|
137
|
-
If \`build.rs\` already has other logic, integrate the conditional
|
|
138
|
-
block before the \`tauri_build::build()\` call.
|
|
139
|
-
|
|
140
|
-
### 5. Gitignore the Generated Capability File
|
|
141
|
-
|
|
142
|
-
Since \`capabilities/mcp-bridge.json\` is generated at build time, add it to \`src-tauri/.gitignore\`:
|
|
143
|
-
|
|
144
|
-
\`\`\`gitignore
|
|
145
|
-
/capabilities/mcp-bridge.json
|
|
146
|
-
\`\`\`
|
|
147
|
-
|
|
148
|
-
### 6. Dev Scripts (package.json)
|
|
149
|
-
|
|
150
|
-
If the project uses npm scripts to run \`tauri dev\`, add
|
|
151
|
-
\`--features mcp-bridge\` to the dev scripts so the feature is
|
|
152
|
-
automatically enabled. For example:
|
|
153
|
-
|
|
154
|
-
\`\`\`json
|
|
155
|
-
{
|
|
156
|
-
"scripts": {
|
|
157
|
-
"dev": "tauri dev --features mcp-bridge",
|
|
158
|
-
"dev:ios": "tauri ios dev --features mcp-bridge",
|
|
159
|
-
"dev:android": "tauri android dev --features mcp-bridge"
|
|
160
|
-
}
|
|
161
|
-
}
|
|
162
|
-
\`\`\`
|
|
163
|
-
|
|
164
|
-
Do **not** add \`--features mcp-bridge\` to release-profile dev
|
|
165
|
-
scripts (e.g. those using \`--release\`), as \`debug_assertions\`
|
|
166
|
-
is false in release builds and the guard will exclude the plugin
|
|
167
|
-
anyway.
|
|
71
|
+
### 4. Plugin Permissions
|
|
72
|
+
Check \`src-tauri/capabilities/default.json\` (or similar) for \`"mcp-bridge:default"\` permission.
|
|
168
73
|
|
|
169
74
|
## Response Format
|
|
170
75
|
|
|
@@ -179,19 +84,13 @@ Only after the user says yes should you make any modifications.
|
|
|
179
84
|
## After Setup
|
|
180
85
|
|
|
181
86
|
Once changes are approved and made:
|
|
182
|
-
1. Run the Tauri app in development mode
|
|
183
|
-
updated, use \`npm run dev\`. Otherwise use
|
|
184
|
-
\`cargo tauri dev --features mcp-bridge\` directly.
|
|
87
|
+
1. Run the Tauri app in development mode (\`cargo tauri dev\`)
|
|
185
88
|
2. Use \`driver_session\` with action "start" to connect
|
|
186
89
|
3. Use \`driver_session\` with action "status" to verify
|
|
187
90
|
|
|
188
91
|
## Notes
|
|
189
92
|
|
|
190
|
-
- The plugin
|
|
191
|
-
\`cfg(feature = "mcp-bridge")\` and \`cfg(debug_assertions)\` must
|
|
192
|
-
be true, so even if the feature flag is accidentally enabled in a
|
|
193
|
-
release build, the plugin will not be included
|
|
194
|
-
- The \`mcp-bridge\` Cargo feature must be passed explicitly — either via npm dev scripts or \`cargo tauri dev --features mcp-bridge\`
|
|
93
|
+
- The plugin only runs in debug builds so it won't affect production
|
|
195
94
|
- The WebSocket server binds to \`0.0.0.0:9223\` by default
|
|
196
95
|
- For localhost-only access, use \`Builder::new().bind_address("127.0.0.1").build()\`
|
|
197
96
|
`;
|
|
@@ -271,6 +170,8 @@ export const TOOLS = [
|
|
|
271
170
|
{
|
|
272
171
|
name: 'webview_find_element',
|
|
273
172
|
description: '[Tauri Apps Only] Find DOM elements in a running Tauri app\'s webview. ' +
|
|
173
|
+
'Supports CSS selectors (default), XPath expressions, and text content matching via the strategy parameter. ' +
|
|
174
|
+
'Returns the element\'s HTML. ' +
|
|
274
175
|
'Requires active driver_session. ' +
|
|
275
176
|
MULTI_APP_DESC + ' ' +
|
|
276
177
|
'For browser pages or documentation sites, use Chrome DevTools MCP instead.',
|
|
@@ -314,6 +215,7 @@ export const TOOLS = [
|
|
|
314
215
|
name: 'webview_interact',
|
|
315
216
|
description: '[Tauri Apps Only] Click, scroll, swipe, focus, or perform gestures in a Tauri app webview. ' +
|
|
316
217
|
'Supported actions: click, double-click, long-press, scroll, swipe, focus. ' +
|
|
218
|
+
'Supports CSS selectors (default), XPath, and text content matching via the strategy parameter. ' +
|
|
317
219
|
'Requires active driver_session. ' +
|
|
318
220
|
'For browser interaction, use Chrome DevTools MCP instead.',
|
|
319
221
|
category: TOOL_CATEGORIES.UI_AUTOMATION,
|
|
@@ -364,6 +266,8 @@ export const TOOLS = [
|
|
|
364
266
|
{
|
|
365
267
|
name: 'webview_keyboard',
|
|
366
268
|
description: '[Tauri Apps Only] Type text or send keyboard events in a Tauri app. ' +
|
|
269
|
+
'The selector parameter (for "type" action) supports CSS selectors (default), ' +
|
|
270
|
+
'XPath, and text content matching via the strategy parameter. ' +
|
|
367
271
|
'Requires active driver_session. ' +
|
|
368
272
|
MULTI_APP_DESC + ' ' +
|
|
369
273
|
'For browser keyboard input, use Chrome DevTools MCP instead.',
|
|
@@ -381,6 +285,7 @@ export const TOOLS = [
|
|
|
381
285
|
return await keyboard({
|
|
382
286
|
action: parsed.action,
|
|
383
287
|
selectorOrKey: parsed.selector,
|
|
288
|
+
strategy: parsed.strategy,
|
|
384
289
|
textOrModifiers: parsed.text,
|
|
385
290
|
windowId: parsed.windowId,
|
|
386
291
|
appIdentifier: parsed.appIdentifier,
|
|
@@ -398,6 +303,7 @@ export const TOOLS = [
|
|
|
398
303
|
{
|
|
399
304
|
name: 'webview_wait_for',
|
|
400
305
|
description: '[Tauri Apps Only] Wait for elements, text, or IPC events in a Tauri app. ' +
|
|
306
|
+
'When type is "selector", supports CSS (default), XPath, and text strategies via the strategy parameter. ' +
|
|
401
307
|
'Requires active driver_session. ' +
|
|
402
308
|
MULTI_APP_DESC + ' ' +
|
|
403
309
|
'For browser waits, use Chrome DevTools MCP instead.',
|
|
@@ -413,6 +319,7 @@ export const TOOLS = [
|
|
|
413
319
|
return await waitFor({
|
|
414
320
|
type: parsed.type,
|
|
415
321
|
value: parsed.value,
|
|
322
|
+
strategy: parsed.strategy,
|
|
416
323
|
timeout: parsed.timeout,
|
|
417
324
|
windowId: parsed.windowId,
|
|
418
325
|
appIdentifier: parsed.appIdentifier,
|
|
@@ -422,6 +329,7 @@ export const TOOLS = [
|
|
|
422
329
|
{
|
|
423
330
|
name: 'webview_get_styles',
|
|
424
331
|
description: '[Tauri Apps Only] Get computed CSS styles from elements in a Tauri app. ' +
|
|
332
|
+
'Supports CSS selectors (default), XPath, and text content matching via the strategy parameter. ' +
|
|
425
333
|
'Requires active driver_session. ' +
|
|
426
334
|
MULTI_APP_DESC + ' ' +
|
|
427
335
|
'For browser style inspection, use Chrome DevTools MCP instead.',
|
|
@@ -436,6 +344,7 @@ export const TOOLS = [
|
|
|
436
344
|
const parsed = GetStylesSchema.parse(args);
|
|
437
345
|
return await getStyles({
|
|
438
346
|
selector: parsed.selector,
|
|
347
|
+
strategy: parsed.strategy,
|
|
439
348
|
properties: parsed.properties,
|
|
440
349
|
multiple: parsed.multiple,
|
|
441
350
|
windowId: parsed.windowId,
|
|
@@ -480,6 +389,7 @@ export const TOOLS = [
|
|
|
480
389
|
'with element tag names, IDs, CSS classes, and data-testid attributes (if present). ' +
|
|
481
390
|
'Use this for understanding page layout, debugging CSS selectors, or locating elements by class/ID. ' +
|
|
482
391
|
'Use the optional selector parameter to scope the snapshot to a subtree. ' +
|
|
392
|
+
'The selector supports CSS (default), XPath, and text content matching via the strategy parameter. ' +
|
|
483
393
|
'Requires active driver_session. ' +
|
|
484
394
|
MULTI_APP_DESC,
|
|
485
395
|
category: TOOL_CATEGORIES.UI_AUTOMATION,
|
|
@@ -494,6 +404,57 @@ export const TOOLS = [
|
|
|
494
404
|
return await domSnapshot({
|
|
495
405
|
type: parsed.type,
|
|
496
406
|
selector: parsed.selector,
|
|
407
|
+
strategy: parsed.strategy,
|
|
408
|
+
windowId: parsed.windowId,
|
|
409
|
+
appIdentifier: parsed.appIdentifier,
|
|
410
|
+
});
|
|
411
|
+
},
|
|
412
|
+
},
|
|
413
|
+
// Element Picker Tools
|
|
414
|
+
{
|
|
415
|
+
name: 'webview_select_element',
|
|
416
|
+
description: '[Tauri Apps Only] Activates an element picker overlay in the Tauri app. ' +
|
|
417
|
+
'The user visually selects an element by clicking it, and the tool returns ' +
|
|
418
|
+
'rich element metadata (tag, id, classes, attributes, text, bounding rect, ' +
|
|
419
|
+
'CSS selector, computed styles, parent chain) plus an annotated screenshot ' +
|
|
420
|
+
'with the element highlighted. ' +
|
|
421
|
+
'Requires active driver_session. ' +
|
|
422
|
+
MULTI_APP_DESC,
|
|
423
|
+
category: TOOL_CATEGORIES.UI_AUTOMATION,
|
|
424
|
+
schema: SelectElementSchema,
|
|
425
|
+
annotations: {
|
|
426
|
+
title: 'Select Element (Visual Picker)',
|
|
427
|
+
readOnlyHint: true,
|
|
428
|
+
openWorldHint: false,
|
|
429
|
+
},
|
|
430
|
+
handler: async (args) => {
|
|
431
|
+
const parsed = SelectElementSchema.parse(args);
|
|
432
|
+
return await selectElement({
|
|
433
|
+
timeout: parsed.timeout,
|
|
434
|
+
windowId: parsed.windowId,
|
|
435
|
+
appIdentifier: parsed.appIdentifier,
|
|
436
|
+
});
|
|
437
|
+
},
|
|
438
|
+
},
|
|
439
|
+
{
|
|
440
|
+
name: 'webview_get_pointed_element',
|
|
441
|
+
description: '[Tauri Apps Only] Retrieves element metadata for an element the user previously ' +
|
|
442
|
+
'pointed at via Alt+Shift+Click in the Tauri app. Returns the same rich metadata ' +
|
|
443
|
+
'as webview_select_element (tag, id, classes, attributes, text, bounding rect, ' +
|
|
444
|
+
'CSS selector, computed styles, parent chain) plus an annotated screenshot. ' +
|
|
445
|
+
'The user must Alt+Shift+Click an element first before calling this tool. ' +
|
|
446
|
+
'Requires active driver_session. ' +
|
|
447
|
+
MULTI_APP_DESC,
|
|
448
|
+
category: TOOL_CATEGORIES.UI_AUTOMATION,
|
|
449
|
+
schema: GetPointedElementSchema,
|
|
450
|
+
annotations: {
|
|
451
|
+
title: 'Get Pointed Element',
|
|
452
|
+
readOnlyHint: true,
|
|
453
|
+
openWorldHint: false,
|
|
454
|
+
},
|
|
455
|
+
handler: async (args) => {
|
|
456
|
+
const parsed = GetPointedElementSchema.parse(args);
|
|
457
|
+
return await getPointedElement({
|
|
497
458
|
windowId: parsed.windowId,
|
|
498
459
|
appIdentifier: parsed.appIdentifier,
|
|
499
460
|
});
|