@midscene/computer 1.2.1-beta-20260112081017.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +243 -0
- package/dist/es/index.mjs +438 -0
- package/dist/es/mcp-server.mjs +508 -0
- package/dist/lib/index.js +498 -0
- package/dist/lib/mcp-server.js +559 -0
- package/dist/types/index.d.ts +70 -0
- package/dist/types/mcp-server.d.ts +88 -0
- package/package.json +48 -0
- package/rslib.config.ts +26 -0
- package/src/agent.ts +17 -0
- package/src/device.ts +554 -0
- package/src/index.ts +8 -0
- package/src/mcp-server.ts +65 -0
- package/src/mcp-tools.ts +96 -0
- package/src/types/libnut.d.ts +36 -0
- package/src/utils.ts +51 -0
- package/tests/ai/ai-auto-todo.test.ts +85 -0
- package/tests/ai/ai-shop.test.ts +56 -0
- package/tests/ai/basic.test.ts +46 -0
- package/tests/ai/keyboard.test.ts +66 -0
- package/tests/ai/multi-display.test.ts +76 -0
- package/tests/ai/test-utils.ts +31 -0
- package/tests/ai/web-browser.test.ts +63 -0
- package/tests/unit-test/agent.test.ts +34 -0
- package/tests/unit-test/device.test.ts +53 -0
- package/tsconfig.json +18 -0
- package/tsconfig.tsbuildinfo +1 -0
- package/vitest.config.ts +47 -0
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
import { BaseMCPServer, BaseMidsceneTools, createMCPServerLauncher } from "@midscene/shared/mcp";
|
|
2
|
+
import { Agent } from "@midscene/core/agent";
|
|
3
|
+
import node_assert from "node:assert";
|
|
4
|
+
import { getMidsceneLocationSchema, z } from "@midscene/core";
|
|
5
|
+
import { defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionHover, defineActionKeyboardPress, defineActionRightClick, defineActionScroll, defineActionTap } from "@midscene/core/device";
|
|
6
|
+
import { sleep } from "@midscene/core/utils";
|
|
7
|
+
import { createImgBase64ByFormat } from "@midscene/shared/img";
|
|
8
|
+
import { getDebug } from "@midscene/shared/logger";
|
|
9
|
+
import screenshot_desktop from "screenshot-desktop";
|
|
10
|
+
/**
 * Transpiler helper that installs `value` under `key` on `obj`.
 *
 * When `key` is not yet reachable on `obj` (own or inherited) a plain
 * assignment is used. Otherwise the property is (re)defined as an own,
 * enumerable, configurable, writable data property so that an inherited
 * accessor or read-only property cannot intercept the write.
 *
 * @param {object} obj - Target object.
 * @param {PropertyKey} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj`, for chaining.
 */
function _define_property(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }
  const descriptor = {
    value,
    enumerable: true,
    configurable: true,
    writable: true,
  };
  Object.defineProperty(obj, key, descriptor);
  return obj;
}
|
|
20
|
+
// Pointer glide tuning used with smoothMoveMouse(): the number of interpolation
// steps and the per-step value handed to sleep() (presumably milliseconds).
const SMOOTH_MOVE_STEPS_TAP = 8;
const SMOOTH_MOVE_DELAY_TAP = 8;
const SMOOTH_MOVE_STEPS_HOVER = 10;
const SMOOTH_MOVE_DELAY_HOVER = 10;

// Pause after a hover glide finishes, and how long the left button stays
// pressed during a tap.
const HOVER_EFFECT_WAIT = 300;
const CLICK_HOLD_DURATION = 50;

// Pauses used by the Input action: after the focusing click, and after the
// select-all + backspace that clears the field.
const INPUT_FOCUS_DELAY = 300;
const INPUT_CLEAR_DELAY = 150;

// Scroll tuning: how many wheel events a scroll-to-edge request emits, the
// pause between them, and the pause after a single directional scroll.
const SCROLL_REPEAT_COUNT = 10;
const SCROLL_STEP_DELAY = 100;
const SCROLL_COMPLETE_DELAY = 500;
|
|
31
|
+
// Lazily populated handle to the native libnut binding (null until loaded).
let libnut = null;
// The error produced by the first failed load attempt; once set, later calls
// rethrow it instead of retrying the import.
let libnutLoadError = null;

/**
 * Load the native libnut binding on first use and memoize the outcome.
 *
 * A successful load is cached in `libnut`. A failed load is cached in
 * `libnutLoadError`, and every subsequent call rethrows that very same Error
 * instance, so callers see one consistent message no matter when they ask.
 * The underlying failure is preserved on the error's `cause`.
 *
 * @returns {Promise<object>} The `libnut` export of the imported module.
 * @throws {Error} If the module cannot be imported or exposes no `libnut`.
 */
async function getLibnut() {
  if (libnut) return libnut;
  if (libnutLoadError) throw libnutLoadError;
  try {
    const libnutModule = await import("@computer-use/libnut/dist/import_libnut");
    if (!libnutModule.libnut) {
      throw new Error('libnut module loaded but libnut object is undefined');
    }
    libnut = libnutModule.libnut;
    return libnut;
  } catch (error) {
    // Cache the wrapped error (not the raw one) so the first and all later
    // failures are identical.
    libnutLoadError = new Error(
      `Failed to load @computer-use/libnut. Make sure it is properly installed and compiled for your platform. Error: ${error}`,
      { cause: error },
    );
    throw libnutLoadError;
  }
}
|
|
46
|
+
// Logging function for the device layer, created by getDebug() with the
// 'computer:device' label; called throughout this file with a message and
// optional extra values.
const debugDevice = getDebug('computer:device');
|
|
47
|
+
/**
 * Glide the pointer from its current position to a target point in equal
 * linear increments, pausing after every increment.
 *
 * @param {number} targetX - Destination x coordinate.
 * @param {number} targetY - Destination y coordinate.
 * @param {number} steps - Number of intermediate moves; the last one lands on the target.
 * @param {number} stepDelay - Value passed to `sleep()` after each move.
 * @returns {Promise<void>}
 * @throws {AssertionError} If libnut has not been loaded yet.
 */
async function smoothMoveMouse(targetX, targetY, steps, stepDelay) {
  node_assert(libnut, 'libnut not initialized');
  const { x: startX, y: startY } = libnut.getMousePos();
  const deltaX = targetX - startX;
  const deltaY = targetY - startY;
  for (let step = 1; step <= steps; step += 1) {
    const nextX = Math.round(startX + deltaX * step / steps);
    const nextY = Math.round(startY + deltaY * step / steps);
    libnut.moveMouse(nextX, nextY);
    await sleep(stepDelay);
  }
}
|
|
57
|
+
// Aliases (lower-case) for key names, mapped to the names handed to libnut.
const KEY_NAME_MAP = {
  windows: 'win',
  win: 'win',
  ctrl: 'control',
  esc: 'escape',
  del: 'delete',
  ins: 'insert',
  pgup: 'pageup',
  pgdn: 'pagedown',
  arrowup: 'up',
  arrowdown: 'down',
  arrowleft: 'left',
  arrowright: 'right',
  volumedown: 'audio_vol_down',
  volumeup: 'audio_vol_up',
  mediavolumedown: 'audio_vol_down',
  mediavolumeup: 'audio_vol_up',
  mute: 'audio_mute',
  mediamute: 'audio_mute',
  mediaplay: 'audio_play',
  mediapause: 'audio_pause',
  mediaplaypause: 'audio_play',
  mediastop: 'audio_stop',
  medianexttrack: 'audio_next',
  mediaprevioustrack: 'audio_prev',
  medianext: 'audio_next',
  mediaprev: 'audio_prev'
};

// Extra aliases consulted first by normalizePrimaryKey(), i.e. for the last
// token of a key combination.
const PRIMARY_KEY_MAP = {
  command: 'cmd',
  cmd: 'cmd',
  meta: 'meta',
  control: 'control',
  ctrl: 'control',
  shift: 'shift',
  alt: 'alt',
  option: 'alt'
};

/**
 * Look up `name` among the OWN properties of `aliasMap`.
 * Inherited members such as `constructor` are ignored so that a key token can
 * never resolve to something from Object.prototype.
 *
 * @param {Record<string, string>} aliasMap
 * @param {string} name
 * @returns {string | undefined}
 */
function lookupOwnAlias(aliasMap, name) {
  return Object.prototype.hasOwnProperty.call(aliasMap, name) ? aliasMap[name] : undefined;
}

/**
 * Lower-case a key token and strip surrounding whitespace (so the pieces of
 * "Ctrl + A" match the alias tables). A token that is whitespace only is kept
 * as-is, because it may be the literal key to press.
 *
 * @param {string} key
 * @returns {string}
 */
function canonicalizeKeyToken(key) {
  const trimmed = key.trim();
  return (trimmed || key).toLowerCase();
}

/**
 * Normalize a key token using KEY_NAME_MAP.
 *
 * @param {string} key - Key name in any letter case.
 * @returns {string} The mapped name, or the lower-cased token when no alias exists.
 */
function normalizeKeyName(key) {
  const lowerKey = canonicalizeKeyToken(key);
  return lookupOwnAlias(KEY_NAME_MAP, lowerKey) || lowerKey;
}

/**
 * Normalize a key token, preferring PRIMARY_KEY_MAP and falling back to
 * KEY_NAME_MAP.
 *
 * @param {string} key - Key name in any letter case.
 * @returns {string} The mapped name, or the lower-cased token when no alias exists.
 */
function normalizePrimaryKey(key) {
  const lowerKey = canonicalizeKeyToken(key);
  return lookupOwnAlias(PRIMARY_KEY_MAP, lowerKey)
    || lookupOwnAlias(KEY_NAME_MAP, lowerKey)
    || lowerKey;
}
|
|
104
|
+
class ComputerDevice {
|
|
105
|
+
describe() {
|
|
106
|
+
return this.description || 'Computer Device';
|
|
107
|
+
}
|
|
108
|
+
static async listDisplays() {
|
|
109
|
+
try {
|
|
110
|
+
const displays = await screenshot_desktop.listDisplays();
|
|
111
|
+
return displays.map((d)=>({
|
|
112
|
+
id: String(d.id),
|
|
113
|
+
name: d.name || `Display ${d.id}`,
|
|
114
|
+
primary: d.primary || false
|
|
115
|
+
}));
|
|
116
|
+
} catch (error) {
|
|
117
|
+
debugDevice(`Failed to list displays: ${error}`);
|
|
118
|
+
return [];
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
async connect() {
|
|
122
|
+
debugDevice('Connecting to computer device');
|
|
123
|
+
try {
|
|
124
|
+
libnut = await getLibnut();
|
|
125
|
+
const size = await this.size();
|
|
126
|
+
const displays = await ComputerDevice.listDisplays();
|
|
127
|
+
this.description = `
|
|
128
|
+
Type: Computer
|
|
129
|
+
Platform: ${process.platform}
|
|
130
|
+
Display: ${this.displayId || 'Primary'}
|
|
131
|
+
Screen Size: ${size.width}x${size.height}
|
|
132
|
+
Available Displays: ${displays.length > 0 ? displays.map((d)=>d.name).join(', ') : 'Unknown'}
|
|
133
|
+
`;
|
|
134
|
+
debugDevice('Computer device connected', this.description);
|
|
135
|
+
} catch (error) {
|
|
136
|
+
debugDevice(`Failed to connect: ${error}`);
|
|
137
|
+
throw new Error(`Unable to connect to computer device: ${error}`);
|
|
138
|
+
}
|
|
139
|
+
}
|
|
140
|
+
async screenshotBase64() {
|
|
141
|
+
debugDevice('Taking screenshot', {
|
|
142
|
+
displayId: this.displayId
|
|
143
|
+
});
|
|
144
|
+
try {
|
|
145
|
+
const options = {
|
|
146
|
+
format: 'png'
|
|
147
|
+
};
|
|
148
|
+
if (void 0 !== this.displayId) if ('darwin' === process.platform) {
|
|
149
|
+
const screenIndex = Number(this.displayId);
|
|
150
|
+
if (!Number.isNaN(screenIndex)) options.screen = screenIndex;
|
|
151
|
+
} else options.screen = this.displayId;
|
|
152
|
+
debugDevice('Screenshot options', options);
|
|
153
|
+
const buffer = await screenshot_desktop(options);
|
|
154
|
+
return createImgBase64ByFormat('png', buffer.toString('base64'));
|
|
155
|
+
} catch (error) {
|
|
156
|
+
debugDevice(`Screenshot failed: ${error}`);
|
|
157
|
+
throw new Error(`Failed to take screenshot: ${error}`);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
async size() {
|
|
161
|
+
node_assert(libnut, 'libnut not initialized');
|
|
162
|
+
try {
|
|
163
|
+
const screenSize = libnut.getScreenSize();
|
|
164
|
+
return {
|
|
165
|
+
width: screenSize.width,
|
|
166
|
+
height: screenSize.height,
|
|
167
|
+
dpr: 1
|
|
168
|
+
};
|
|
169
|
+
} catch (error) {
|
|
170
|
+
debugDevice(`Failed to get screen size: ${error}`);
|
|
171
|
+
throw new Error(`Failed to get screen size: ${error}`);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
actionSpace() {
|
|
175
|
+
const defaultActions = [
|
|
176
|
+
defineActionTap(async (param)=>{
|
|
177
|
+
node_assert(libnut, 'libnut not initialized');
|
|
178
|
+
const element = param.locate;
|
|
179
|
+
node_assert(element, 'Element not found, cannot tap');
|
|
180
|
+
const [x, y] = element.center;
|
|
181
|
+
const targetX = Math.round(x);
|
|
182
|
+
const targetY = Math.round(y);
|
|
183
|
+
await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_TAP, SMOOTH_MOVE_DELAY_TAP);
|
|
184
|
+
libnut.mouseToggle('down', 'left');
|
|
185
|
+
await sleep(CLICK_HOLD_DURATION);
|
|
186
|
+
libnut.mouseToggle('up', 'left');
|
|
187
|
+
}),
|
|
188
|
+
defineActionDoubleClick(async (param)=>{
|
|
189
|
+
node_assert(libnut, 'libnut not initialized');
|
|
190
|
+
const element = param.locate;
|
|
191
|
+
node_assert(element, 'Element not found, cannot double click');
|
|
192
|
+
const [x, y] = element.center;
|
|
193
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
194
|
+
libnut.mouseClick('left', true);
|
|
195
|
+
}),
|
|
196
|
+
defineActionRightClick(async (param)=>{
|
|
197
|
+
node_assert(libnut, 'libnut not initialized');
|
|
198
|
+
const element = param.locate;
|
|
199
|
+
node_assert(element, 'Element not found, cannot right click');
|
|
200
|
+
const [x, y] = element.center;
|
|
201
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
202
|
+
libnut.mouseClick('right');
|
|
203
|
+
}),
|
|
204
|
+
defineActionHover(async (param)=>{
|
|
205
|
+
node_assert(libnut, 'libnut not initialized');
|
|
206
|
+
const element = param.locate;
|
|
207
|
+
node_assert(element, 'Element not found, cannot hover');
|
|
208
|
+
const [x, y] = element.center;
|
|
209
|
+
const targetX = Math.round(x);
|
|
210
|
+
const targetY = Math.round(y);
|
|
211
|
+
await smoothMoveMouse(targetX, targetY, SMOOTH_MOVE_STEPS_HOVER, SMOOTH_MOVE_DELAY_HOVER);
|
|
212
|
+
await sleep(HOVER_EFFECT_WAIT);
|
|
213
|
+
}),
|
|
214
|
+
defineAction({
|
|
215
|
+
name: 'Input',
|
|
216
|
+
description: 'Input text into the input field',
|
|
217
|
+
interfaceAlias: 'aiInput',
|
|
218
|
+
paramSchema: z.object({
|
|
219
|
+
value: z.string().describe('The text to input'),
|
|
220
|
+
mode: z["enum"]([
|
|
221
|
+
'replace',
|
|
222
|
+
'clear',
|
|
223
|
+
'append'
|
|
224
|
+
]).default('replace').optional().describe('Input mode: replace, clear, or append'),
|
|
225
|
+
locate: getMidsceneLocationSchema().describe('The input field to be filled').optional()
|
|
226
|
+
}),
|
|
227
|
+
call: async (param)=>{
|
|
228
|
+
node_assert(libnut, 'libnut not initialized');
|
|
229
|
+
const element = param.locate;
|
|
230
|
+
if (element && 'append' !== param.mode) {
|
|
231
|
+
const [x, y] = element.center;
|
|
232
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
233
|
+
libnut.mouseClick('left');
|
|
234
|
+
await sleep(INPUT_FOCUS_DELAY);
|
|
235
|
+
const modifier = 'darwin' === process.platform ? 'command' : 'control';
|
|
236
|
+
libnut.keyTap('a', [
|
|
237
|
+
modifier
|
|
238
|
+
]);
|
|
239
|
+
await sleep(50);
|
|
240
|
+
libnut.keyTap('backspace');
|
|
241
|
+
await sleep(INPUT_CLEAR_DELAY);
|
|
242
|
+
}
|
|
243
|
+
if ('clear' === param.mode) return;
|
|
244
|
+
if (!param.value) return;
|
|
245
|
+
libnut.typeString(param.value);
|
|
246
|
+
}
|
|
247
|
+
}),
|
|
248
|
+
defineActionScroll(async (param)=>{
|
|
249
|
+
node_assert(libnut, 'libnut not initialized');
|
|
250
|
+
if (param.locate) {
|
|
251
|
+
const element = param.locate;
|
|
252
|
+
const [x, y] = element.center;
|
|
253
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
254
|
+
}
|
|
255
|
+
const scrollType = param?.scrollType;
|
|
256
|
+
const scrollToEdgeActions = {
|
|
257
|
+
scrollToTop: [
|
|
258
|
+
0,
|
|
259
|
+
10
|
|
260
|
+
],
|
|
261
|
+
scrollToBottom: [
|
|
262
|
+
0,
|
|
263
|
+
-10
|
|
264
|
+
],
|
|
265
|
+
scrollToLeft: [
|
|
266
|
+
-10,
|
|
267
|
+
0
|
|
268
|
+
],
|
|
269
|
+
scrollToRight: [
|
|
270
|
+
10,
|
|
271
|
+
0
|
|
272
|
+
]
|
|
273
|
+
};
|
|
274
|
+
const edgeAction = scrollToEdgeActions[scrollType || ''];
|
|
275
|
+
if (edgeAction) {
|
|
276
|
+
const [dx, dy] = edgeAction;
|
|
277
|
+
for(let i = 0; i < SCROLL_REPEAT_COUNT; i++){
|
|
278
|
+
libnut.scrollMouse(dx, dy);
|
|
279
|
+
await sleep(SCROLL_STEP_DELAY);
|
|
280
|
+
}
|
|
281
|
+
return;
|
|
282
|
+
}
|
|
283
|
+
if ('singleAction' === scrollType || !scrollType) {
|
|
284
|
+
const distance = param?.distance || 500;
|
|
285
|
+
const ticks = Math.ceil(distance / 100);
|
|
286
|
+
const direction = param?.direction || 'down';
|
|
287
|
+
const directionMap = {
|
|
288
|
+
up: [
|
|
289
|
+
0,
|
|
290
|
+
ticks
|
|
291
|
+
],
|
|
292
|
+
down: [
|
|
293
|
+
0,
|
|
294
|
+
-ticks
|
|
295
|
+
],
|
|
296
|
+
left: [
|
|
297
|
+
-ticks,
|
|
298
|
+
0
|
|
299
|
+
],
|
|
300
|
+
right: [
|
|
301
|
+
ticks,
|
|
302
|
+
0
|
|
303
|
+
]
|
|
304
|
+
};
|
|
305
|
+
const [dx, dy] = directionMap[direction] || [
|
|
306
|
+
0,
|
|
307
|
+
-ticks
|
|
308
|
+
];
|
|
309
|
+
libnut.scrollMouse(dx, dy);
|
|
310
|
+
await sleep(SCROLL_COMPLETE_DELAY);
|
|
311
|
+
return;
|
|
312
|
+
}
|
|
313
|
+
throw new Error(`Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`);
|
|
314
|
+
}),
|
|
315
|
+
defineActionKeyboardPress(async (param)=>{
|
|
316
|
+
node_assert(libnut, 'libnut not initialized');
|
|
317
|
+
if (param.locate) {
|
|
318
|
+
const [x, y] = param.locate.center;
|
|
319
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
320
|
+
libnut.mouseClick('left');
|
|
321
|
+
await sleep(50);
|
|
322
|
+
}
|
|
323
|
+
const keys = param.keyName.split('+');
|
|
324
|
+
const modifiers = keys.slice(0, -1).map(normalizeKeyName);
|
|
325
|
+
const key = normalizePrimaryKey(keys[keys.length - 1]);
|
|
326
|
+
debugDevice('KeyboardPress', {
|
|
327
|
+
original: param.keyName,
|
|
328
|
+
key,
|
|
329
|
+
modifiers
|
|
330
|
+
});
|
|
331
|
+
if (modifiers.length > 0) libnut.keyTap(key, modifiers);
|
|
332
|
+
else libnut.keyTap(key);
|
|
333
|
+
}),
|
|
334
|
+
defineActionDragAndDrop(async (param)=>{
|
|
335
|
+
node_assert(libnut, 'libnut not initialized');
|
|
336
|
+
const from = param.from;
|
|
337
|
+
const to = param.to;
|
|
338
|
+
node_assert(from, 'missing "from" param for drag and drop');
|
|
339
|
+
node_assert(to, 'missing "to" param for drag and drop');
|
|
340
|
+
const [fromX, fromY] = from.center;
|
|
341
|
+
const [toX, toY] = to.center;
|
|
342
|
+
libnut.moveMouse(Math.round(fromX), Math.round(fromY));
|
|
343
|
+
libnut.mouseToggle('down', 'left');
|
|
344
|
+
await sleep(100);
|
|
345
|
+
libnut.moveMouse(Math.round(toX), Math.round(toY));
|
|
346
|
+
await sleep(100);
|
|
347
|
+
libnut.mouseToggle('up', 'left');
|
|
348
|
+
}),
|
|
349
|
+
defineActionClearInput(async (param)=>{
|
|
350
|
+
node_assert(libnut, 'libnut not initialized');
|
|
351
|
+
const element = param.locate;
|
|
352
|
+
node_assert(element, 'Element not found, cannot clear input');
|
|
353
|
+
const [x, y] = element.center;
|
|
354
|
+
libnut.moveMouse(Math.round(x), Math.round(y));
|
|
355
|
+
libnut.mouseClick('left');
|
|
356
|
+
await sleep(100);
|
|
357
|
+
const modifier = 'darwin' === process.platform ? 'command' : 'control';
|
|
358
|
+
libnut.keyTap('a', [
|
|
359
|
+
modifier
|
|
360
|
+
]);
|
|
361
|
+
libnut.keyTap('backspace');
|
|
362
|
+
await sleep(50);
|
|
363
|
+
})
|
|
364
|
+
];
|
|
365
|
+
const platformActions = Object.values(createPlatformActions());
|
|
366
|
+
const customActions = this.options?.customActions || [];
|
|
367
|
+
return [
|
|
368
|
+
...defaultActions,
|
|
369
|
+
...platformActions,
|
|
370
|
+
...customActions
|
|
371
|
+
];
|
|
372
|
+
}
|
|
373
|
+
async destroy() {
|
|
374
|
+
if (this.destroyed) return;
|
|
375
|
+
this.destroyed = true;
|
|
376
|
+
debugDevice('Computer device destroyed');
|
|
377
|
+
}
|
|
378
|
+
async url() {
|
|
379
|
+
return '';
|
|
380
|
+
}
|
|
381
|
+
constructor(options){
|
|
382
|
+
_define_property(this, "interfaceType", 'computer');
|
|
383
|
+
_define_property(this, "options", void 0);
|
|
384
|
+
_define_property(this, "displayId", void 0);
|
|
385
|
+
_define_property(this, "description", void 0);
|
|
386
|
+
_define_property(this, "destroyed", false);
|
|
387
|
+
_define_property(this, "uri", void 0);
|
|
388
|
+
this.options = options;
|
|
389
|
+
this.displayId = options?.displayId;
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
/**
 * Build the actions that are specific to the desktop platform.
 *
 * @returns {{ ListDisplays: object }} A map with a single `ListDisplays`
 *   action whose `call` resolves to `ComputerDevice.listDisplays()`.
 */
function createPlatformActions() {
  const listDisplaysAction = defineAction({
    name: 'ListDisplays',
    description: 'List all available displays/monitors',
    call: async () => {
      const displays = await ComputerDevice.listDisplays();
      return displays;
    }
  });
  return { ListDisplays: listDisplaysAction };
}
|
|
401
|
+
/**
 * Agent subclass for the desktop. It adds no members of its own; everything
 * comes from `Agent`.
 */
class ComputerAgent extends Agent {}
|
|
403
|
+
/**
 * Create a ComputerDevice, connect it, and wrap it in a ComputerAgent.
 *
 * @param {object} [opts] - Passed to the device (an empty object is used when
 *   falsy) and, unchanged, to the agent as its second constructor argument.
 * @returns {Promise<ComputerAgent>}
 */
async function agentFromComputer(opts) {
  const deviceOptions = opts || {};
  const computerDevice = new ComputerDevice(deviceOptions);
  await computerDevice.connect();
  const computerAgent = new ComputerAgent(computerDevice, opts);
  return computerAgent;
}
|
|
408
|
+
// Logging function for the MCP tools layer, created by getDebug() with the
// 'mcp:computer-tools' label.
const debug = getDebug('mcp:computer-tools');

/**
 * Tools manager that exposes the desktop device over MCP.
 */
class ComputerMidsceneTools extends BaseMidsceneTools {
  /**
   * @returns {ComputerDevice} A fresh, unconnected device with empty options.
   */
  createTemporaryDevice() {
    return new ComputerDevice({});
  }

  /**
   * Return the current agent, creating one if needed.
   *
   * When an agent already exists AND a `displayId` is given, the existing
   * agent is destroyed (failures are logged, not thrown) and a new one is
   * created for that display. Without a `displayId` an existing agent is
   * reused as-is.
   *
   * @param {string} [displayId] - Display to connect to; primary when omitted.
   * @returns {Promise<object>} The agent stored in `this.agent`.
   */
  async ensureAgent(displayId) {
    if (this.agent && displayId) {
      try {
        await this.agent.destroy?.();
      } catch (error) {
        debug('Failed to destroy agent during cleanup:', error);
      }
      this.agent = void 0;
    }
    if (this.agent) return this.agent;
    debug('Creating Computer agent with displayId:', displayId || 'primary');
    const opts = displayId ? {
      displayId
    } : void 0;
    const agent = await agentFromComputer(opts);
    this.agent = agent;
    return agent;
  }

  /**
   * Describe the platform-specific tools: connect, disconnect and list
   * displays.
   *
   * @returns {Array<{ name: string, description: string, schema: object, handler: Function }>}
   */
  preparePlatformTools() {
    return [
      {
        name: 'computer_connect',
        description: 'Connect to computer desktop. If displayId not provided, uses the primary display.',
        schema: {
          // Points at the tool defined below; no tool named `list_displays`
          // is declared in this file.
          displayId: z.string().optional().describe('Display ID (from computer_list_displays)')
        },
        handler: async ({ displayId }) => {
          const agent = await this.ensureAgent(displayId);
          const screenshot = await agent.interface.screenshotBase64();
          return {
            content: [
              {
                type: 'text',
                text: `Connected to computer${displayId ? ` (Display: ${displayId})` : ' (Primary display)'}`
              },
              ...this.buildScreenshotContent(screenshot)
            ]
          };
        }
      },
      {
        name: 'computer_disconnect',
        description: 'Disconnect from computer and release resources',
        schema: {},
        handler: this.createDisconnectHandler('computer')
      },
      {
        name: 'computer_list_displays',
        description: 'List all available displays/monitors',
        schema: {},
        handler: async () => {
          const displays = await ComputerDevice.listDisplays();
          // listDisplays() returns [] when detection fails; say so instead
          // of printing a header with nothing under it.
          const text = displays.length > 0
            ? `Available displays:\n${displays.map((d) => `- ${d.name} (ID: ${d.id})${d.primary ? ' [PRIMARY]' : ''}`).join('\n')}`
            : 'No displays detected';
          return {
            content: [
              {
                type: 'text',
                text
              }
            ]
          };
        }
      }
    ];
  }
}
|
|
478
|
+
/**
 * MCP server for desktop automation, backed by ComputerMidsceneTools.
 */
class ComputerMCPServer extends BaseMCPServer {
  /**
   * @returns {ComputerMidsceneTools} A new tools manager.
   */
  createToolsManager() {
    return new ComputerMidsceneTools();
  }

  /**
   * @param {object} [toolsManager] - Forwarded to the base class as its
   *   second constructor argument.
   */
  constructor(toolsManager) {
    super({
      name: '@midscene/computer-mcp',
      // `__VERSION__` is a build-time placeholder. `typeof` keeps construction
      // from throwing a ReferenceError when it was neither substituted nor
      // defined globally.
      version: typeof __VERSION__ === 'undefined' ? 'unknown' : __VERSION__,
      description: 'Control the computer desktop using natural language commands'
    }, toolsManager);
  }
}
|
|
490
|
+
/**
 * Create an MCP server launcher bound to an existing agent.
 *
 * @param {object} agent - Agent handed to `createMCPServerLauncher`.
 * @returns {*} Whatever `createMCPServerLauncher` returns.
 */
function mcpServerForAgent(agent) {
  const launcherConfig = {
    agent,
    platformName: 'Computer',
    ToolsManagerClass: ComputerMidsceneTools,
    MCPServerClass: ComputerMCPServer
  };
  return createMCPServerLauncher(launcherConfig);
}
|
|
498
|
+
/**
 * Build an MCP "kit" (a description plus tool definitions) for an existing
 * agent, without starting a server.
 *
 * @param {object} agent - Agent to attach to a new ComputerMidsceneTools.
 * @returns {Promise<{ description: string, tools: * }>} `tools` is the value of
 *   `getToolDefinitions()` after `initTools()` has completed.
 */
async function mcpKitForAgent(agent) {
  const toolsManager = new ComputerMidsceneTools();
  // The agent is passed through as-is; the former `instanceof` ternary
  // returned `agent` on both branches.
  toolsManager.setAgent(agent);
  await toolsManager.initTools();
  return {
    description: 'Midscene MCP Kit for computer desktop automation',
    tools: toolsManager.getToolDefinitions()
  };
}
|
|
508
|
+
export { ComputerMCPServer, mcpKitForAgent, mcpServerForAgent };
|