@midscene/computer 1.2.1-beta-20260112081017.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +243 -0
- package/dist/es/index.mjs +438 -0
- package/dist/es/mcp-server.mjs +508 -0
- package/dist/lib/index.js +498 -0
- package/dist/lib/mcp-server.js +559 -0
- package/dist/types/index.d.ts +70 -0
- package/dist/types/mcp-server.d.ts +88 -0
- package/package.json +48 -0
- package/rslib.config.ts +26 -0
- package/src/agent.ts +17 -0
- package/src/device.ts +554 -0
- package/src/index.ts +8 -0
- package/src/mcp-server.ts +65 -0
- package/src/mcp-tools.ts +96 -0
- package/src/types/libnut.d.ts +36 -0
- package/src/utils.ts +51 -0
- package/tests/ai/ai-auto-todo.test.ts +85 -0
- package/tests/ai/ai-shop.test.ts +56 -0
- package/tests/ai/basic.test.ts +46 -0
- package/tests/ai/keyboard.test.ts +66 -0
- package/tests/ai/multi-display.test.ts +76 -0
- package/tests/ai/test-utils.ts +31 -0
- package/tests/ai/web-browser.test.ts +63 -0
- package/tests/unit-test/agent.test.ts +34 -0
- package/tests/unit-test/device.test.ts +53 -0
- package/tsconfig.json +18 -0
- package/tsconfig.tsbuildinfo +1 -0
- package/vitest.config.ts +47 -0
package/src/device.ts
ADDED
|
@@ -0,0 +1,554 @@
|
|
|
1
|
+
import assert from 'node:assert';
|
|
2
|
+
import {
|
|
3
|
+
type DeviceAction,
|
|
4
|
+
type InterfaceType,
|
|
5
|
+
type LocateResultElement,
|
|
6
|
+
type Size,
|
|
7
|
+
getMidsceneLocationSchema,
|
|
8
|
+
z,
|
|
9
|
+
} from '@midscene/core';
|
|
10
|
+
import {
|
|
11
|
+
type AbstractInterface,
|
|
12
|
+
type ActionTapParam,
|
|
13
|
+
defineAction,
|
|
14
|
+
defineActionClearInput,
|
|
15
|
+
defineActionDoubleClick,
|
|
16
|
+
defineActionDragAndDrop,
|
|
17
|
+
defineActionHover,
|
|
18
|
+
defineActionKeyboardPress,
|
|
19
|
+
defineActionRightClick,
|
|
20
|
+
defineActionScroll,
|
|
21
|
+
defineActionTap,
|
|
22
|
+
} from '@midscene/core/device';
|
|
23
|
+
import { sleep } from '@midscene/core/utils';
|
|
24
|
+
import { createImgBase64ByFormat } from '@midscene/shared/img';
|
|
25
|
+
import { getDebug } from '@midscene/shared/logger';
|
|
26
|
+
import screenshot from 'screenshot-desktop';
|
|
27
|
+
|
|
28
|
+
// Type definitions
|
|
29
|
+
/** Mouse buttons accepted by the click and toggle calls below. */
type LibNutMouseButton = 'left' | 'right' | 'middle';

/**
 * Shape of the libnut object as this module uses it. Only the members that
 * are called from this file are declared here.
 */
interface LibNut {
  /** Screen dimensions as reported by libnut. */
  getScreenSize(): { width: number; height: number };

  /** Current pointer position. */
  getMousePos(): { x: number; y: number };

  /** Place the pointer at the given coordinates. */
  moveMouse(x: number, y: number): void;

  /** Click a button; `double` requests a double click. */
  mouseClick(button?: LibNutMouseButton, double?: boolean): void;

  /** Press (`'down'`) or release (`'up'`) a button without the matching opposite event. */
  mouseToggle(state: 'up' | 'down', button?: LibNutMouseButton): void;

  /** Scroll by the given horizontal (`x`) and vertical (`y`) amounts. */
  scrollMouse(x: number, y: number): void;

  /** Tap a key, optionally together with modifier keys. */
  keyTap(key: string, modifiers?: string[]): void;

  /** Type a string of text. */
  typeString(text: string): void;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Options object handed to the `screenshot()` call in this module.
 */
interface ScreenshotOptions {
  /** Image format requested from the screenshot call. */
  format: 'png' | 'jpg';
  /** Screen to capture; left unset when no display was selected. */
  screen?: number | string;
}
|
|
44
|
+
|
|
45
|
+
/**
 * One entry of the array returned by `screenshot.listDisplays()`, as consumed
 * by this module.
 */
interface ScreenshotDisplay {
  /** Display identifier; converted to a string before being exposed. */
  id: number | string;
  /** Display name, when one is provided. */
  name?: string;
  /** Primary-display flag, when one is provided. */
  primary?: boolean;
}
|
|
50
|
+
|
|
51
|
+
// Constants
// NOTE(review): every *_DELAY / *_WAIT / *_DURATION value is passed to sleep();
// presumably milliseconds — confirm against @midscene/core/utils.

// Pointer glide before a tap: number of interpolation steps and pause per step.
const SMOOTH_MOVE_STEPS_TAP = 8;
const SMOOTH_MOVE_DELAY_TAP = 8;

// Pointer glide before a hover, plus the pause once the pointer has arrived.
const SMOOTH_MOVE_STEPS_HOVER = 10;
const SMOOTH_MOVE_DELAY_HOVER = 10;
const HOVER_EFFECT_WAIT = 300;

// Pause between button-down and button-up of a tap.
const CLICK_HOLD_DURATION = 50;

// Input action: pause after clicking the field, and after clearing it.
const INPUT_FOCUS_DELAY = 300;
const INPUT_CLEAR_DELAY = 150;

// Scrolling: repetitions and per-repetition pause for scroll-to-edge, and the
// pause after a single scroll.
const SCROLL_REPEAT_COUNT = 10;
const SCROLL_STEP_DELAY = 100;
const SCROLL_COMPLETE_DELAY = 500;
|
|
63
|
+
|
|
64
|
+
// Lazy load libnut with fallback
// `libnut` holds the binding once it has been loaded successfully.
// `libnutLoadError` holds the failure of the first load attempt so that later
// calls fail immediately instead of repeating the import.
let libnut: LibNut | null = null;
let libnutLoadError: Error | null = null;

/**
 * Load the libnut binding on first use and hand back the cached object on
 * every later call.
 *
 * @returns the loaded libnut object
 * @throws Error when the module cannot be imported or does not expose a
 *   `libnut` object. The same descriptive error instance is thrown again on
 *   every subsequent call.
 */
async function getLibnut(): Promise<LibNut> {
  if (libnut) return libnut;
  if (libnutLoadError) throw libnutLoadError;

  try {
    const libnutModule = await import(
      '@computer-use/libnut/dist/import_libnut'
    );
    // Validate before publishing to the shared variable so that `libnut`
    // never holds an undefined value typed as LibNut.
    const loaded = libnutModule.libnut as LibNut | undefined;
    if (!loaded) {
      throw new Error('libnut module loaded but libnut object is undefined');
    }
    libnut = loaded;
    return loaded;
  } catch (error) {
    // Cache the wrapped error (not the raw one) so the first and all later
    // failures carry the same installation hint, and so the cached value is
    // always a real Error even if something else was thrown.
    libnutLoadError = new Error(
      `Failed to load @computer-use/libnut. Make sure it is properly installed and compiled for your platform. Error: ${error}`,
    );
    throw libnutLoadError;
  }
}
|
|
88
|
+
|
|
89
|
+
// Debug logger used throughout this module, obtained via getDebug('computer:device').
const debugDevice = getDebug('computer:device');
|
|
90
|
+
|
|
91
|
+
/**
 * Smooth mouse movement to trigger mousemove events
 *
 * Moves the pointer from its current position to the target in evenly spaced
 * increments, sleeping after each one. The last increment lands exactly on
 * the rounded target.
 *
 * @param targetX destination x coordinate
 * @param targetY destination y coordinate
 * @param steps number of increments; values that are not a whole number of
 *   at least 1 are floored and clamped to 1 so the pointer always arrives
 * @param stepDelay value passed to sleep() after each increment
 * @throws AssertionError when libnut has not been loaded yet
 */
async function smoothMoveMouse(
  targetX: number,
  targetY: number,
  steps: number,
  stepDelay: number,
): Promise<void> {
  assert(libnut, 'libnut not initialized');

  // With steps <= 0 or NaN the loop would never run, and with a fractional
  // count it would stop short of the target; either way the following click
  // would land in the wrong place.
  const totalSteps = Number.isFinite(steps)
    ? Math.max(1, Math.floor(steps))
    : 1;

  const start = libnut.getMousePos();
  const deltaX = targetX - start.x;
  const deltaY = targetY - start.y;

  for (let step = 1; step <= totalSteps; step++) {
    const nextX = Math.round(start.x + (deltaX * step) / totalSteps);
    const nextY = Math.round(start.y + (deltaY * step) / totalSteps);
    libnut.moveMouse(nextX, nextY);
    await sleep(stepDelay);
  }
}
|
|
113
|
+
|
|
114
|
+
// Key name mapping for cross-platform compatibility
// Note: Modifier keys have different names when used as primary key vs modifier
const KEY_NAME_MAP: Record<string, string> = {
  // Modifier keys (for use in modifiers array)
  windows: 'win',
  win: 'win',
  ctrl: 'control',
  esc: 'escape',
  del: 'delete',
  ins: 'insert',
  // Navigation keys
  pgup: 'pageup',
  pgdn: 'pagedown',
  arrowup: 'up',
  arrowdown: 'down',
  arrowleft: 'left',
  arrowright: 'right',
  // Media keys
  volumedown: 'audio_vol_down',
  volumeup: 'audio_vol_up',
  mediavolumedown: 'audio_vol_down',
  mediavolumeup: 'audio_vol_up',
  mute: 'audio_mute',
  mediamute: 'audio_mute',
  mediaplay: 'audio_play',
  mediapause: 'audio_pause',
  mediaplaypause: 'audio_play',
  mediastop: 'audio_stop',
  medianexttrack: 'audio_next',
  mediaprevioustrack: 'audio_prev',
  medianext: 'audio_next',
  mediaprev: 'audio_prev',
};

// When pressing modifier keys alone (as primary key), use these names
// This is needed because libnut requires different key names for modifiers
// when they are the main key vs when they are in the modifiers array
const PRIMARY_KEY_MAP: Record<string, string> = {
  command: 'cmd',
  cmd: 'cmd',
  meta: 'meta',
  control: 'control',
  ctrl: 'control',
  shift: 'shift',
  alt: 'alt',
  option: 'alt',
};

/**
 * Look up an alias using own properties only, so that names such as
 * "constructor" or "__proto__" are never resolved to inherited
 * Object.prototype members.
 */
function lookupKeyAlias(
  table: Record<string, string>,
  key: string,
): string | undefined {
  return Object.prototype.hasOwnProperty.call(table, key)
    ? table[key]
    : undefined;
}

/**
 * Lower-case a key name and strip surrounding whitespace (so "Ctrl " from
 * "Ctrl + A" still matches). A name that is nothing but whitespace is kept
 * as typed, because it may be a literal space key.
 */
function canonicalKeyToken(key: string): string {
  const trimmed = key.trim();
  return (trimmed === '' ? key : trimmed).toLowerCase();
}

/**
 * Normalize a key name for use in a modifiers array.
 *
 * @param key key name in any letter case
 * @returns the KEY_NAME_MAP alias if one exists, otherwise the lower-cased name
 */
function normalizeKeyName(key: string): string {
  const lowerKey = canonicalKeyToken(key);
  return lookupKeyAlias(KEY_NAME_MAP, lowerKey) || lowerKey;
}

/**
 * Normalize a key name for use as the primary (last) key of a combination.
 *
 * @param key key name in any letter case
 * @returns the PRIMARY_KEY_MAP alias if one exists, else the KEY_NAME_MAP
 *   alias, else the lower-cased name
 */
function normalizePrimaryKey(key: string): string {
  const lowerKey = canonicalKeyToken(key);
  // First check PRIMARY_KEY_MAP for modifier keys pressed alone,
  // then fall back to the regular KEY_NAME_MAP.
  return (
    lookupKeyAlias(PRIMARY_KEY_MAP, lowerKey) ||
    lookupKeyAlias(KEY_NAME_MAP, lowerKey) ||
    lowerKey
  );
}
|
|
176
|
+
|
|
177
|
+
/**
 * Description of one display as returned by `ComputerDevice.listDisplays()`.
 */
export interface DisplayInfo {
  /** Display identifier, always converted to a string. */
  id: string;
  /** Display name; falls back to `Display <id>` when none is reported. */
  name: string;
  /** Whether the display is reported as primary. */
  primary?: boolean;
}
|
|
182
|
+
|
|
183
|
+
/**
 * Construction options for `ComputerDevice`.
 */
export interface ComputerDeviceOpt {
  /** Display to capture; when omitted, no screen is passed to the screenshot call. */
  displayId?: string;
  /** Extra actions appended after the built-in and platform actions. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  customActions?: Array<DeviceAction<any>>;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Desktop device: captures the screen through `screenshot-desktop` and drives
 * the pointer and keyboard through the module-level libnut binding.
 *
 * `connect()` has to complete before `size()` or any action is used, because
 * those assert that libnut has been loaded.
 */
export class ComputerDevice implements AbstractInterface {
  interfaceType: InterfaceType = 'computer';
  private options?: ComputerDeviceOpt;
  private displayId?: string;
  private description?: string;
  private destroyed = false;
  uri?: string;

  /**
   * @param options optional display selection and custom actions
   */
  constructor(options?: ComputerDeviceOpt) {
    this.options = options;
    this.displayId = options?.displayId;
  }

  /**
   * @returns the description built by `connect()`, or a generic label when
   *   the device has not been connected yet
   */
  describe(): string {
    return this.description || 'Computer Device';
  }

  /**
   * Get all available displays
   *
   * Best effort: a failure is logged through the debug logger and an empty
   * list is returned instead of throwing.
   */
  static async listDisplays(): Promise<DisplayInfo[]> {
    try {
      const displays: ScreenshotDisplay[] = await screenshot.listDisplays();
      return displays.map((d) => ({
        id: String(d.id),
        name: d.name || `Display ${d.id}`,
        primary: d.primary || false,
      }));
    } catch (error) {
      debugDevice(`Failed to list displays: ${error}`);
      return [];
    }
  }

  /**
   * Load libnut and build the device description from the platform, the
   * selected display, the screen size and the list of displays.
   *
   * @throws Error when libnut cannot be loaded or the screen size cannot be read
   */
  async connect(): Promise<void> {
    debugDevice('Connecting to computer device');

    try {
      // Load libnut on first connect
      libnut = await getLibnut();

      const size = await this.size();
      const displays = await ComputerDevice.listDisplays();

      this.description = `
Type: Computer
Platform: ${process.platform}
Display: ${this.displayId || 'Primary'}
Screen Size: ${size.width}x${size.height}
Available Displays: ${displays.length > 0 ? displays.map((d) => d.name).join(', ') : 'Unknown'}
`;
      debugDevice('Computer device connected', this.description);
    } catch (error) {
      debugDevice(`Failed to connect: ${error}`);
      throw new Error(`Unable to connect to computer device: ${error}`);
    }
  }

  /**
   * Capture a PNG screenshot of the selected display.
   *
   * @returns the value produced by `createImgBase64ByFormat('png', ...)` for
   *   the captured image
   * @throws Error when the capture fails
   */
  async screenshotBase64(): Promise<string> {
    debugDevice('Taking screenshot', { displayId: this.displayId });

    try {
      const options: ScreenshotOptions = { format: 'png' };
      if (this.displayId !== undefined) {
        // On macOS: displayId is numeric (CGDirectDisplayID)
        // On Windows: displayId is string like "\\.\DISPLAY1"
        // On Linux: displayId is string like ":0.0"
        if (process.platform === 'darwin') {
          const screenIndex = Number(this.displayId);
          if (!Number.isNaN(screenIndex)) {
            options.screen = screenIndex;
          } else {
            // Make the fallback visible instead of silently capturing a
            // different display than the one that was requested.
            debugDevice(
              `Non-numeric displayId on macOS, no screen option will be passed: ${this.displayId}`,
            );
          }
        } else {
          // Windows and Linux use string IDs directly
          options.screen = this.displayId;
        }
      }

      debugDevice('Screenshot options', options);
      const buffer: Buffer = await screenshot(options);
      return createImgBase64ByFormat('png', buffer.toString('base64'));
    } catch (error) {
      debugDevice(`Screenshot failed: ${error}`);
      throw new Error(`Failed to take screenshot: ${error}`);
    }
  }

  /**
   * @returns the screen size reported by libnut, with `dpr` fixed to 1
   * @throws AssertionError when libnut has not been loaded
   * @throws Error when libnut fails to report the size
   */
  async size(): Promise<Size> {
    assert(libnut, 'libnut not initialized');
    try {
      const screenSize = libnut.getScreenSize();
      return {
        width: screenSize.width,
        height: screenSize.height,
        dpr: 1, // Desktop typically uses logical pixels
      };
    } catch (error) {
      debugDevice(`Failed to get screen size: ${error}`);
      throw new Error(`Failed to get screen size: ${error}`);
    }
  }

  /**
   * Build the list of actions this device supports: the built-in pointer and
   * keyboard actions, then the platform actions, then any custom actions
   * supplied through the constructor options.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  actionSpace(): DeviceAction<any>[] {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const defaultActions: DeviceAction<any>[] = [
      // Tap (single click)
      defineActionTap(async (param: ActionTapParam) => {
        assert(libnut, 'libnut not initialized');
        const nut = libnut;
        const element = param.locate as LocateResultElement;
        assert(element, 'Element not found, cannot tap');
        const [x, y] = element.center;
        const targetX = Math.round(x);
        const targetY = Math.round(y);

        await smoothMoveMouse(
          targetX,
          targetY,
          SMOOTH_MOVE_STEPS_TAP,
          SMOOTH_MOVE_DELAY_TAP,
        );
        // Use mouseToggle for more realistic click behavior
        nut.mouseToggle('down', 'left');
        try {
          await sleep(CLICK_HOLD_DURATION);
        } finally {
          // Always release, so a failure while holding cannot leave the
          // button pressed.
          nut.mouseToggle('up', 'left');
        }
      }),

      // DoubleClick
      defineActionDoubleClick(async (param) => {
        assert(libnut, 'libnut not initialized');
        const element = param.locate as LocateResultElement;
        assert(element, 'Element not found, cannot double click');
        const [x, y] = element.center;
        libnut.moveMouse(Math.round(x), Math.round(y));
        libnut.mouseClick('left', true);
      }),

      // RightClick
      defineActionRightClick(async (param) => {
        assert(libnut, 'libnut not initialized');
        const element = param.locate as LocateResultElement;
        assert(element, 'Element not found, cannot right click');
        const [x, y] = element.center;
        libnut.moveMouse(Math.round(x), Math.round(y));
        libnut.mouseClick('right');
      }),

      // Hover
      defineActionHover(async (param) => {
        assert(libnut, 'libnut not initialized');
        const element = param.locate as LocateResultElement;
        assert(element, 'Element not found, cannot hover');
        const [x, y] = element.center;
        const targetX = Math.round(x);
        const targetY = Math.round(y);

        await smoothMoveMouse(
          targetX,
          targetY,
          SMOOTH_MOVE_STEPS_HOVER,
          SMOOTH_MOVE_DELAY_HOVER,
        );
        await sleep(HOVER_EFFECT_WAIT);
      }),

      // Input
      defineAction({
        name: 'Input',
        description: 'Input text into the input field',
        interfaceAlias: 'aiInput',
        paramSchema: z.object({
          value: z.string().describe('The text to input'),
          mode: z
            .enum(['replace', 'clear', 'append'])
            .default('replace')
            .optional()
            .describe('Input mode: replace, clear, or append'),
          locate: getMidsceneLocationSchema()
            .describe('The input field to be filled')
            .optional(),
        }),
        call: async (param) => {
          assert(libnut, 'libnut not initialized');
          const element = param.locate as LocateResultElement | undefined;

          // The field is clicked (and cleared) only when it was located and
          // the mode is not 'append'. In append mode nothing is clicked, so
          // the text goes to whatever currently has focus.
          if (element && param.mode !== 'append') {
            // Click and clear
            const [x, y] = element.center;
            libnut.moveMouse(Math.round(x), Math.round(y));
            libnut.mouseClick('left');
            await sleep(INPUT_FOCUS_DELAY);

            // Select all and delete
            const modifier =
              process.platform === 'darwin' ? 'command' : 'control';
            libnut.keyTap('a', [modifier]);
            await sleep(50);
            libnut.keyTap('backspace');
            await sleep(INPUT_CLEAR_DELAY);
          }

          if (param.mode === 'clear') {
            return;
          }

          if (!param.value) {
            return;
          }

          libnut.typeString(param.value);
        },
      }),

      // Scroll
      defineActionScroll(async (param) => {
        assert(libnut, 'libnut not initialized');

        // Place the pointer over the located element first, if there is one.
        if (param.locate) {
          const element = param.locate as LocateResultElement;
          const [x, y] = element.center;
          libnut.moveMouse(Math.round(x), Math.round(y));
        }

        const scrollType = param?.scrollType;

        // Scroll to edge actions
        const scrollToEdgeActions: Record<string, [number, number]> = {
          scrollToTop: [0, 10],
          scrollToBottom: [0, -10],
          scrollToLeft: [-10, 0],
          scrollToRight: [10, 0],
        };

        const edgeAction = scrollToEdgeActions[scrollType || ''];
        if (edgeAction) {
          // Repeat a fixed-size scroll a fixed number of times.
          const [dx, dy] = edgeAction;
          for (let i = 0; i < SCROLL_REPEAT_COUNT; i++) {
            libnut.scrollMouse(dx, dy);
            await sleep(SCROLL_STEP_DELAY);
          }
          return;
        }

        // Single scroll action
        if (scrollType === 'singleAction' || !scrollType) {
          const distance = param?.distance || 500;
          // One scroll unit per 100 of distance, rounded up.
          const ticks = Math.ceil(distance / 100);
          const direction = param?.direction || 'down';

          const directionMap: Record<string, [number, number]> = {
            up: [0, ticks],
            down: [0, -ticks],
            left: [-ticks, 0],
            right: [ticks, 0],
          };

          const [dx, dy] = directionMap[direction] || [0, -ticks];
          libnut.scrollMouse(dx, dy);
          await sleep(SCROLL_COMPLETE_DELAY);
          return;
        }

        throw new Error(
          `Unknown scroll type: ${scrollType}, param: ${JSON.stringify(param)}`,
        );
      }),

      // KeyboardPress
      defineActionKeyboardPress(async (param) => {
        assert(libnut, 'libnut not initialized');

        // Click the located element first so it receives the key press.
        if (param.locate) {
          const [x, y] = param.locate.center;
          libnut.moveMouse(Math.round(x), Math.round(y));
          libnut.mouseClick('left');
          await sleep(50);
        }

        // Everything before the last part is a modifier; the last part is
        // the key that is tapped.
        const keys = splitKeyCombo(param.keyName);
        const modifiers = keys.slice(0, -1).map(normalizeKeyName);
        // Use normalizePrimaryKey for the main key to handle modifier keys pressed alone
        const key = normalizePrimaryKey(keys[keys.length - 1]);

        debugDevice('KeyboardPress', {
          original: param.keyName,
          key,
          modifiers,
        });

        if (modifiers.length > 0) {
          libnut.keyTap(key, modifiers);
        } else {
          libnut.keyTap(key);
        }
      }),

      // DragAndDrop
      defineActionDragAndDrop(async (param) => {
        assert(libnut, 'libnut not initialized');
        const nut = libnut;
        const from = param.from as LocateResultElement;
        const to = param.to as LocateResultElement;
        assert(from, 'missing "from" param for drag and drop');
        assert(to, 'missing "to" param for drag and drop');

        const [fromX, fromY] = from.center;
        const [toX, toY] = to.center;

        nut.moveMouse(Math.round(fromX), Math.round(fromY));
        nut.mouseToggle('down', 'left');
        try {
          await sleep(100);
          nut.moveMouse(Math.round(toX), Math.round(toY));
          await sleep(100);
        } finally {
          // Always release, so a failure mid-drag cannot leave the button
          // pressed.
          nut.mouseToggle('up', 'left');
        }
      }),

      // ClearInput
      defineActionClearInput(async (param) => {
        assert(libnut, 'libnut not initialized');
        const element = param.locate as LocateResultElement;
        assert(element, 'Element not found, cannot clear input');

        const [x, y] = element.center;
        libnut.moveMouse(Math.round(x), Math.round(y));
        libnut.mouseClick('left');
        await sleep(100);

        // Select all and delete
        const modifier = process.platform === 'darwin' ? 'command' : 'control';
        libnut.keyTap('a', [modifier]);
        libnut.keyTap('backspace');
        await sleep(50);
      }),
    ];

    const platformActions = Object.values(createPlatformActions());
    const customActions = this.options?.customActions || [];

    return [...defaultActions, ...platformActions, ...customActions];
  }

  /**
   * Mark the device as destroyed. Calling it again is a no-op.
   */
  async destroy(): Promise<void> {
    if (this.destroyed) {
      return;
    }

    this.destroyed = true;
    debugDevice('Computer device destroyed');
  }

  /**
   * @returns always an empty string
   */
  async url(): Promise<string> {
    return '';
  }
}

/**
 * Split a key combination such as "ctrl+shift+a" into its parts.
 *
 * Two things a plain `split('+')` gets wrong are handled here:
 * - a literal plus key ("+", "ctrl++") leaves two blank parts at the end of
 *   the split; they are merged back into a single "+" part;
 * - spaces around the separator ("Ctrl + A") are trimmed. A part that is
 *   nothing but whitespace is kept as typed, since it may be a space key.
 */
function splitKeyCombo(keyName: string): string[] {
  const parts = keyName.split('+');
  const last = parts.length - 1;
  if (
    last >= 1 &&
    parts[last].trim() === '' &&
    parts[last - 1].trim() === ''
  ) {
    parts.splice(last - 1, 2, '+');
  }
  return parts.map((part) => {
    const trimmed = part.trim();
    return trimmed === '' ? part : trimmed;
  });
}
|
|
540
|
+
|
|
541
|
+
/**
 * Platform-specific actions
 *
 * Returns the extra actions, keyed by name, that `actionSpace()` adds after
 * the built-in ones. Currently this is only `ListDisplays`, which reports
 * the result of `ComputerDevice.listDisplays()`.
 */
function createPlatformActions() {
  const listDisplaysAction = defineAction({
    name: 'ListDisplays',
    description: 'List all available displays/monitors',
    call: async () => ComputerDevice.listDisplays(),
  });

  return {
    ListDisplays: listDisplaysAction,
  } as const;
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export { ComputerDevice } from './device';
|
|
2
|
+
export type { ComputerDeviceOpt, DisplayInfo } from './device';
|
|
3
|
+
|
|
4
|
+
export { ComputerAgent, agentFromComputer } from './agent';
|
|
5
|
+
export type { ComputerAgentOpt } from './agent';
|
|
6
|
+
|
|
7
|
+
export { overrideAIConfig } from '@midscene/shared/env';
|
|
8
|
+
export { checkComputerEnvironment, getConnectedDisplays } from './utils';
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import type { Agent } from '@midscene/core/agent';
|
|
2
|
+
import {
|
|
3
|
+
BaseMCPServer,
|
|
4
|
+
type Tool,
|
|
5
|
+
createMCPServerLauncher,
|
|
6
|
+
} from '@midscene/shared/mcp';
|
|
7
|
+
import { ComputerAgent } from './agent';
|
|
8
|
+
import { ComputerMidsceneTools } from './mcp-tools.js';
|
|
9
|
+
|
|
10
|
+
declare const __VERSION__: string;
|
|
11
|
+
|
|
12
|
+
/**
 * Computer MCP Server
 * Provides MCP tools for computer desktop automation
 *
 * Passes fixed server metadata and the optional tools manager to
 * BaseMCPServer, and supplies ComputerMidsceneTools as the tools manager
 * implementation.
 */
export class ComputerMCPServer extends BaseMCPServer {
  /**
   * @param toolsManager optional tools manager, forwarded unchanged to the
   *   base class
   */
  constructor(toolsManager?: ComputerMidsceneTools) {
    const serverInfo = {
      name: '@midscene/computer-mcp',
      version: __VERSION__,
      description:
        'Control the computer desktop using natural language commands',
    };
    super(serverInfo, toolsManager);
  }

  /**
   * @returns a new ComputerMidsceneTools instance
   */
  protected createToolsManager(): ComputerMidsceneTools {
    const manager = new ComputerMidsceneTools();
    return manager;
  }
}
|
|
33
|
+
|
|
34
|
+
/**
 * Create an MCP server launcher for a specific Computer Agent
 *
 * @param agent the agent handed to the launcher
 * @returns whatever `createMCPServerLauncher` returns for the 'Computer'
 *   platform with the computer tools manager and server classes
 */
export function mcpServerForAgent(agent: Agent | ComputerAgent) {
  return createMCPServerLauncher({
    platformName: 'Computer',
    agent,
    MCPServerClass: ComputerMCPServer,
    ToolsManagerClass: ComputerMidsceneTools,
  });
}
|
|
45
|
+
|
|
46
|
+
/**
 * Create MCP kit for a specific Computer Agent
 *
 * Attaches the agent to a fresh ComputerMidsceneTools instance, initialises
 * its tools, and returns their definitions together with a fixed description.
 *
 * @param agent the agent to expose; it is handed to the tools manager typed
 *   as a ComputerAgent
 * @returns the kit description and the tool definitions reported by the
 *   tools manager
 */
export async function mcpKitForAgent(agent: Agent | ComputerAgent): Promise<{
  description: string;
  tools: Tool[];
}> {
  const toolsManager = new ComputerMidsceneTools();

  // Convert Agent to ComputerAgent if needed.
  // Both branches pass on the same object: the cast changes only the static
  // type and performs no runtime conversion.
  let computerAgent: ComputerAgent;
  if (agent instanceof ComputerAgent) {
    computerAgent = agent;
  } else {
    computerAgent = agent as ComputerAgent;
  }

  toolsManager.setAgent(computerAgent);
  await toolsManager.initTools();

  const tools = toolsManager.getToolDefinitions();
  return {
    description: 'Midscene MCP Kit for computer desktop automation',
    tools,
  };
}
|