ucu-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,425 @@
1
+ /**
2
+ * Cross-platform input synthesis for UCU-MCP.
3
+ *
4
+ * macOS: Uses CGEvent API exclusively for BACKGROUND input injection.
5
+ * This does NOT activate windows or steal focus — the AI agent can
6
+ * control the desktop while the user continues working in another
7
+ * terminal/window without interruption.
8
+ *
9
+ * Windows: Not implemented yet (SendInput stub) — input operations throw.
10
+ * Linux: Uses the xdotool command-line tool.
11
+ */
12
+ import { execFile } from "node:child_process";
13
+ import { promisify } from "node:util";
14
+ import { logger } from "../util/logger.js";
15
+ const execFileAsync = promisify(execFile);
16
+ // ── Dry-run mode ──────────────────────────────────────────────────────────
17
/** Reports whether dry-run mode is on: UCU_DRY_RUN must be exactly "true". */
const isDryRun = () => {
    const flag = process.env.UCU_DRY_RUN;
    return flag === "true";
};
/**
 * Logs the operation that was skipped because dry-run mode is active.
 *
 * @param {string} action - Name of the skipped operation (interpolated into the message).
 * @param {object} details - Passed to the logger unchanged as the second argument.
 */
function logDryRun(action, details) {
    const message = `[DRY RUN] Would ${action}`;
    logger.info(message, details);
}
21
+ // ── macOS key code map ────────────────────────────────────────────────────
22
+ const MAC_KEY_CODES = {
23
+ enter: 36, return: 36,
24
+ tab: 48,
25
+ escape: 53, esc: 53,
26
+ backspace: 51, delete: 51,
27
+ forwarddelete: 117, fn_delete: 117,
28
+ space: 49,
29
+ up: 126, down: 125, left: 123, right: 124,
30
+ home: 115, end: 119,
31
+ pageup: 116, pagedown: 121,
32
+ f1: 122, f2: 120, f3: 99, f4: 118, f5: 96, f6: 97,
33
+ f7: 98, f8: 100, f9: 101, f10: 109, f11: 103, f12: 111,
34
+ capslock: 57,
35
+ };
36
+ const MAC_MODIFIER_FLAGS = {
37
+ cmd: 0x00100000, command: 0x00100000,
38
+ shift: 0x00020000,
39
+ option: 0x00080000, alt: 0x00080000,
40
+ control: 0x00040000, ctrl: 0x00040000,
41
+ };
42
+ // ── AppleScript string escaping ───────────────────────────────────────────
43
/**
 * Escapes text for embedding inside a double-quoted AppleScript string literal.
 *
 * Backslash, double quote, newline, carriage return and tab become their
 * backslash escape sequences; every other control character
 * (U+0000–U+001F, U+007F–U+009F) is removed.
 *
 * @param {string} str - Raw text.
 * @returns {string} Escaped text.
 */
function escapeAppleScriptString(str) {
    const escapes = {
        "\\": "\\\\",
        '"': '\\"',
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    };
    // One pass: characters with an escape are rewritten, the remaining
    // matched control characters have no table entry and are dropped.
    return str.replace(/[\\"\x00-\x1f\x7f-\x9f]/g, (ch) => escapes[ch] ?? "");
}
52
+ // ── JXA runner helper ─────────────────────────────────────────────────────
53
/**
 * Runs a JavaScript-for-Automation script through /usr/bin/osascript.
 *
 * @param {string} script - JXA source passed via `-e`.
 * @param {number} [timeout=5000] - Timeout in milliseconds handed to execFile.
 * @returns {Promise<string>} The script's stdout with surrounding whitespace trimmed.
 */
async function runJXA(script, timeout = 5000) {
    const argv = ["-l", "JavaScript", "-e", script];
    const result = await execFileAsync("/usr/bin/osascript", argv, { timeout });
    return result.stdout.trim();
}
59
+ // ── Mouse operations (CGEvent — background, no focus steal) ───────────────
60
/**
 * Click a mouse button at absolute screen coordinates.
 *
 * - macOS: posts a button-down / button-up CGEvent pair via a JXA script.
 * - Linux: moves the pointer with xdotool, then clicks.
 * - Any other platform: throws (not implemented).
 *
 * Arguments are validated before the dry-run check, so a dry run reports the
 * same input errors a real run would.
 *
 * @param {number} x - Horizontal screen coordinate.
 * @param {number} y - Vertical screen coordinate.
 * @param {"left"|"right"|"middle"} [button="left"] - Button to click.
 * @param {string} [_platform=process.platform] - Platform override (used by tests).
 * @returns {Promise<void>}
 * @throws {Error} If the button is unknown or the platform is unsupported.
 * @throws {TypeError} If a coordinate is not a finite number.
 */
export async function click(x, y, button = "left", _platform = process.platform) {
    // CGMouseButton plus the matching CGEventType values. The middle ("other")
    // button uses kCGEventOtherMouseDown/Up (25/26); deriving the types as
    // 1 + 2n / 2 + 2n only works for left and right and would post
    // mouse-moved / left-dragged events for the middle button.
    const macButtons = {
        left: { button: 0, down: 1, up: 2 },
        right: { button: 1, down: 3, up: 4 },
        middle: { button: 2, down: 25, up: 26 },
    };
    const xdotoolButtons = { left: "1", right: "3", middle: "2" };
    if (!Object.hasOwn(macButtons, button)) {
        throw new Error(`Unsupported mouse button: ${button}. Supported: ${Object.keys(macButtons).join(", ")}`);
    }
    // Coordinates are interpolated into a script, so only plain numbers may pass.
    const px = Number(x);
    const py = Number(y);
    if (!Number.isFinite(px) || !Number.isFinite(py)) {
        throw new TypeError(`click requires finite numeric coordinates, got (${String(x)}, ${String(y)})`);
    }
    if (isDryRun()) {
        logDryRun("click", { x, y, button });
        return;
    }
    if (_platform === "darwin") {
        const ids = macButtons[button];
        await runJXA(`
ObjC.import('CoreGraphics');
var loc = $.CGPointMake(${px}, ${py});
var down = $.CGEventCreateMouseEvent(null, ${ids.down}, loc, ${ids.button});
var up = $.CGEventCreateMouseEvent(null, ${ids.up}, loc, ${ids.button});
$.CGEventPost(0, down);
$.CGEventPost(0, up);
$.CFRelease(down);
$.CFRelease(up);
`);
        return;
    }
    if (_platform === "linux") {
        await execFileAsync("xdotool", ["mousemove", String(px), String(py)]);
        await execFileAsync("xdotool", ["click", xdotoolButtons[button]]);
        return;
    }
    // Windows
    throw new Error("click not implemented for Windows");
}
88
/**
 * Double-click a mouse button at absolute screen coordinates.
 *
 * - macOS: posts two down/up CGEvent pairs whose click-state field
 *   (integer field 1) is set to 1 and then 2.
 * - Other platforms: falls back to two `click()` calls 50 ms apart.
 *
 * Arguments are validated before the dry-run check.
 *
 * @param {number} x - Horizontal screen coordinate.
 * @param {number} y - Vertical screen coordinate.
 * @param {"left"|"right"|"middle"} [button="left"] - Button to click.
 * @param {string} [_platform=process.platform] - Platform override (used by tests).
 * @returns {Promise<void>}
 * @throws {Error} If the button is unknown.
 * @throws {TypeError} If a coordinate is not a finite number.
 */
export async function doubleClick(x, y, button = "left", _platform = process.platform) {
    // CGMouseButton plus CGEventType values; the middle button needs the
    // "other mouse" event types (25/26), not the 1 + 2n / 2 + 2n pattern.
    const macButtons = {
        left: { button: 0, down: 1, up: 2 },
        right: { button: 1, down: 3, up: 4 },
        middle: { button: 2, down: 25, up: 26 },
    };
    if (!Object.hasOwn(macButtons, button)) {
        throw new Error(`Unsupported mouse button: ${button}. Supported: ${Object.keys(macButtons).join(", ")}`);
    }
    const px = Number(x);
    const py = Number(y);
    if (!Number.isFinite(px) || !Number.isFinite(py)) {
        throw new TypeError(`doubleClick requires finite numeric coordinates, got (${String(x)}, ${String(y)})`);
    }
    if (isDryRun()) {
        logDryRun("doubleClick", { x, y, button });
        return;
    }
    if (_platform === "darwin") {
        const ids = macButtons[button];
        // [event type, click count] in posting order.
        const sequence = [[ids.down, 1], [ids.up, 1], [ids.down, 2], [ids.up, 2]];
        const statements = sequence.map(([type, count]) => `
ev = $.CGEventCreateMouseEvent(null, ${type}, loc, ${ids.button});
$.CGEventSetIntegerValueField(ev, 1, ${count});
$.CGEventPost(0, ev);
$.CFRelease(ev);`).join("\n");
        await runJXA(`
ObjC.import('CoreGraphics');
var loc = $.CGPointMake(${px}, ${py});
var ev;
${statements}
`);
        return;
    }
    // Fallback: two clicks
    await click(x, y, button, _platform);
    await new Promise((resolve) => setTimeout(resolve, 50));
    await click(x, y, button, _platform);
}
122
/**
 * Move the mouse pointer to absolute screen coordinates.
 *
 * macOS posts a CGEvent of type 5 (mouse moved) via JXA; Linux shells out to
 * `xdotool mousemove`; every other platform throws.
 *
 * @param {number} x - Horizontal screen coordinate.
 * @param {number} y - Vertical screen coordinate.
 * @param {string} [_platform=process.platform] - Platform override (used by tests).
 * @returns {Promise<void>}
 */
export async function move(x, y, _platform = process.platform) {
    if (isDryRun()) {
        logDryRun("move", { x, y });
        return;
    }
    switch (_platform) {
        case "darwin": {
            const script = `
ObjC.import('CoreGraphics');
var loc = $.CGPointMake(${x}, ${y});
var ev = $.CGEventCreateMouseEvent(null, 5, loc, 0);
$.CGEventPost(0, ev);
$.CFRelease(ev);
`;
            await runJXA(script);
            return;
        }
        case "linux": {
            const target = [String(x), String(y)];
            await execFileAsync("xdotool", ["mousemove", ...target]);
            return;
        }
        default:
            throw new Error("move not implemented for Windows");
    }
}
143
/**
 * Press a mouse button at one point, move to another point and release.
 *
 * - macOS: a single JXA script posts button-down, 2–60 interpolated drag
 *   events spread over `duration`, then button-up.
 * - Linux: xdotool mousedown → mousemove → mouseup (`duration` is not used).
 * - Any other platform: throws (not implemented).
 *
 * Arguments are validated before the dry-run check.
 *
 * @param {number} fromX - Start X coordinate.
 * @param {number} fromY - Start Y coordinate.
 * @param {number} toX - End X coordinate.
 * @param {number} toY - End Y coordinate.
 * @param {"left"|"right"|"middle"} [button="left"] - Button held during the drag.
 * @param {number} [duration=300] - Drag duration in milliseconds; negative or
 *   non-finite values are treated as 0.
 * @param {string} [_platform=process.platform] - Platform override (used by tests).
 * @returns {Promise<void>}
 * @throws {Error} If the button is unknown or the platform is unsupported.
 * @throws {TypeError} If a coordinate is not a finite number.
 */
export async function drag(fromX, fromY, toX, toY, button = "left", duration = 300, _platform = process.platform) {
    // CGMouseButton plus CGEventType values. Each button has its own
    // down/up/dragged types: left 1/2/6, right 3/4/7, other (middle) 25/26/27.
    const macButtons = {
        left: { button: 0, down: 1, up: 2, dragged: 6 },
        right: { button: 1, down: 3, up: 4, dragged: 7 },
        middle: { button: 2, down: 25, up: 26, dragged: 27 },
    };
    const xdotoolButtons = { left: "1", right: "3", middle: "2" };
    if (!Object.hasOwn(macButtons, button)) {
        throw new Error(`Unsupported mouse button: ${button}. Supported: ${Object.keys(macButtons).join(", ")}`);
    }
    // Coordinates end up inside a generated script, so only plain numbers may pass.
    const coords = [fromX, fromY, toX, toY].map(Number);
    if (!coords.every(Number.isFinite)) {
        throw new TypeError("drag requires finite numeric coordinates");
    }
    const [x0, y0, x1, y1] = coords;
    const requestedMs = Number(duration);
    const durationMs = Number.isFinite(requestedMs) ? Math.max(0, requestedMs) : 0;
    if (isDryRun()) {
        logDryRun("drag", { fromX, fromY, toX, toY, button, duration });
        return;
    }
    if (_platform === "darwin") {
        const ids = macButtons[button];
        const steps = Math.max(2, Math.min(60, Math.ceil(durationMs / 16)));
        const delayMicros = Math.floor((durationMs * 1000) / steps);
        const script = `
ObjC.import('CoreGraphics');
ObjC.import('stdlib');
var from = $.CGPointMake(${x0}, ${y0});
var to = $.CGPointMake(${x1}, ${y1});
var down = $.CGEventCreateMouseEvent(null, ${ids.down}, from, ${ids.button});
$.CGEventPost(0, down);
$.CFRelease(down);
for (var i = 1; i <= ${steps}; i++) {
    var t = i / ${steps};
    var x = ${x0} + (${x1} - ${x0}) * t;
    var y = ${y0} + (${y1} - ${y0}) * t;
    var pt = $.CGPointMake(x, y);
    var moveEv = $.CGEventCreateMouseEvent(null, ${ids.dragged}, pt, ${ids.button});
    $.CGEventPost(0, moveEv);
    $.CFRelease(moveEv);
    if (${delayMicros} > 0 && i < ${steps}) $.usleep(${delayMicros});
}
var up = $.CGEventCreateMouseEvent(null, ${ids.up}, to, ${ids.button});
$.CGEventPost(0, up);
$.CFRelease(up);
`;
        // The script sleeps for roughly `durationMs`; give osascript that long
        // plus the usual 5 s so a slow drag is not killed before button-up.
        await runJXA(script, Math.ceil(durationMs) + 5000);
        return;
    }
    if (_platform === "linux") {
        const buttonId = xdotoolButtons[button];
        await execFileAsync("xdotool", [
            "mousemove", String(x0), String(y0),
            "mousedown", buttonId,
        ]);
        await execFileAsync("xdotool", ["mousemove", String(x1), String(y1)]);
        await execFileAsync("xdotool", ["mouseup", buttonId]);
        return;
    }
    throw new Error("drag not implemented for Windows");
}
189
/**
 * Scroll at the given screen position.
 *
 * - macOS: posts one line-based scroll-wheel CGEvent whose location is set to
 *   (x, y). Positive `deltaY` is negated before posting.
 * - Linux: moves the pointer to (x, y), then sends one xdotool wheel click per
 *   unit (buttons 4/5 vertical, 6/7 horizontal).
 * - Any other platform: throws (not implemented).
 *
 * Previously `x`/`y` were accepted but never used, so the scroll landed
 * wherever the pointer happened to be.
 *
 * @param {number} x - Horizontal screen coordinate to scroll at.
 * @param {number} y - Vertical screen coordinate to scroll at.
 * @param {number} deltaX - Horizontal amount (negative = xdotool button 6).
 * @param {number} deltaY - Vertical amount (negative = xdotool button 4).
 * @param {string} [_platform=process.platform] - Platform override (used by tests).
 * @returns {Promise<void>}
 * @throws {TypeError} If any argument is not a finite number (a non-finite
 *   delta would otherwise loop forever on Linux).
 * @throws {Error} If the platform is unsupported.
 */
export async function scroll(x, y, deltaX, deltaY, _platform = process.platform) {
    const [px, py, dx, dy] = [x, y, deltaX, deltaY].map(Number);
    if (![px, py, dx, dy].every(Number.isFinite)) {
        throw new TypeError("scroll requires finite numeric coordinates and deltas");
    }
    if (isDryRun()) {
        logDryRun("scroll", { x, y, deltaX, deltaY });
        return;
    }
    if (_platform === "darwin") {
        const verticalDelta = -dy;
        const horizontalDelta = dx;
        await runJXA(`
ObjC.import('CoreGraphics');
var loc = $.CGPointMake(${px}, ${py});
var ev = $.CGEventCreateScrollWheelEvent(null, 1, 2, ${verticalDelta}, ${horizontalDelta});
$.CGEventSetLocation(ev, loc);
$.CGEventPost(0, ev);
$.CFRelease(ev);
`);
        return;
    }
    if (_platform === "linux") {
        // xdotool wheel clicks act at the pointer, so go to the target first.
        await execFileAsync("xdotool", ["mousemove", String(px), String(py)]);
        const verticalButton = dy < 0 ? "4" : "5";
        for (let i = 0; i < Math.abs(dy); i++) {
            await execFileAsync("xdotool", ["click", verticalButton]);
        }
        const horizontalButton = dx < 0 ? "6" : "7";
        for (let i = 0; i < Math.abs(dx); i++) {
            await execFileAsync("xdotool", ["click", horizontalButton]);
        }
        return;
    }
    throw new Error("scroll not implemented for Windows");
}
219
+ // ── Keyboard operations (CGEvent — background) ────────────────────────────
220
/**
 * Type a string of text.
 *
 * - macOS: characters on the US ANSI layout are sent as CGEvent key presses
 *   (one JXA script per consecutive run); every other character (emoji, CJK,
 *   newlines, ...) is sent through System Events `keystroke`.
 * - Linux: `xdotool type --delay <delay> -- <text>`.
 * - Any other platform: throws (not implemented).
 *
 * @param {string} text - Text to type; empty text is a no-op.
 * @param {number} [delay=20] - Inter-key delay in ms. Only the Linux branch uses it.
 * @param {string} [_platform=process.platform] - Platform override (used by tests).
 * @returns {Promise<void>}
 * @throws {Error} If the platform is unsupported.
 */
export async function typeText(text, delay = 20, _platform = process.platform) {
    if (isDryRun()) {
        // Tolerate a missing text argument instead of crashing inside the dry run.
        logDryRun("typeText", { text: String(text ?? "").slice(0, 50), delay });
        return;
    }
    if (!text)
        return;
    if (_platform === "darwin") {
        const SHIFT_FLAG = 0x00020000;
        // macOS virtual key codes for the US ANSI layout:
        // [keyCode, unshifted character, shifted character].
        // Note i = 34 and p = 35; 33 is the "[" key.
        const US_ANSI_KEYS = [
            [0, "a", "A"], [1, "s", "S"], [2, "d", "D"], [3, "f", "F"], [4, "h", "H"],
            [5, "g", "G"], [6, "z", "Z"], [7, "x", "X"], [8, "c", "C"], [9, "v", "V"],
            [11, "b", "B"], [12, "q", "Q"], [13, "w", "W"], [14, "e", "E"], [15, "r", "R"],
            [16, "y", "Y"], [17, "t", "T"],
            [18, "1", "!"], [19, "2", "@"], [20, "3", "#"], [21, "4", "$"], [22, "6", "^"],
            [23, "5", "%"], [24, "=", "+"], [25, "9", "("], [26, "7", "&"], [27, "-", "_"],
            [28, "8", "*"], [29, "0", ")"], [30, "]", "}"],
            [31, "o", "O"], [32, "u", "U"], [33, "[", "{"], [34, "i", "I"], [35, "p", "P"],
            [37, "l", "L"], [38, "j", "J"], [39, "'", '"'], [40, "k", "K"], [41, ";", ":"],
            [42, "\\", "|"], [43, ",", "<"], [44, "/", "?"], [45, "n", "N"], [46, "m", "M"],
            [47, ".", ">"], [49, " ", null], [50, "`", "~"],
        ];
        const keyForChar = new Map();
        for (const [code, plain, shifted] of US_ANSI_KEYS) {
            keyForChar.set(plain, { code, flags: 0 });
            if (shifted !== null) {
                keyForChar.set(shifted, { code, flags: SHIFT_FLAG });
            }
        }
        // Partition the text into consecutive runs: `keys` runs are typed with
        // CGEvents, `literal` runs go through the System Events fallback.
        const runs = [];
        for (const ch of text) {
            const key = keyForChar.get(ch);
            const last = runs.at(-1);
            if (key) {
                if (last?.keys) {
                    last.keys.push(key);
                }
                else {
                    runs.push({ keys: [key] });
                }
            }
            else if (typeof last?.literal === "string") {
                last.literal += ch;
            }
            else {
                runs.push({ literal: ch });
            }
        }
        for (const run of runs) {
            if (run.keys) {
                // One script per run; a helper keeps the script small.
                const taps = run.keys.map(({ code, flags }) => `tap(${code}, ${flags});`).join("\n");
                await runJXA(`
ObjC.import('CoreGraphics');
function tap(code, flags) {
    var kd = $.CGEventCreateKeyboardEvent(null, code, true);
    var ku = $.CGEventCreateKeyboardEvent(null, code, false);
    if (flags) { $.CGEventSetFlags(kd, flags); $.CGEventSetFlags(ku, flags); }
    $.CGEventPost(0, kd);
    $.CGEventPost(0, ku);
    $.CFRelease(kd);
    $.CFRelease(ku);
}
${taps}
`);
            }
            else {
                // Fallback: use osascript keystroke for unsupported chars (emoji, CJK, etc.)
                const escaped = escapeAppleScriptString(run.literal);
                await execFileAsync("/usr/bin/osascript", [
                    "-e", `tell application "System Events" to keystroke "${escaped}"`,
                ], { timeout: 5000 });
            }
        }
        return;
    }
    if (_platform === "linux") {
        await execFileAsync("xdotool", [
            "type", "--delay", String(delay), "--", text,
        ]);
        return;
    }
    throw new Error("typeText not implemented for Windows");
}
353
/**
 * Press a single key, optionally with modifiers held.
 *
 * - macOS: resolves the key to a virtual key code (named keys from
 *   MAC_KEY_CODES, otherwise a US ANSI letter, digit or unshifted symbol),
 *   ORs the modifier flags together and posts a key-down / key-up CGEvent pair.
 * - Linux: `xdotool key <mod+mod+key>`; names are passed through unchanged.
 * - Any other platform: throws (not implemented).
 *
 * @param {string} key - Key name, matched case-insensitively on macOS.
 * @param {string[]} [modifiers=[]] - Modifier names (see MAC_MODIFIER_FLAGS on macOS).
 * @param {string} [_platform=process.platform] - Platform override (used by tests).
 * @returns {Promise<void>}
 * @throws {TypeError} If `key` is not a string.
 * @throws {Error} If the key or a modifier is unknown, or the platform is unsupported.
 */
export async function pressKey(key, modifiers = [], _platform = process.platform) {
    if (typeof key !== "string") {
        throw new TypeError(`pressKey expects the key to be a string, got ${typeof key}`);
    }
    if (isDryRun()) {
        logDryRun("pressKey", { key, modifiers });
        return;
    }
    if (_platform === "darwin") {
        // Character keys (US ANSI layout). Without these, shortcuts such as
        // cmd+c could never be sent because MAC_KEY_CODES only has named keys.
        const ANSI_KEY_CODES = {
            a: 0, s: 1, d: 2, f: 3, h: 4, g: 5, z: 6, x: 7, c: 8, v: 9,
            b: 11, q: 12, w: 13, e: 14, r: 15, y: 16, t: 17,
            "1": 18, "2": 19, "3": 20, "4": 21, "6": 22, "5": 23,
            "=": 24, "9": 25, "7": 26, "-": 27, "8": 28, "0": 29,
            "]": 30, o: 31, u: 32, "[": 33, i: 34, p: 35,
            l: 37, j: 38, "'": 39, k: 40, ";": 41, "\\": 42,
            ",": 43, "/": 44, n: 45, m: 46, ".": 47, "`": 50,
        };
        const name = key.toLowerCase();
        // Own-property checks keep inherited names ("constructor", "toString")
        // from being mistaken for keys.
        let keyCode;
        if (Object.hasOwn(MAC_KEY_CODES, name)) {
            keyCode = MAC_KEY_CODES[name];
        }
        else if (Object.hasOwn(ANSI_KEY_CODES, name)) {
            keyCode = ANSI_KEY_CODES[name];
        }
        else {
            const supported = [...Object.keys(MAC_KEY_CODES), ...Object.keys(ANSI_KEY_CODES)];
            throw new Error(`Unknown key: ${key}. Supported keys: ${supported.join(", ")}`);
        }
        // Build modifier flags
        let flags = 0;
        for (const mod of modifiers) {
            const modName = String(mod).toLowerCase();
            if (!Object.hasOwn(MAC_MODIFIER_FLAGS, modName)) {
                throw new Error(`Unknown modifier: ${mod}. Supported: ${Object.keys(MAC_MODIFIER_FLAGS).join(", ")}`);
            }
            flags |= MAC_MODIFIER_FLAGS[modName];
        }
        await runJXA(`
ObjC.import('CoreGraphics');
var flags = ${flags};
var keyDown = $.CGEventCreateKeyboardEvent(null, ${keyCode}, true);
$.CGEventSetFlags(keyDown, flags);
$.CGEventPost(0, keyDown);
var keyUp = $.CGEventCreateKeyboardEvent(null, ${keyCode}, false);
$.CGEventSetFlags(keyUp, flags);
$.CGEventPost(0, keyUp);
$.CFRelease(keyDown);
$.CFRelease(keyUp);
`);
        return;
    }
    if (_platform === "linux") {
        const keyArg = modifiers.length > 0 ? `${modifiers.join("+")}+${key}` : key;
        await execFileAsync("xdotool", ["key", keyArg]);
        return;
    }
    throw new Error("pressKey not implemented for Windows");
}
393
/**
 * Press a keyboard shortcut given as `[...modifiers, key]`.
 *
 * The last element is the key; everything before it is passed to `pressKey`
 * as the modifier list.
 *
 * @param {string[]} keys - At least two entries (modifier(s) followed by the key).
 * @param {string} [_platform=process.platform] - Platform override (used by tests).
 * @returns {Promise<void>}
 * @throws {Error} If fewer than two keys are supplied (not checked in dry-run mode).
 */
export async function pressShortcut(keys, _platform = process.platform) {
    const dryRun = isDryRun();
    if (dryRun) {
        logDryRun("pressShortcut", { keys });
        return;
    }
    const count = keys.length;
    if (count < 2) {
        throw new Error("pressShortcut requires at least 2 keys (modifier + key)");
    }
    const lastIndex = count - 1;
    const heldModifiers = keys.slice(0, lastIndex);
    await pressKey(keys[lastIndex], heldModifiers, _platform);
}
405
+ // ── Cursor position ───────────────────────────────────────────────────────
406
/**
 * Read the current mouse pointer position.
 *
 * - macOS: asks CoreGraphics for the location of a freshly created event.
 * - Linux: parses the `x:<n> y:<n>` output of `xdotool getmouselocation`.
 * - Any other platform: throws (not implemented).
 *
 * @param {string} [_platform=process.platform] - Platform override (used by tests).
 * @returns {Promise<{x: number, y: number}>} Pointer position in screen coordinates.
 * @throws {Error} If the output cannot be parsed or the platform is unsupported.
 */
export async function getCursorPosition(_platform = process.platform) {
    if (_platform === "darwin") {
        // A JXA script is a program, not a function body: a top-level `return`
        // is a syntax error. Wrap the work in an IIFE so the script's last
        // expression value (what osascript prints) is the JSON string.
        const result = await runJXA(`
ObjC.import('CoreGraphics');
(function () {
    var ev = $.CGEventCreate(null);
    var loc = $.CGEventGetLocation(ev);
    $.CFRelease(ev);
    return JSON.stringify({ x: loc.x, y: loc.y });
})();
`);
        return JSON.parse(result);
    }
    if (_platform === "linux") {
        const { stdout } = await execFileAsync("xdotool", ["getmouselocation"]);
        // Accept an optional minus sign rather than failing on negative values.
        const match = stdout.match(/x:(-?\d+)\s+y:(-?\d+)/);
        if (!match)
            throw new Error("Failed to parse cursor position");
        return { x: Number.parseInt(match[1], 10), y: Number.parseInt(match[2], 10) };
    }
    throw new Error("getCursorPosition not implemented for Windows");
}
@@ -0,0 +1,20 @@
1
+ export interface ScreenshotEncodeOptions {
2
+ format?: "png" | "jpeg";
3
+ maxWidth?: number;
4
+ }
5
+ /**
6
+ * Capture the full screen and return the image as a base64-encoded string (PNG by default; `options.format` / `options.maxWidth` are applied on macOS only).
7
+ */
8
+ export declare function captureFullScreen(options?: ScreenshotEncodeOptions): Promise<string>;
9
+ /**
10
+ * Capture a specific window by its ID and return the image as a base64-encoded string (PNG by default; `options.format` / `options.maxWidth` are applied on macOS only).
11
+ *
12
+ * - macOS: windowId is the CGWindowID (use `osascript -e 'tell app "System Events" ...'` or Quartz).
13
+ * - Linux: windowId is the X11 window id (xdotool style).
14
+ * - Windows: windowId is the native HWND (hex or decimal).
15
+ */
16
+ export declare function captureWindow(windowId: number | string, options?: ScreenshotEncodeOptions): Promise<string>;
17
+ /**
18
+ * Capture a specific screen region and return the image as a base64-encoded string (PNG by default; `options.format` / `options.maxWidth` are applied on macOS only).
19
+ */
20
+ export declare function captureRegion(x: number, y: number, width: number, height: number, options?: ScreenshotEncodeOptions): Promise<string>;
@@ -0,0 +1,157 @@
1
+ import { execFile } from "node:child_process";
2
+ import { randomUUID } from "node:crypto";
3
+ import { readFile, unlink } from "node:fs/promises";
4
+ import { tmpdir } from "node:os";
5
+ import { extname, join } from "node:path";
6
+ import { promisify } from "node:util";
7
+ const execFileAsync = promisify(execFile);
8
/**
 * Returns `process.platform` when it is one of the supported values.
 *
 * @returns {"darwin"|"linux"|"win32"} The current platform.
 * @throws {Error} On any other platform.
 */
function getPlatform() {
    const supported = ["darwin", "linux", "win32"];
    const current = process.platform;
    if (!supported.includes(current)) {
        throw new Error(`Unsupported platform: ${current}`);
    }
    return current;
}
14
/**
 * Builds a unique file path inside the OS temp directory. No file is created.
 *
 * @param {string} [extension="png"] - File extension without the leading dot.
 * @returns {Promise<string>} `<tmpdir>/ucu-screenshot-<uuid>.<extension>`.
 */
async function tempImagePath(extension = "png") {
    const fileName = ["ucu-screenshot-", randomUUID(), ".", extension].join("");
    return join(tmpdir(), fileName);
}
17
/**
 * Reads a file, returns its contents base64-encoded and deletes the file.
 *
 * The file is removed even when reading fails, so a failed read does not
 * leave a temp file behind. Deletion errors are ignored (best effort).
 *
 * @param {string} filePath - Path of the file to read and remove.
 * @returns {Promise<string>} Base64 encoding of the file contents.
 * @throws Whatever `readFile` rejects with (e.g. ENOENT).
 */
async function readAndClean(filePath) {
    try {
        const buf = await readFile(filePath);
        return buf.toString("base64");
    }
    finally {
        await unlink(filePath).catch(() => { });
    }
}
22
/**
 * Turns a captured image file into a base64 string, optionally resizing and
 * converting it first, and removes every file involved.
 *
 * Resizing (`sips -Z`) and JPEG conversion (`sips -s format jpeg`) happen on
 * macOS only; on other platforms, or when neither is requested, the file is
 * returned as captured.
 *
 * @param {string} filePath - Captured image; always deleted.
 * @param {{format?: "png"|"jpeg", maxWidth?: number}} [options={}] - Encoding options.
 * @returns {Promise<string>} Base64-encoded image data.
 */
async function encodeForClient(filePath, options = {}) {
    const platform = getPlatform();
    const { format: requestedFormat, maxWidth: requestedWidth } = options;
    const targetFormat = requestedFormat ?? "png";
    let maxWidth;
    if (requestedWidth && requestedWidth > 0) {
        maxWidth = Math.round(requestedWidth);
    }
    const needsSips = platform === "darwin" && (maxWidth || targetFormat !== "png");
    if (!needsSips) {
        return readAndClean(filePath);
    }
    // Every intermediate file is tracked so the finally block can remove it.
    const tempFiles = [filePath];
    let latest = filePath;
    try {
        if (maxWidth) {
            const sameExtension = extname(latest).replace(".", "") || "png";
            const resized = await tempImagePath(sameExtension);
            tempFiles.push(resized);
            const resizeArgs = ["-Z", String(maxWidth), latest, "--out", resized];
            await execFileAsync("/usr/bin/sips", resizeArgs, { timeout: 15000 });
            latest = resized;
        }
        if (targetFormat === "jpeg") {
            const converted = await tempImagePath("jpg");
            tempFiles.push(converted);
            const convertArgs = ["-s", "format", "jpeg", latest, "--out", converted];
            await execFileAsync("/usr/bin/sips", convertArgs, { timeout: 15000 });
            latest = converted;
        }
        const data = await readFile(latest);
        return data.toString("base64");
    }
    finally {
        const removals = tempFiles.map((file) => unlink(file).catch(() => { }));
        await Promise.all(removals);
    }
}
51
/**
 * Capture the screen to a temp PNG and return it base64-encoded.
 *
 * Uses `screencapture -x` on macOS, `scrot` on Linux and a PowerShell
 * System.Drawing script (primary screen) on Windows. The result is passed
 * through `encodeForClient` together with `options`.
 *
 * @param {{format?: "png"|"jpeg", maxWidth?: number}} [options] - Encoding options.
 * @returns {Promise<string>} Base64-encoded image data.
 */
export async function captureFullScreen(options) {
    const platform = getPlatform();
    const outFile = await tempImagePath("png");
    if (platform === "darwin") {
        await execFileAsync("screencapture", ["-x", outFile]);
    }
    else if (platform === "linux") {
        await execFileAsync("scrot", [outFile]);
    }
    else if (platform === "win32") {
        // Path as embedded in the single-quoted PowerShell string below.
        const psPath = outFile.replace(/'/g, "''").replace(/\\/g, "\\\\");
        const command = `Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.Screen]::PrimaryScreen | ForEach-Object { $bmp = New-Object System.Drawing.Bitmap($_.Bounds.Width, $_.Bounds.Height); $g = [System.Drawing.Graphics]::FromImage($bmp); $g.CopyFromScreen($_.Bounds.Location, [System.Drawing.Point]::Empty, $_.Bounds.Size); $bmp.Save('${psPath}'); $g.Dispose(); $bmp.Dispose() }`;
        await execFileAsync("powershell", ["-NoProfile", "-Command", command]);
    }
    return encodeForClient(outFile, options);
}
74
/**
 * Capture a specific window by its ID and return the image base64-encoded.
 *
 * - macOS: `screencapture -x -l<windowId>` (windowId is the CGWindowID).
 * - Linux: `xwd -id <windowId>` followed by ImageMagick `convert` to PNG
 *   (windowId is the X11 window id).
 * - Windows: PowerShell + user32 `GetWindowRect`, then a screen copy of that
 *   rectangle (windowId is the native HWND).
 *
 * The id must be a non-negative decimal or `0x`-prefixed hex integer. It is
 * interpolated into a PowerShell command on Windows, so anything else is
 * rejected before a process is started.
 *
 * @param {number|string} windowId - Platform-specific window identifier.
 * @param {{format?: "png"|"jpeg", maxWidth?: number}} [options] - Passed to `encodeForClient`.
 * @returns {Promise<string>} Base64-encoded image data.
 * @throws {TypeError} If `windowId` is not a decimal or hex integer.
 */
export async function captureWindow(windowId, options) {
    const id = String(windowId).trim();
    if (!/^(?:0x[0-9a-f]+|\d+)$/i.test(id)) {
        throw new TypeError(`Invalid window id: ${String(windowId)}`);
    }
    const platform = getPlatform();
    const outFile = await tempImagePath("png");
    switch (platform) {
        case "darwin":
            // screencapture -l<windowId> captures a specific window
            await execFileAsync("screencapture", ["-x", `-l${id}`, outFile]);
            break;
        case "linux": {
            // xwd dumps the window, ImageMagick converts the dump to PNG.
            const xwdFile = outFile.replace(/\.png$/, ".xwd");
            try {
                await execFileAsync("xwd", ["-id", id, "-out", xwdFile]);
                await execFileAsync("convert", [xwdFile, outFile]);
            }
            finally {
                // Remove the intermediate dump even when the conversion fails.
                await unlink(xwdFile).catch(() => { });
            }
            break;
        }
        case "win32": {
            const psPath = outFile.replace(/'/g, "''").replace(/\\/g, "\\\\");
            // The PowerShell here-string terminator ('@) must be the first thing
            // on its line, so the script text is deliberately not indented.
            const command = `Add-Type -AssemblyName System.Drawing; Add-Type @'
using System;
using System.Runtime.InteropServices;
using System.Drawing;
public class WinCapture {
    [DllImport("user32.dll")] public static extern bool GetWindowRect(IntPtr hWnd, out RECT r);
    [StructLayout(LayoutKind.Sequential)] public struct RECT { public int Left, Top, Right, Bottom; }
    public static void CaptureWindow(IntPtr hWnd, string path) {
        RECT r;
        if (!GetWindowRect(hWnd, out r)) throw new Exception("GetWindowRect failed");
        int w = r.Right - r.Left, h = r.Bottom - r.Top;
        if (w <= 0 || h <= 0) throw new Exception("Invalid window size");
        var bmp = new Bitmap(w, h);
        var g = Graphics.FromImage(bmp);
        g.CopyFromScreen(r.Left, r.Top, 0, 0, new Size(w, h));
        bmp.Save(path);
        g.Dispose(); bmp.Dispose();
    }
}
'@; [WinCapture]::CaptureWindow([IntPtr]${id}, '${psPath}')`;
            await execFileAsync("powershell", ["-NoProfile", "-Command", command]);
            break;
        }
    }
    return encodeForClient(outFile, options);
}
128
/**
 * Capture a rectangular screen region and return the image base64-encoded.
 *
 * - macOS: `screencapture -x -R<x,y,w,h>`.
 * - Linux: ImageMagick `import -window root -crop <w>x<h>±<x>±<y>`.
 * - Windows: PowerShell System.Drawing `CopyFromScreen`.
 *
 * The four numbers are rounded to integers and validated up front: they are
 * interpolated into a PowerShell command on Windows, so non-numeric input
 * must never reach it.
 *
 * @param {number} x - Left edge of the region.
 * @param {number} y - Top edge of the region.
 * @param {number} width - Region width; must be positive.
 * @param {number} height - Region height; must be positive.
 * @param {{format?: "png"|"jpeg", maxWidth?: number}} [options] - Passed to `encodeForClient`.
 * @returns {Promise<string>} Base64-encoded image data.
 * @throws {TypeError} If an argument is not a number.
 * @throws {RangeError} If width or height is not positive.
 */
export async function captureRegion(x, y, width, height, options) {
    const [left, top, w, h] = [x, y, width, height].map((value) => Math.round(Number(value)));
    if (![left, top, w, h].every(Number.isSafeInteger)) {
        throw new TypeError("captureRegion requires numeric x, y, width and height");
    }
    if (w <= 0 || h <= 0) {
        throw new RangeError(`captureRegion requires a positive width and height, got ${w}x${h}`);
    }
    const platform = getPlatform();
    const outFile = await tempImagePath("png");
    switch (platform) {
        case "darwin":
            // screencapture -R<x,y,w,h>
            await execFileAsync("screencapture", ["-x", `-R${left},${top},${w},${h}`, outFile]);
            break;
        case "linux": {
            // scrot --select is interactive, so crop a root-window capture instead.
            // Geometry offsets carry an explicit sign: 10x20+1-3.
            const offset = (n) => (n < 0 ? `-${-n}` : `+${n}`);
            await execFileAsync("import", [
                "-window", "root",
                "-crop", `${w}x${h}${offset(left)}${offset(top)}`,
                outFile,
            ]);
            break;
        }
        case "win32": {
            const psPath = outFile.replace(/'/g, "''").replace(/\\/g, "\\\\");
            await execFileAsync("powershell", [
                "-NoProfile",
                "-Command",
                `Add-Type -AssemblyName System.Drawing; $bmp = New-Object System.Drawing.Bitmap(${w}, ${h}); $g = [System.Drawing.Graphics]::FromImage($bmp); $g.CopyFromScreen(${left}, ${top}, 0, 0, [System.Drawing.Size]::new(${w}, ${h})); $bmp.Save('${psPath}'); $g.Dispose(); $bmp.Dispose()`,
            ]);
            break;
        }
    }
    return encodeForClient(outFile, options);
}
package/package.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "name": "ucu-mcp",
3
+ "version": "0.1.0",
4
+ "description": "MCP server for Universal Computer Use \u2014 desktop automation for AI agents via Model Context Protocol",
5
+ "type": "module",
6
+ "bin": {
7
+ "ucu-mcp": "./dist/bin/ucu-mcp.js"
8
+ },
9
+ "files": [
10
+ "dist/bin/",
11
+ "dist/src/",
12
+ "dist/index.js",
13
+ "dist/index.d.ts",
14
+ "README.md",
15
+ "CHANGELOG.md"
16
+ ],
17
+ "main": "./dist/index.js",
18
+ "types": "./dist/index.d.ts",
19
+ "scripts": {
20
+ "build": "tsc",
21
+ "start": "node dist/bin/ucu-mcp.js",
22
+ "dev": "tsx bin/ucu-mcp.ts",
23
+ "test": "vitest run",
24
+ "test:watch": "vitest",
25
+ "test:integration": "vitest run tests/integration/",
26
+ "test:macos-gui": "UCU_MACOS_GUI_SMOKE=1 vitest run tests/integration/macos-gui-smoke.test.ts"
27
+ },
28
+ "keywords": [
29
+ "mcp",
30
+ "computer-use",
31
+ "desktop-automation",
32
+ "macos",
33
+ "accessibility",
34
+ "ai-agent"
35
+ ],
36
+ "repository": {
37
+ "type": "git",
38
+ "url": "git+https://github.com/2876674942/ucu-mcp-backup.git"
39
+ },
40
+ "license": "MIT",
41
+ "dependencies": {
42
+ "@modelcontextprotocol/sdk": "^1.12.1"
43
+ },
44
+ "devDependencies": {
45
+ "@types/node": "^22.15.21",
46
+ "tsx": "^4.19.4",
47
+ "typescript": "^5.8.3",
48
+ "vitest": "^4.1.7"
49
+ }
50
+ }