ucu-mcp 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,9 @@ export async function clickElement(elementId, app) {
22
22
  ${jxaElementActionHelpers()}
23
23
  var elemPath = ${elementIdLiteral};
24
24
  var appName = ${appLiteral};
25
+ // 容忍大小写/空格/连字符/下划线变体(cc-switch vs CC Switch vs cc_switch)
26
+ var _norm = function(s) { return String(s||'').toLowerCase().split(' ').join('').split('-').join('').split('_').join(''); };
27
+ var appNorm = _norm(appName);
25
28
  var cached = ${cachedJson};
26
29
 
27
30
  var elem = resolveElementInApp(elemPath, appName) || resolveElementByFullPath(elemPath);
@@ -88,6 +91,9 @@ export async function typeInElement(elementId, text, app, clearFirst) {
88
91
  ${jxaElementActionHelpers()}
89
92
  var elemPath = ${elementIdLiteral};
90
93
  var appName = ${appLiteral};
94
+ // 容忍大小写/空格/连字符/下划线变体(cc-switch vs CC Switch vs cc_switch)
95
+ var _norm = function(s) { return String(s||'').toLowerCase().split(' ').join('').split('-').join('').split('_').join(''); };
96
+ var appNorm = _norm(appName);
91
97
  var textToType = ${textLiteral};
92
98
  var shouldClear = ${clearFirst ? "true" : "false"};
93
99
  var cached = ${cachedJson};
@@ -167,6 +173,9 @@ export async function setElementValue(elementId, value, app) {
167
173
  ${jxaElementActionHelpers()}
168
174
  var elemPath = ${elementIdLiteral};
169
175
  var appName = ${appLiteral};
176
+ // 容忍大小写/空格/连字符/下划线变体(cc-switch vs CC Switch vs cc_switch)
177
+ var _norm = function(s) { return String(s||'').toLowerCase().split(' ').join('').split('-').join('').split('_').join(''); };
178
+ var appNorm = _norm(appName);
170
179
  var valueToSet = ${valueLiteral};
171
180
  var cached = ${cachedJson};
172
181
 
@@ -210,3 +219,249 @@ export async function setElementValue(elementId, value, app) {
210
219
  rethrowElementActionError(error, "set_value", elementId);
211
220
  }
212
221
  }
/**
 * Enumerate the menu bar items that belong to `app`.
 *
 * Runs a two-phase JXA script through `osascript`:
 *   1. items of the app process's own menu bars (reported with host "self");
 *   2. only when phase 1 yields nothing besides an item named "Apple": items of
 *      the SystemUIServer process whose name or description matches the app
 *      (reported with host "systemuiserver").
 * Items whose size reads as 0x0 are skipped.
 *
 * @param {string} app Application name. It is compared case-insensitively and
 *   ignoring spaces, hyphens and underscores. An empty name never matches.
 * @returns {Promise<Array<{menuBar: number, index: number, name: string,
 *   description: string, x: number, y: number, width: number, height: number,
 *   host: string}>>} The items found (possibly empty).
 * @throws {Error} When the script reports an error (for example the process
 *   was not found) or its output cannot be parsed; osascript launch failures
 *   are forwarded to `rethrowElementActionError`.
 */
export async function findMenuBarExtra(app) {
    const appLiteral = JSON.stringify(app);
    // Two-phase JXA: walk the app's own menuBarItems first (host "self"); if that
    // yields nothing, or only the Apple menu, walk SystemUIServer.menuBarItems for
    // a third-party status item (host "systemuiserver"). Per the original author's
    // note, the status item of a pure LSUIElement tray app is hosted by the
    // SystemUIServer process rather than by the app's own process.
    const jxaScript = `
var se = Application('System Events');
var _result = null;
var appName = ${appLiteral};
// 容忍大小写/空格/连字符/下划线变体(cc-switch vs CC Switch vs cc_switch)
var _norm = function(s) { return String(s||'').toLowerCase().split(' ').join('').split('-').join('').split('_').join(''); };
var appNorm = _norm(appName);

// 双向 includes 容忍 "CC Switch" vs "cc-switch" 之类长度差异
var _matchApp = function(itemNorm) {
  // An empty string is a substring of every string, so never match on empty input.
  if (!itemNorm || !appNorm) return false;
  return itemNorm === appNorm || itemNorm.indexOf(appNorm) !== -1 || appNorm.indexOf(itemNorm) !== -1;
};

var _readItem = function(item, mb, i, host) {
  var desc = '', nm = '';
  try { desc = item.description(); } catch(e) {}
  try { nm = item.name(); } catch(e) {}
  var pos = [0,0], sz = [0,0];
  try { pos = item.position(); } catch(e) {}
  try { sz = item.size(); } catch(e) {}
  if (sz[0] === 0 && sz[1] === 0) return null;
  return {menuBar: mb, index: i, name: nm, description: desc, x: pos[0], y: pos[1], width: sz[0], height: sz[1], host: host};
};

try {
  var procs = se.processes;
  var p = null;
  // Skip the scan for an empty app name: a process whose name could not be read
  // normalizes to '' and would otherwise be selected.
  if (appNorm) {
    for (var k = 0; k < procs.length; k++) {
      var pn = '';
      try { pn = procs[k].name(); } catch(e) {}
      if (_norm(pn) === appNorm) { p = procs[k]; break; }
    }
  }

  var items = [];

  // 阶段 1:app 自身 menuBarItems(host:"self")
  if (p) {
    try {
      var menuBars = p.menuBars();
      for (var mb = 0; mb < menuBars.length; mb++) {
        var mbItems;
        try { mbItems = menuBars[mb].menuBarItems(); } catch(e) { continue; }
        for (var i = 0; i < mbItems.length; i++) {
          var rec = _readItem(mbItems[i], mb, i, "self");
          if (rec) items.push(rec);
        }
      }
    } catch(e) {}
  }

  // 是否需要阶段 2:app 自身无 item,或仅含 Apple 菜单(index 0 的 app-name 项)
  var hasNonApple = false;
  for (var j = 0; j < items.length; j++) {
    if (_norm(items[j].name) !== "apple") { hasNonApple = true; break; }
  }

  // 阶段 2:SystemUIServer 托管的第三方托盘 status item(host:"systemuiserver")
  // 仅当 app 自身没有可点击的非 Apple 项时才查 SystemUIServer,避免对有窗口的应用产生噪音。
  if (!hasNonApple) {
    try {
      var suiProcs = se.processes.byName("SystemUIServer");
      if (suiProcs) {
        var suiBars = suiProcs.menuBars();
        for (var smb = 0; smb < suiBars.length; smb++) {
          var suiItems;
          try { suiItems = suiBars[smb].menuBarItems(); } catch(e) { continue; }
          for (var si = 0; si < suiItems.length; si++) {
            var sItem = suiItems[si];
            var sDesc = '', sNm = '';
            try { sDesc = sItem.description(); } catch(e) {}
            try { sNm = sItem.name(); } catch(e) {}
            // 按 description/name 匹配目标 app(status item 的 description 通常是 app 名)
            if (_matchApp(_norm(sDesc)) || _matchApp(_norm(sNm))) {
              var sRec = _readItem(sItem, smb, si, "systemuiserver");
              if (sRec) {
                // 保留匹配信号,供 click 二次定位
                sRec.name = sNm; sRec.description = sDesc;
                items.push(sRec);
              }
            }
          }
        }
      }
    } catch(e) {
      // SystemUIServer 不可达(罕见),忽略,继续返回阶段 1 结果
    }
  }

  if (!p && items.length === 0) {
    _result = {error: "process not found: " + appName, items: []};
  } else {
    _result = {items: items};
  }
} catch(e) {
  _result = {error: "menu bar AX read failed: " + String(e.message || e), items: []};
}
JSON.stringify(_result);
`;
    let out;
    try {
        out = execFileSync("osascript", ["-l", "JavaScript", "-e", jxaScript], { encoding: "utf-8", timeout: 15000 }).trim();
    }
    catch (error) {
        rethrowElementActionError(error, "find_menu_bar_extra", app);
        return []; // unreachable — rethrow throws
    }
    let parsed;
    try {
        parsed = JSON.parse(out);
    }
    catch (error) {
        // Empty or truncated osascript output: report it with context instead of
        // leaking a bare SyntaxError to the caller.
        throw new Error(`find_menu_bar_extra returned unparsable output for ${app}: ${out.slice(0, 200)}`, { cause: error });
    }
    if (!parsed || typeof parsed !== "object") {
        throw new Error(`find_menu_bar_extra returned no result for ${app}`);
    }
    if (parsed.error) {
        throw new Error(parsed.error);
    }
    return Array.isArray(parsed.items) ? parsed.items : [];
}
/**
 * Pick one menu bar item out of `items` according to `selector`.
 *
 * Filtering rules (the first that applies wins):
 *   - `selector.description`: keep items whose description OR name contains it
 *     (case-insensitive); `selector.name` is ignored in this case.
 *   - `selector.name`: keep items whose name OR description contains it
 *     (case-insensitive).
 *   - neither text selector nor an index: drop items named "apple"
 *     (case-insensitive) so a selector-less click does not hit the Apple menu.
 * When `selector.index` is given it indexes into the filtered list (or into
 * the full list when no text selector is present); otherwise the first
 * remaining item is returned.
 *
 * @param {Array<{name?: string, description?: string}>} items Candidates.
 * @param {{description?: string, name?: string, index?: number}} [selector]
 *   A missing or null selector is treated as `{}`; a null index as absent.
 * @returns {object|undefined} The chosen item, or `undefined` when nothing
 *   matches or `items` is empty / not an array.
 */
export function matchMenuBarExtra(items, selector = {}) {
    if (!Array.isArray(items) || items.length === 0) {
        return undefined;
    }
    const { description, name, index } = selector ?? {};
    const hasIndex = index !== undefined && index !== null;
    const lower = (value) => String(value || "").toLowerCase();
    const mentions = (item, needle) => lower(item?.name).includes(needle) || lower(item?.description).includes(needle);
    let candidates = items;
    if (description) {
        const needle = String(description).toLowerCase();
        candidates = items.filter((item) => mentions(item, needle));
    }
    else if (name) {
        const needle = String(name).toLowerCase();
        candidates = items.filter((item) => mentions(item, needle));
    }
    else if (!hasIndex) {
        // No selector at all: exclude the Apple menu so a bare
        // click_menu_bar_extra(app) does not click it by accident.
        candidates = items.filter((item) => lower(item?.name) !== "apple");
    }
    return candidates[hasIndex ? index : 0];
}
/**
 * Click a menu bar item of `app` (its own menu bar or a tray status item).
 *
 * The item is chosen with `findMenuBarExtra` + `matchMenuBarExtra`, then
 * re-located inside a fresh JXA script and pressed via AXPress; if AXPress
 * throws, a synthetic left click is posted at the item's centre instead.
 *
 * @param {string} app Application name (case/space/hyphen/underscore tolerant).
 * @param {{description?: string, name?: string, index?: number}} [selector]
 *   Passed to `matchMenuBarExtra`.
 * @returns {Promise<void>}
 * @throws {ElementNotFoundError} When no item matches the selector.
 * @throws {Error} When the script reports a failure or returns no usable
 *   result; osascript launch failures go through `rethrowElementActionError`.
 */
export async function clickMenuBarExtra(app, selector = {}) {
    // Honour a host object that provides its own findMenuBarExtra (the original
    // call style), but keep working when invoked as a plain module function,
    // where `this` is undefined.
    const locate = typeof this?.findMenuBarExtra === "function"
        ? (appName) => this.findMenuBarExtra(appName)
        : findMenuBarExtra;
    const items = await locate(app);
    const target = matchMenuBarExtra(items, selector);
    if (!target) {
        throw new ElementNotFoundError(`menu bar extra not found in ${app} (selector: ${JSON.stringify(selector)}; ${items.length} items scanned)`);
    }
    const appLiteral = JSON.stringify(app);
    // Coerce to numbers so only numeric text can ever be spliced into the script.
    const mb = Number(target.menuBar);
    const idx = Number(target.index);
    const host = target.host;
    const tgtNameLiteral = JSON.stringify(target.name || "");
    const tgtDescLiteral = JSON.stringify(target.description || "");
    // AXPress first; on failure click the centre by coordinates (the original
    // note says this mirrors clickElement and covers apps such as Tauri that
    // silently swallow the press).
    // host === "systemuiserver": re-locate on the SystemUIServer process. Its item
    //   order is not stable, so the saved name/description identify the item: an
    //   item equal to the saved target wins, otherwise the first item that
    //   loosely matches the app is used. Empty saved values never count as equal.
    // host === "self": re-locate by menuBars()[mb].menuBarItems()[idx] on the app
    //   process.
    const resolveViaSystemUIServer = `// SystemUIServer 托管的第三方托盘:按 name/description 二次匹配(顺序不稳定)
  var suiProc = null;
  try { suiProc = se.processes.byName("SystemUIServer"); } catch(e) {}
  var item = null;
  var looseMatch = null;
  if (suiProc) {
    var suiBars = suiProc.menuBars();
    outer: for (var b = 0; b < suiBars.length; b++) {
      var suiItems;
      try { suiItems = suiBars[b].menuBarItems(); } catch(e) { continue; }
      for (var ii = 0; ii < suiItems.length; ii++) {
        var it = suiItems[ii];
        var iDesc = '', iNm = '';
        try { iDesc = it.description(); } catch(e) {}
        try { iNm = it.name(); } catch(e) {}
        var iDescNorm = _norm(iDesc), iNmNorm = _norm(iNm);
        // Exact hit on the saved target: stop searching.
        if ((tgtDescNorm || tgtNameNorm) && iDescNorm === tgtDescNorm && iNmNorm === tgtNameNorm) {
          item = it; break outer;
        }
        // Remember the first loose candidate; empty saved values are not a match.
        if (!looseMatch && (_matchApp(iDescNorm) || _matchApp(iNmNorm)
            || (tgtDescNorm && iDescNorm === tgtDescNorm) || (tgtNameNorm && iNmNorm === tgtNameNorm))) {
          looseMatch = it;
        }
      }
    }
  }
  if (!item) { item = looseMatch; }
  if (!item) { _result = {success: false, error: "SystemUIServer status item not found for " + appName}; }`;
    const resolveViaOwnProcess = `// app 自身 menu bar:按 menuBar/index 重定位(稳定)
  if (!p) {
    _result = {success: false, error: "process not found: " + appName};
  } else {
    var item = p.menuBars()[${mb}].menuBarItems()[${idx}];
  }`;
    const resolveItemBlock = host === "systemuiserver" ? resolveViaSystemUIServer : resolveViaOwnProcess;
    const jxaScript = `
var se = Application('System Events');
var _result = null;
var appName = ${appLiteral};
var tgtName = ${tgtNameLiteral};
var tgtDesc = ${tgtDescLiteral};
// 容忍大小写/空格/连字符/下划线变体(cc-switch vs CC Switch vs cc_switch)
var _norm = function(s) { return String(s||'').toLowerCase().split(' ').join('').split('-').join('').split('_').join(''); };
var appNorm = _norm(appName);
var tgtNameNorm = _norm(tgtName);
var tgtDescNorm = _norm(tgtDesc);
var _matchApp = function(itemNorm) {
  // An empty string is a substring of every string, so never match on empty input.
  if (!itemNorm || !appNorm) return false;
  return itemNorm === appNorm || itemNorm.indexOf(appNorm) !== -1 || appNorm.indexOf(itemNorm) !== -1;
};
try {
  var procs = se.processes;
  var p = null;
  if (appNorm) {
    for (var k = 0; k < procs.length; k++) {
      var pn = '';
      try { pn = procs[k].name(); } catch(e) {}
      if (_norm(pn) === appNorm) { p = procs[k]; break; }
    }
  }
  ${resolveItemBlock}
  if (!_result && item) {
    try {
      item.actions.AXPress.perform();
      _result = {success: true, method: "axpress"};
    } catch(e) {
      var pos = item.position();
      var sz = item.size();
      var cx = pos[0] + sz[0] / 2;
      var cy = pos[1] + sz[1] / 2;
      ObjC.import('CoreGraphics');
      var pt = $.CGPointMake(cx, cy);
      var down = $.CGEventCreateMouseEvent(null, $.kCGEventLeftMouseDown, pt, $.kCGMouseButtonLeft);
      $.CGEventPost($.kCGHIDEventTap, down);
      var up = $.CGEventCreateMouseEvent(null, $.kCGEventLeftMouseUp, pt, $.kCGMouseButtonLeft);
      $.CGEventPost($.kCGHIDEventTap, up);
      _result = {success: true, method: "coordinate", x: cx, y: cy};
    }
  } else if (!_result) {
    // The item vanished between lookup and click (e.g. a stale index): report it
    // rather than emitting a null result.
    _result = {success: false, error: "menu bar item not found for " + appName};
  }
} catch(e) {
  _result = {success: false, error: String(e.message || e)};
}
JSON.stringify(_result);
`;
    let out;
    try {
        out = execFileSync("osascript", ["-l", "JavaScript", "-e", jxaScript], { encoding: "utf-8", timeout: 15000 }).trim();
    }
    catch (error) {
        rethrowElementActionError(error, "click_menu_bar_extra", app);
        return; // unreachable
    }
    let result;
    try {
        result = JSON.parse(out);
    }
    catch (error) {
        throw new Error(`click_menu_bar_extra failed in ${app}: unparsable osascript output: ${out.slice(0, 200)}`, { cause: error });
    }
    if (!result?.success) {
        throw new Error(`click_menu_bar_extra failed in ${app}: ${result?.error ?? "no result returned by osascript"}`);
    }
}
@@ -74,7 +74,7 @@ export async function ocr(display, region) {
74
74
  const nativeResult = await ocrNative(tmpPath, scaleFactor, region);
75
75
  if (nativeResult)
76
76
  return nativeResult;
77
- return await ocrJxa(tmpPath, screenSize, scaleFactor, region, buf);
77
+ return await ocrJxa(tmpPath, screenSize, scaleFactor, region);
78
78
  }
79
79
  finally {
80
80
  await unlink(tmpPath).catch(() => { });
@@ -83,6 +83,9 @@ export async function ocr(display, region) {
83
83
  async function ocrNative(tmpPath, scaleFactor, region) {
84
84
  const screenDirname = dirname(fileURLToPath(import.meta.url));
85
85
  const candidates = [
86
+ // npm prod: screen.js 编译落在 dist/src/platform/macos/(4 级深),到包根需 4 级 ../
87
+ join(screenDirname, "..", "..", "..", "..", "native", "ocr", "ocr-helper"),
88
+ // dev: screen.ts 在 src/platform/macos/(3 级深),3 级到包根
86
89
  join(screenDirname, "..", "..", "..", "native", "ocr", "ocr-helper"),
87
90
  join(screenDirname, "..", "..", "native", "ocr", "ocr-helper"),
88
91
  join(process.cwd(), "native", "ocr", "ocr-helper"),
@@ -120,45 +123,47 @@ async function ocrNative(tmpPath, scaleFactor, region) {
120
123
  return null;
121
124
  }
122
125
  }
123
- async function ocrJxa(tmpPath, screenSize, scaleFactor, region, buf) {
126
+ async function ocrJxa(tmpPath, screenSize, scaleFactor, region) {
124
127
  const pathLiteral = JSON.stringify(tmpPath);
125
128
  const jxaScript = `
126
129
  function run() {
127
130
  ObjC.import('Vision');
128
131
  ObjC.import('AppKit');
129
132
  ObjC.import('Foundation');
130
- var app = Application.currentApplication();
131
- app.includeStandardAdditions = true;
132
133
  var path = ${pathLiteral};
133
- var url = $.NSURL.fileURLWithPath(path);
134
- var image = $.NSImage.alloc.initWithContentsOfURL(url);
135
- if (!image || !image.isValid) {
134
+ var fm = $.NSFileManager.defaultManager;
135
+ if (!fm.fileExistsAtPath(path)) {
136
136
  return JSON.stringify({error: "Failed to load screenshot image", elements: [], fullText: ""});
137
137
  }
138
- var cgImage = image.CGImageForProposedRectContextHints(null, null, null);
139
- if (!cgImage) {
138
+ var url = $.NSURL.fileURLWithPath(path);
139
+ var handler = $.VNImageRequestHandler.alloc.initWithURLOptions(url, $());
140
+ if (!handler) {
140
141
  return JSON.stringify({error: "Failed to get CGImage from screenshot", elements: [], fullText: ""});
141
142
  }
142
143
  var request = $.VNRecognizeTextRequest.alloc.init;
143
144
  request.recognitionLevel = $.VNRequestTextRecognitionLevelAccurate;
144
145
  request.usesLanguageCorrection = true;
145
- var handler = $.VNImageRequestHandler.alloc.initWithCGImageOptions(cgImage, null);
146
- var performError = $();
147
- var success = handler.performRequestsError([request], performError);
146
+ var performError = Ref();
147
+ var success = handler.performRequestsError($([request]), performError);
148
148
  if (!success) {
149
149
  return JSON.stringify({error: "OCR request failed", elements: [], fullText: ""});
150
150
  }
151
151
  var results = request.results;
152
+ var image = $.NSImage.alloc.initWithContentsOfURL(url);
153
+ if (!image || $(image.representations()).count === 0) {
154
+ return JSON.stringify({error: "Failed to load screenshot image", elements: [], fullText: ""});
155
+ }
156
+ var rep = image.representations().objectAtIndex(0);
157
+ var imgWidth = rep.pixelsWide;
158
+ var imgHeight = rep.pixelsHigh;
152
159
  var elements = [];
153
160
  var fullTextParts = [];
154
- var imgWidth = cgImage.width;
155
- var imgHeight = cgImage.height;
156
161
  for (var i = 0; i < results.count; i++) {
157
162
  var obs = $(results).objectAtIndex(i);
158
163
  var candidates = obs.topCandidates(1);
159
- if (candidates && candidates.count > 0) {
164
+ if (candidates && $(candidates).count > 0) {
160
165
  var candidate = $(candidates).objectAtIndex(0);
161
- var text = candidate.string.toString();
166
+ var text = ObjC.unwrap(candidate.string);
162
167
  var confidence = candidate.confidence;
163
168
  var bbox = obs.boundingBox;
164
169
  var bx = bbox.origin.x * imgWidth;
@@ -183,8 +188,6 @@ async function ocrJxa(tmpPath, screenSize, scaleFactor, region, buf) {
183
188
  : "";
184
189
  throw new CaptureError(`ocr failed: ${parsed.error}${hint}`);
185
190
  }
186
- const imgWidth = buf.readUInt32BE(16);
187
- const scaleFactorX = screenSize.width / (region ? region.width : (imgWidth / scaleFactor));
188
191
  const elements = parsed.elements.map((el) => ({
189
192
  text: el.text,
190
193
  x: Math.round(el.x / scaleFactor) + (region ? region.x : 0),
@@ -53,6 +53,25 @@ export async function focusApp(app) {
53
53
  await new Promise((resolve) => setTimeout(resolve, 150));
54
54
  } while (Date.now() < deadline);
55
55
  if (!target) {
56
+ // 托盘应用(LSUIElement,如 cc-switch)没有常规窗口——回退找菜单栏 status item,
57
+ // 建立 tray activeTarget(windowId='tray',validateActiveTarget 对它特判不查窗口)。
58
+ try {
59
+ const extras = await this.findMenuBarExtra(app);
60
+ if (extras.length > 0) {
61
+ this.activeTarget = {
62
+ targetId: randomUUID(),
63
+ appName: app,
64
+ pid: 0,
65
+ windowId: "tray",
66
+ title: "",
67
+ capturedAt: new Date().toISOString(),
68
+ };
69
+ return this.activeTarget;
70
+ }
71
+ }
72
+ catch {
73
+ // findMenuBarExtra 失败(AX 权限/进程不可达等),落到下面的 WindowNotFoundError
74
+ }
56
75
  this.activeTarget = undefined;
57
76
  const err = new WindowNotFoundError(app, { hint: "list_windows returned no match for this app. If the app is running, " +
58
77
  "the most likely cause is that it is an Electron app whose AX tree is " +
@@ -194,6 +213,9 @@ function resolveNativeHelper(folder, binary) {
194
213
  return override === null ? null : override;
195
214
  }
196
215
  const candidates = [
216
+ // npm prod: window.js 在 dist/src/platform/macos/(4 级深),到包根需 4 级 ../
217
+ join(__windowDirname, "..", "..", "..", "..", "native", folder, binary),
218
+ // dev: window.ts 在 src/platform/macos/(3 级深),3 级到包根
197
219
  join(__windowDirname, "..", "..", "..", "native", folder, binary),
198
220
  join(__windowDirname, "..", "..", "native", folder, binary),
199
221
  ];
@@ -12,6 +12,8 @@
12
12
  const DEFAULT_BLOCKED_KEYS = [
13
13
  // macOS – app-level
14
14
  "cmd+q",
15
+ "cmd+shift+q", // log out(方向2 后字母 q 可解析,须显式拦截)
16
+ "cmd+option+q", // log out variant
15
17
  "cmd+w",
16
18
  "cmd+l", // lock screen
17
19
  // macOS – system-level
@@ -74,12 +76,22 @@ const DEFAULT_TEXT_INJECTION_PATTERNS = [
74
76
  // ---------------------------------------------------------------------------
75
77
  // Helpers
76
78
  // ---------------------------------------------------------------------------
/**
 * Canonical name for each modifier alias. Per the original note,
 * MAC_MODIFIER_FLAGS accepts both spellings (cmd/command, option/alt,
 * control/ctrl); mapping them to one form lets a blocklist entry such as
 * "cmd+option+esc" also catch "cmd+alt+esc".
 *
 * Built on a null prototype so that looking up an arbitrary token (for example
 * "constructor" or "__proto__") can only ever yield an alias defined here.
 */
const MODIFIER_CANONICAL = Object.freeze(Object.assign(Object.create(null), {
    alt: "option",
    ctrl: "control",
    cmd: "command",
}));
/**
 * Normalize a shortcut string: lowercase it, split on "+", trim each part,
 * canonicalize modifier aliases (alt→option, ctrl→control, cmd→command), sort
 * the parts and join them back with "+".
 *
 * @param {string} raw Shortcut such as "Cmd+Alt+Esc".
 * @returns {string} Normalized form, e.g. "command+esc+option".
 */
function normalizeShortcut(raw) {
    const parts = raw
        .toLowerCase()
        .split("+")
        .map((part) => {
        const token = part.trim();
        return MODIFIER_CANONICAL[token] ?? token;
    });
    return parts.sort().join("+");
}
@@ -104,6 +116,8 @@ export const OBSERVE_ACTIONS = new Set([
104
116
  // and an AX window lookup — it does not synthesize mouse or keyboard input,
105
117
  // so the user-activity pause must not block it. (OpenCode 0.3.7 follow-up)
106
118
  "focus_app",
119
+ // describe_screen reads screen state (OCR + AX), no input synthesis.
120
+ "describe_screen",
107
121
  ]);
108
122
  /** Actions that synthesize user input — need full user-activity protection. */
109
123
  export const INPUT_ACTIONS = new Set([
@@ -117,6 +131,7 @@ export const INPUT_ACTIONS = new Set([
117
131
  "click_element",
118
132
  "type_in_element",
119
133
  "set_value",
134
+ "click_menu_bar_extra",
120
135
  "clipboard_write",
121
136
  ]);
122
137
  export function classifyAction(action) {
@@ -80,6 +80,20 @@ const MAC_MODIFIER_FLAGS = {
80
80
  option: 0x00080000, alt: 0x00080000,
81
81
  control: 0x00040000, ctrl: 0x00040000,
82
82
  };
// Letter and digit keyCodes — the single table shared by typeText and pressKey.
// When a key is not found in MAC_KEY_CODES (special keys), pressKey falls back
// to these two maps so shortcuts containing letters (Cmd+M, Cmd+W, ...) work.
// Note that the keyCode of 'a' is 0: test for presence with `in`, never with a
// truthiness check, otherwise 0 is treated as a miss and falls through to the
// digit map.
// Both tables have a null prototype, so `in` only ever sees the keys listed
// here, and are frozen because they are shared module-level data.
const MAC_LETTER_KEY_CODES = Object.freeze(Object.assign(Object.create(null), {
    a: 0, s: 1, d: 2, f: 3, h: 4, g: 5, z: 6, x: 7, c: 8, v: 9,
    b: 11, q: 12, w: 13, e: 14, r: 15, y: 16, t: 17,
    o: 31, u: 32, i: 33, p: 34, l: 37, j: 38, k: 40,
    n: 45, m: 46,
}));
const MAC_DIGIT_KEY_CODES = Object.freeze(Object.assign(Object.create(null), {
    "1": 18, "2": 19, "3": 20, "4": 21, "5": 23,
    "6": 22, "7": 26, "8": 28, "9": 25, "0": 29,
}));
83
97
  // ── AppleScript string escaping ───────────────────────────────────────────
84
98
  function escapeAppleScriptString(str) {
85
99
  return str
@@ -288,23 +302,11 @@ export async function typeText(text, delay = 20, _platform = process.platform) {
288
302
  if (_platform === "darwin") {
289
303
  // Character -> { keyCode, shift? } map for CGEvent injection
290
304
  const CHAR_TO_KEY = {};
291
- // Lowercase letters
292
- const letterMap = {
293
- a: 0, s: 1, d: 2, f: 3, h: 4, g: 5, z: 6, x: 7, c: 8, v: 9,
294
- b: 11, q: 12, w: 13, e: 14, r: 15, y: 16, t: 17,
295
- o: 31, u: 32, i: 33, p: 34, l: 37, j: 38, k: 40,
296
- n: 45, m: 46,
297
- };
298
- for (const [ch, code] of Object.entries(letterMap)) {
305
+ for (const [ch, code] of Object.entries(MAC_LETTER_KEY_CODES)) {
299
306
  CHAR_TO_KEY[ch] = { code };
300
307
  CHAR_TO_KEY[ch.toUpperCase()] = { code, shift: true };
301
308
  }
302
- // Digits
303
- const digitMap = {
304
- "1": 18, "2": 19, "3": 20, "4": 21, "5": 23,
305
- "6": 22, "7": 26, "8": 28, "9": 25, "0": 29,
306
- };
307
- for (const [ch, code] of Object.entries(digitMap)) {
309
+ for (const [ch, code] of Object.entries(MAC_DIGIT_KEY_CODES)) {
308
310
  CHAR_TO_KEY[ch] = { code };
309
311
  }
310
312
  // Unshifted symbols
@@ -422,9 +424,15 @@ export async function pressKey(key, modifiers = [], _platform = process.platform
422
424
  return;
423
425
  }
424
426
  if (_platform === "darwin") {
425
- const keyCode = MAC_KEY_CODES[key.toLowerCase()];
427
+ const lookup = key.toLowerCase();
428
+ // 先查特殊键,未命中再回退查字母/数字(让 Cmd+M / Cmd+W 等含字母的快捷键可用)。
429
+ // 用 `in` 判定存在性——'a' 的 keyCode 是 0,truthy 判断会误穿透到 digit map。
430
+ const keyCode = lookup in MAC_KEY_CODES ? MAC_KEY_CODES[lookup] :
431
+ (key.length === 1 && lookup in MAC_LETTER_KEY_CODES) ? MAC_LETTER_KEY_CODES[lookup] :
432
+ (key.length === 1 && key in MAC_DIGIT_KEY_CODES) ? MAC_DIGIT_KEY_CODES[key] :
433
+ undefined;
426
434
  if (keyCode === undefined) {
427
- throw new Error(`Unknown key: ${key}. Supported keys: ${Object.keys(MAC_KEY_CODES).join(", ")}`);
435
+ throw new Error(`Unknown key: ${key}. Supported keys: special keys (${Object.keys(MAC_KEY_CODES).join(", ")}), single letters a-z, single digits 0-9`);
428
436
  }
429
437
  // Build modifier flags
430
438
  let flags = 0;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ucu-mcp",
3
- "version": "0.4.3",
3
+ "version": "0.5.0",
4
4
  "description": "MCP server for Universal Computer Use — desktop automation for AI agents via Model Context Protocol",
5
5
  "type": "module",
6
6
  "bin": {
@@ -12,6 +12,7 @@
12
12
  "dist/src/",
13
13
  "dist/index.js",
14
14
  "dist/index.d.ts",
15
+ "skills/",
15
16
  "README.md",
16
17
  "CHANGELOG.md"
17
18
  ],
@@ -0,0 +1,100 @@
1
+ ---
2
+ name: ucu-mcp
3
+ description: >-
4
+ Guidance for using UCU-MCP, the macOS computer-use MCP server (screenshot,
5
+ click, type, OCR, AX element tools, menu-bar tray support). Use when an
6
+ agent needs to automate macOS desktop apps over MCP — establishing target
7
+ context, reading screen state, interacting with UI elements, operating
8
+ menu-bar/tray apps, or recovering from AX/permission errors. Covers Claude
9
+ Code CLI/Desktop, Codex, OpenCode, and other MCP clients.
10
+ ---
11
+
12
+ # UCU-MCP
13
+
14
+ UCU-MCP is a cross-client computer-use MCP server for macOS (Windows/Linux are
15
+ explicit stubs). It exposes 26 tools that let an agent see the screen and drive
16
+ native apps through a combination of Accessibility (AX) APIs, CGEvent input
17
+ synthesis, Vision OCR, and ScreenCaptureKit screenshots.
18
+
19
+ - npm package: `ucu-mcp`
20
+ - Run: `npx -y ucu-mcp` (stdio MCP server) or install globally via `npm i -g ucu-mcp`
21
+
22
+ ## Core Workflow
23
+
24
+ 1. **Check readiness** → `doctor` verifies Accessibility + Screen Recording
25
+ permissions and native helpers. If anything is missing, follow its guidance
26
+ (see [troubleshooting](references/troubleshooting.md)).
27
+ 2. **Establish target context** → `list_apps` then `focus_app(name)` sets the
28
+ active window target. Subsequent AX tools operate against that target.
29
+ 3. **Prefer AX over coordinates** → `find_element(text/role/value)` →
30
+ `click_element` / `type_in_element` / `set_value`. AX is precise and survives
31
+ layout shifts; coordinates are a last resort.
32
+ 4. **When AX is opaque (Electron/Tauri/WebView)** → `screenshot` + `ocr` to
33
+ locate text by bounding box, then `click(x, y)` at the returned coordinates.
34
+ 5. **When image content is not visible to you** (relayed/downgraded to a URL) →
35
+ `screenshot(describe: true)` or the standalone `describe_screen` tool to get a
36
+ structured text view (OCR blocks + AX tree + foreground window).
37
+ 6. **Menu-bar/tray apps** (e.g. cc-switch) → `click_menu_bar_extra(app,
38
+ description/name/index)` opens the tray menu, then `find_element` inside it.
39
+ 7. **Verify actions** → pass `captureAfter: true` on action tools, or call
40
+ `screenshot` / `get_window_state` afterwards.
41
+ 8. **Recover from errors** → every error response carries a `hint` with the
42
+ next step. See the [error code table](references/troubleshooting.md).
43
+
44
+ Full tool inventory with parameters: [tool-reference](references/tool-reference.md).
45
+ Common task playbooks: [workflows](references/workflows.md).
46
+
47
+ ## Operating Rules
48
+
49
+ - **AX-first.** Use `find_element` → `click_element` / `type_in_element` /
50
+ `set_value` whenever the AX tree exposes the target. Fall back to coordinates
51
+ only when AX returns nothing (Electron/WebView) or the control silently
52
+ swallows AX actions.
53
+ - **Observe before acting.** Call `screenshot` / `get_window_state` /
54
+ `describe_screen` before destructive or hard-to-reverse actions so you act on
55
+ current state, not assumptions.
56
+ - **TARGET_STALE is recoverable.** Re-run `focus_app` for the target app, then
57
+ retry — the element cache refetches equivalent AX nodes.
58
+ - **Tray apps need `click_menu_bar_extra`.** `focus_app` alone cannot reach
59
+ pure menu-bar (LSUIElement) apps; their status item is hosted by
60
+ `SystemUIServer` and is not in any app window's AX tree.
61
+ - **Dangerous actions are blocked.** Quit/logout/lock shortcuts (`cmd+q`,
62
+ `cmd+shift+q`, `cmd+l`, …), sensitive-window URLs, and suspicious injected
63
+ text are rejected by the safety guard. Choose a safer action or ask the user.
64
+ - **Sensitive fields are masked in `describe_screen`.** Password fields
65
+ (`AXSecureTextField`, or names matching `/password|secret|token/i`) appear as
66
+ `[REDACTED]` — never try to read or exfiltrate them.
67
+ - **macOS is locked → actions blocked.** The server refuses to synthesize input
68
+ while the screen is locked; wait for unlock or ask the user.
69
+
70
+ ## MCP Config
71
+
72
+ Add UCU-MCP to your MCP client. Stdio transport, no arguments needed.
73
+
74
+ **Codex / generic TOML:**
75
+
76
+ ```toml
77
+ [mcp_servers.ucu-mcp]
78
+ command = "npx"
79
+ args = ["-y", "ucu-mcp"]
80
+ ```
81
+
82
+ **Claude Code CLI / Desktop** — add via `claude mcp add`:
83
+
84
+ ```bash
85
+ claude mcp add ucu-mcp -- npx -y ucu-mcp
86
+ ```
87
+
88
+ Run `ucu-mcp doctor` once after first connect to verify macOS permissions
89
+ (System Settings → Privacy & Security → Accessibility **and** Screen Recording
90
+ must be granted to the launching terminal/client).
91
+
92
+ ## References
93
+
94
+ - [tool-reference.md](references/tool-reference.md) — all 26 tools, parameters,
95
+ return shapes, and when to use each.
96
+ - [workflows.md](references/workflows.md) — playbooks for common tasks: form
97
+ filling, tray apps, opaque Electron UIs, vision-degraded environments, stale
98
+ targets.
99
+ - [troubleshooting.md](references/troubleshooting.md) — error code table with
100
+ recovery steps, permission issues, AX-opacity workarounds, OCR failures.
@@ -0,0 +1,4 @@
1
+ interface:
2
+ display_name: "UCU-MCP"
3
+ short_description: "Guide agents using UCU-MCP macOS computer-use"
4
+ default_prompt: "Use $ucu-mcp to automate macOS desktop apps through UCU-MCP."