ucu-mcp 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,7 @@ import { randomUUID } from "node:crypto";
3
3
  import { promisify } from "node:util";
4
4
  import { captureFullScreen, captureRegion } from "../utils/screenshot.js";
5
5
  import { click as inputClick, doubleClick as inputDoubleClick, move as inputMove, drag as inputDrag, scroll as inputScroll, typeText, pressShortcut } from "../utils/input.js";
6
- import { CaptureError, ElementNotFoundError, InputSynthesisError, PermissionError, PlatformError, UcuError, WindowNotFoundError } from "../util/errors.js";
6
+ import { CaptureError, ElementNotFoundError, InputSynthesisError, PermissionError, PlatformError, TargetStaleError, UcuError, WindowNotFoundError } from "../util/errors.js";
7
7
  const execFileAsync = promisify(execFile);
8
8
  function errorMessage(error) {
9
9
  return error instanceof Error ? error.message : String(error);
@@ -40,6 +40,24 @@ function rethrowInputError(error, operation) {
40
40
  throw error;
41
41
  throw new InputSynthesisError(`${operation} failed: ${errorMessage(error)}`);
42
42
  }
43
/**
 * Canonicalize an application/process name for comparison.
 *
 * @param name Raw name; anything that is not a string is treated as empty.
 * @returns The trimmed, lower-cased name, or "" for non-string input so that
 *          windows without a process name never make the callers throw.
 */
function normalizeAppName(name) {
  return typeof name === "string" ? name.trim().toLowerCase() : "";
}
/**
 * Decide whether a window's process name refers to the requested app.
 *
 * Matching is case-insensitive and word-bounded: the requested name must be
 * the whole process name, its leading word (followed by a space or "-"), a
 * middle word, or its trailing word. A bare substring is NOT a match, so
 * "code" does not match "Xcode" but "chrome" does match "Google Chrome".
 *
 * @param processName Process name reported for a window.
 * @param requestedApp App name supplied by the caller.
 * @returns true when the names match; false when either side is empty.
 */
function appNameMatches(processName, requestedApp) {
  const candidate = normalizeAppName(processName);
  const requested = normalizeAppName(requestedApp);
  if (!candidate || !requested)
    return false;
  return candidate === requested ||
    candidate.startsWith(`${requested} `) ||
    candidate.startsWith(`${requested}-`) ||
    // Trailing word, e.g. "chrome" in "google chrome".
    candidate.endsWith(` ${requested}`) ||
    candidate.includes(` ${requested} `);
}
/**
 * Pick the window that best matches the requested app.
 *
 * An exact (normalized) process-name match wins over a word-bounded match,
 * and within each tier the first window in list order is returned.
 *
 * @param windows Window descriptors exposing `processName`.
 * @param requestedApp App name supplied by the caller.
 * @returns The matching window, or undefined when nothing matches or the
 *          requested name is empty.
 */
function selectWindowForApp(windows, requestedApp) {
  const requested = normalizeAppName(requestedApp);
  // An empty request must not "exactly" match a window whose process name is empty.
  if (!requested)
    return undefined;
  return windows.find((window) => normalizeAppName(window.processName) === requested) ??
    windows.find((window) => appNameMatches(window.processName, requestedApp));
}
43
61
  export class MacOSPlatform {
44
62
  elementCache = new Map();
45
63
  elementCacheTtlMs = 30_000;
@@ -81,6 +99,18 @@ export class MacOSPlatform {
81
99
  isCacheEntryExpired(descriptor) {
82
100
  return Date.now() - descriptor.cachedAt > this.elementCacheTtlMs;
83
101
  }
102
+ // ── Target Validation ────────────────────────────────────────────────────
103
+ /** Validate that the active target window still exists. */
104
+ async validateActiveTarget() {
105
+ if (!this.activeTarget?.windowId)
106
+ return;
107
+ this.windowCache = undefined; // Bypass cache — stale detection must use fresh data
108
+ const windows = await this.listWindows(true);
109
+ const stillExists = windows.some(w => w.id === this.activeTarget.windowId);
110
+ if (!stillExists) {
111
+ throw new TargetStaleError(this.activeTarget.windowId);
112
+ }
113
+ }
84
114
  // ── Focus Management ────────────────────────────────────────────────────
85
115
  /** Save the current frontmost app/window so we can restore after an action. */
86
116
  async saveFocus() {
@@ -199,7 +229,6 @@ export class MacOSPlatform {
199
229
  return JSON.parse(out);
200
230
  }
201
231
  async focusApp(app) {
202
- const appLower = app.toLowerCase();
203
232
  const escapedApp = app.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
204
233
  this.windowCache = undefined;
205
234
  try {
@@ -213,7 +242,7 @@ export class MacOSPlatform {
213
242
  const deadline = Date.now() + 3000;
214
243
  do {
215
244
  const windows = await this.listWindows(true);
216
- target = windows.find((w) => w.processName.toLowerCase().includes(appLower));
245
+ target = selectWindowForApp(windows, app);
217
246
  if (target)
218
247
  break;
219
248
  await new Promise((resolve) => setTimeout(resolve, 150));
@@ -222,10 +251,12 @@ export class MacOSPlatform {
222
251
  throw new WindowNotFoundError(app);
223
252
  }
224
253
  this.activeTarget = {
254
+ targetId: randomUUID(),
225
255
  appName: target.processName,
226
256
  pid: target.pid,
227
257
  windowId: target.id,
228
258
  title: target.title,
259
+ capturedAt: new Date().toISOString(),
229
260
  };
230
261
  return this.activeTarget;
231
262
  }
@@ -347,6 +378,9 @@ export class MacOSPlatform {
347
378
  }
348
379
  }
349
380
  async getWindowState(windowId, depth, includeBounds = true) {
381
+ if (!windowId || windowId === this.activeTarget?.windowId) {
382
+ await this.validateActiveTarget();
383
+ }
350
384
  const resolvedWindowId = windowId || this.activeTarget?.windowId;
351
385
  if (!resolvedWindowId) {
352
386
  throw new WindowNotFoundError("active target");
@@ -769,13 +803,24 @@ export class MacOSPlatform {
769
803
  // ── Accessibility (AX) Element Actions ───────────────────────────────────
770
804
  async findElement(options) {
771
805
  this.evictExpiredCacheEntries();
772
- const { text, role, app, depth, includeBounds = true } = options;
806
+ const { text, role, app, depth, includeBounds = true, textMode = "contains", visibleOnly = false, value } = options;
773
807
  const effectiveApp = app || this.activeTarget?.appName;
774
808
  const maxDepth = Math.min(depth || 5, 10);
775
809
  const maxResults = Math.min(Math.max(options.maxResults ?? 50, 1), 200);
776
810
  const escapedApp = (effectiveApp || "").replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$');
777
811
  const escapedText = text ? text.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$') : "";
778
812
  const escapedRole = role ? role.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$') : "";
813
+ const escapedValue = value ? value.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/`/g, '\\`').replace(/\$/g, '\\$') : "";
814
+ // Pre-compile regex on TS side to validate syntax before passing to JXA
815
+ if (text && textMode === "regex") {
816
+ try {
817
+ new RegExp(text);
818
+ }
819
+ catch {
820
+ throw new PlatformError(`Invalid regex pattern: ${text}`);
821
+ }
822
+ }
823
+ const startTime = Date.now();
779
824
  const jxaScript = `
780
825
  var se = Application('System Events');
781
826
  function childElements(elem) {
@@ -784,14 +829,72 @@ export class MacOSPlatform {
784
829
  }
785
830
  }
786
831
  var results = [];
832
+ var scannedCount = 0;
833
+ var matchedCount = 0;
787
834
  var resultCount = [0];
788
835
  var maxResults = ${maxResults};
789
836
  var includeBounds = ${includeBounds ? "true" : "false"};
837
+ var visibleOnly = ${visibleOnly ? "true" : "false"};
838
+ var textMode = "${textMode}";
790
839
 
791
840
  var textFilter = ${text ? `"${escapedText}"` : "null"};
792
841
  var roleFilter = ${role ? `"${escapedRole}"` : "null"};
842
+ var valueFilter = ${value ? `"${escapedValue}"` : "null"};
843
+
844
+ function isVisible(elem) {
845
+ try {
846
+ var pos = elem.position();
847
+ var sz = elem.size();
848
+ if (!pos || !sz) return false;
849
+ return sz[0] > 0 && sz[1] > 0 && pos[0] > -10000 && pos[1] > -10000;
850
+ } catch(e) {
851
+ return false;
852
+ }
853
+ }
854
+
855
+ function textMatches(elemName, elemValue, elemDesc) {
856
+ if (textFilter === null) return true;
857
+ var sources = [elemName, elemValue, elemDesc];
858
+ if (textMode === "exact") {
859
+ var t = textFilter.toLowerCase();
860
+ for (var i = 0; i < sources.length; i++) {
861
+ if (sources[i].toLowerCase() === t) return true;
862
+ }
863
+ return false;
864
+ } else if (textMode === "regex") {
865
+ try {
866
+ var re = new RegExp(textFilter, "i");
867
+ for (var i = 0; i < sources.length; i++) {
868
+ if (re.test(sources[i])) return true;
869
+ }
870
+ } catch(e) {}
871
+ return false;
872
+ } else {
873
+ // contains (default)
874
+ var t = textFilter.toLowerCase();
875
+ for (var i = 0; i < sources.length; i++) {
876
+ if (sources[i].toLowerCase().indexOf(t) !== -1) return true;
877
+ }
878
+ return false;
879
+ }
880
+ }
881
+
882
+ function valueMatches(elemValue) {
883
+ if (valueFilter === null) return true;
884
+ if (textMode === "exact") {
885
+ return elemValue.toLowerCase() === valueFilter.toLowerCase();
886
+ } else if (textMode === "regex") {
887
+ try {
888
+ return new RegExp(valueFilter, "i").test(elemValue);
889
+ } catch(e) { return false; }
890
+ } else {
891
+ // contains (default)
892
+ return elemValue.toLowerCase().indexOf(valueFilter.toLowerCase()) !== -1;
893
+ }
894
+ }
793
895
 
794
896
  function matches(elem) {
897
+ scannedCount++;
795
898
  var elemName = '';
796
899
  var elemRole = '';
797
900
  var elemDesc = '';
@@ -801,17 +904,14 @@ export class MacOSPlatform {
801
904
  try { elemDesc = elem.description() || ''; } catch(e) {}
802
905
  try { var v = elem.value(); elemValue = (v !== undefined && v !== null) ? String(v) : ''; } catch(e) {}
803
906
 
804
- if (textFilter !== null) {
805
- var t = textFilter.toLowerCase();
806
- if (elemName.toLowerCase().indexOf(t) === -1 &&
807
- elemValue.toLowerCase().indexOf(t) === -1 &&
808
- elemDesc.toLowerCase().indexOf(t) === -1) {
809
- return false;
810
- }
811
- }
907
+ if (visibleOnly && !isVisible(elem)) return false;
908
+
909
+ if (!textMatches(elemName, elemValue, elemDesc)) return false;
812
910
  if (roleFilter !== null) {
813
911
  if (elemRole !== roleFilter) return false;
814
912
  }
913
+ if (!valueMatches(elemValue)) return false;
914
+ matchedCount++;
815
915
  return true;
816
916
  }
817
917
 
@@ -894,15 +994,16 @@ export class MacOSPlatform {
894
994
  }
895
995
  } catch(e) {}
896
996
 
897
- JSON.stringify(results);
997
+ JSON.stringify({results: results, scannedCount: scannedCount, matchedCount: matchedCount});
898
998
  `;
899
999
  try {
900
1000
  const out = execFileSync("osascript", [
901
1001
  "-l", "JavaScript",
902
1002
  "-e", jxaScript,
903
1003
  ], { encoding: "utf-8", timeout: 30000 }).trim();
904
- const results = JSON.parse(out);
905
- for (const result of results) {
1004
+ const parsed = JSON.parse(out);
1005
+ const durationMs = Date.now() - startTime;
1006
+ for (const result of parsed.results) {
906
1007
  const appName = effectiveApp || result.id.split("/")[0] || "";
907
1008
  this.elementCache.set(result.id, {
908
1009
  elementId: result.id,
@@ -918,7 +1019,32 @@ export class MacOSPlatform {
918
1019
  });
919
1020
  }
920
1021
  this.evictOverflowCacheEntries();
921
- return results;
1022
+ let finalResults = parsed.results;
1023
+ if (options.near) {
1024
+ const nx = options.near.x;
1025
+ const ny = options.near.y;
1026
+ finalResults = [...finalResults].sort((a, b) => {
1027
+ const acx = (a.bounds?.x ?? 0) + (a.bounds?.width ?? 0) / 2;
1028
+ const acy = (a.bounds?.y ?? 0) + (a.bounds?.height ?? 0) / 2;
1029
+ const bcx = (b.bounds?.x ?? 0) + (b.bounds?.width ?? 0) / 2;
1030
+ const bcy = (b.bounds?.y ?? 0) + (b.bounds?.height ?? 0) / 2;
1031
+ return Math.hypot(acx - nx, acy - ny) - Math.hypot(bcx - nx, bcy - ny);
1032
+ });
1033
+ }
1034
+ if (typeof options.index === "number") {
1035
+ finalResults = options.index >= 0 && options.index < finalResults.length
1036
+ ? [finalResults[options.index]]
1037
+ : [];
1038
+ }
1039
+ return {
1040
+ results: finalResults,
1041
+ metrics: {
1042
+ scannedCount: parsed.scannedCount,
1043
+ matchedCount: parsed.matchedCount,
1044
+ durationMs,
1045
+ truncated: parsed.results.length >= maxResults,
1046
+ },
1047
+ };
922
1048
  }
923
1049
  catch (error) {
924
1050
  rethrowAccessibilityError(error, "find_element");
@@ -1397,6 +1523,24 @@ export class MacOSPlatform {
1397
1523
  rethrowElementActionError(error, "type_in_element", elementId);
1398
1524
  }
1399
1525
  }
1526
+ // ── Clipboard ───────────────────────────────────────────────────────────
1527
+ async readClipboard() {
1528
+ try {
1529
+ const out = execFileSync("pbpaste", [], { encoding: "utf-8", timeout: 5000 });
1530
+ return out;
1531
+ }
1532
+ catch (error) {
1533
+ throw new PlatformError(`read_clipboard failed: ${errorMessage(error)}`);
1534
+ }
1535
+ }
1536
+ async writeClipboard(text) {
1537
+ try {
1538
+ execFileSync("pbcopy", [], { input: text, encoding: "utf-8", timeout: 5000 });
1539
+ }
1540
+ catch (error) {
1541
+ throw new PlatformError(`write_clipboard failed: ${errorMessage(error)}`);
1542
+ }
1543
+ }
1400
1544
  async setElementValue(elementId, value, app) {
1401
1545
  this.evictExpiredCacheEntries();
1402
1546
  const effectiveApp = app || this.activeTarget?.appName;
@@ -1,4 +1,4 @@
1
- import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResult } from "./base.js";
1
+ import type { Platform, ScreenRegion, ScreenSize, CursorPosition, WindowInfo, WindowState, OcrResult, FindElementOptions, FindElementResponse } from "./base.js";
2
2
  export declare class WindowsPlatform implements Platform {
3
3
  screenshot(_display?: number, _region?: ScreenRegion): Promise<Buffer>;
4
4
  getScreenSize(_display?: number): ScreenSize;
@@ -12,7 +12,9 @@ export declare class WindowsPlatform implements Platform {
12
12
  type(_text: string, _delay?: number): Promise<void>;
13
13
  key(_keys: string[]): Promise<void>;
14
14
  ocr(_display?: number, _region?: ScreenRegion): Promise<OcrResult>;
15
- findElement(_options: FindElementOptions): Promise<FindElementResult[]>;
15
+ findElement(_options: FindElementOptions): Promise<FindElementResponse>;
16
16
  clickElement(_elementId: string, _app?: string): Promise<void>;
17
17
  typeInElement(_elementId: string, _text: string, _app?: string, _clearFirst?: boolean): Promise<void>;
18
+ readClipboard(): Promise<string>;
19
+ writeClipboard(text: string): Promise<void>;
18
20
  }
@@ -1,3 +1,17 @@
1
+ import { execFileSync } from "node:child_process";
2
+ import { PlatformError } from "../util/errors.js";
3
/**
 * Run a PowerShell script synchronously and return its stdout.
 *
 * @param script PowerShell source passed via `-Command`.
 * @param input Optional text written to the process's stdin; omitted when undefined.
 * @returns The script's stdout decoded as UTF-8.
 * @throws PlatformError when the process cannot be started, exits with an
 *         error, or exceeds the 10 second timeout.
 */
function runPowerShell(script, input) {
  try {
    return execFileSync("powershell.exe", ["-NoProfile", "-NonInteractive", "-Command", script], {
      encoding: "utf-8",
      timeout: 10000,
      // Keep a console window from flashing up for every clipboard call.
      windowsHide: true,
      ...(input !== undefined ? { input } : {}),
    });
  }
  catch (error) {
    // A thrown value is not guaranteed to be an Error; avoid "undefined" in the message.
    const detail = error instanceof Error ? error.message : String(error);
    throw new PlatformError(`PowerShell failed: ${detail}`);
  }
}
1
15
  export class WindowsPlatform {
2
16
  async screenshot(_display, _region) {
3
17
  throw new Error("Not implemented: Windows screenshot");
@@ -45,4 +59,23 @@ export class WindowsPlatform {
45
59
  async typeInElement(_elementId, _text, _app, _clearFirst) {
46
60
  throw new Error("Not implemented: Windows typeInElement");
47
61
  }
62
+ async readClipboard() {
63
+ try {
64
+ // Get-Clipboard returns the clipboard text; trim trailing newline PowerShell adds
65
+ const out = runPowerShell("Get-Clipboard -Raw");
66
+ return out;
67
+ }
68
+ catch (error) {
69
+ throw new PlatformError(`read_clipboard failed: ${error.message}`);
70
+ }
71
+ }
72
+ async writeClipboard(text) {
73
+ try {
74
+ // Pipe the text to Set-Clipboard via stdin to avoid shell quoting issues
75
+ runPowerShell("$stdin = [Console]::In.ReadToEnd(); Set-Clipboard -Value $stdin", text);
76
+ }
77
+ catch (error) {
78
+ throw new PlatformError(`write_clipboard failed: ${error.message}`);
79
+ }
80
+ }
48
81
  }
@@ -22,6 +22,11 @@ export interface SafetyGuardConfig {
22
22
  /** Minimum milliseconds between consecutive actions (default 100). */
23
23
  rateLimitMs?: number;
24
24
  }
25
+ /** Actions that observe UI/system state without altering it. */
26
+ export declare const OBSERVE_ACTIONS: ReadonlySet<string>;
27
+ /** Actions that synthesize user input — need full user-activity protection. */
28
+ export declare const INPUT_ACTIONS: ReadonlySet<string>;
29
+ export declare function classifyAction(action: string): "observe" | "input" | "other";
25
30
  export declare class SafetyGuard {
26
31
  private readonly blockedKeys;
27
32
  private readonly skippedWindows;
@@ -40,7 +45,9 @@ export declare class SafetyGuard {
40
45
  * - "key": { keys: string[] }
41
46
  * - any action: { windowTitle?: string }
42
47
  */
43
- checkAction(action: string, params?: Record<string, unknown>): SafetyCheckResult;
48
+ checkAction(action: string, params?: Record<string, unknown>, options?: {
49
+ skipUserActivityPause?: boolean;
50
+ }): SafetyCheckResult;
44
51
  /** Record that the user performed an activity (mouse/keyboard). */
45
52
  recordUserActivity(): void;
46
53
  /** Set the pause duration after user activity (default 2000ms). */
@@ -84,6 +84,45 @@ function normalizeShortcut(raw) {
84
84
  .join("+");
85
85
  }
86
86
  // ---------------------------------------------------------------------------
87
+ // Action classification (observe vs input)
88
+ // ---------------------------------------------------------------------------
89
// Names are kept in plain arrays so each list reads top to bottom; the exported
// sets are built from them in the same order.
const observeActionNames = [
  "screenshot",
  "list_windows",
  "list_apps",
  "get_window_state",
  "get_screen_size",
  "get_cursor_position",
  "ocr",
  "find_element",
  "wait",
  "wait_for_element",
  "doctor",
  "clipboard_read",
];
const inputActionNames = [
  "click",
  "double_click",
  "scroll",
  "drag",
  "move",
  "type_text",
  "press_key",
  "click_element",
  "type_in_element",
  "set_value",
  "clipboard_write",
];
/** Actions that only read UI/system state and never change it. */
export const OBSERVE_ACTIONS = new Set(observeActionNames);
/** Actions that synthesize user input and therefore need full user-activity protection. */
export const INPUT_ACTIONS = new Set(inputActionNames);
/**
 * Classify an action name.
 *
 * @param action Action name to look up.
 * @returns "observe" if the name is in OBSERVE_ACTIONS, "input" if it is in
 *          INPUT_ACTIONS, otherwise "other".
 */
export function classifyAction(action) {
  return OBSERVE_ACTIONS.has(action)
    ? "observe"
    : INPUT_ACTIONS.has(action)
      ? "input"
      : "other";
}
125
+ // ---------------------------------------------------------------------------
87
126
  // SafetyGuard
88
127
  // ---------------------------------------------------------------------------
89
128
  export class SafetyGuard {
@@ -123,7 +162,7 @@ export class SafetyGuard {
123
162
  * - "key": { keys: string[] }
124
163
  * - any action: { windowTitle?: string }
125
164
  */
126
- checkAction(action, params = {}) {
165
+ checkAction(action, params = {}, options = {}) {
127
166
  // 1. Key blocklist -------------------------------------------------------
128
167
  if (action === "key" || action === "press_key") {
129
168
  const keys = params.keys;
@@ -164,7 +203,7 @@ export class SafetyGuard {
164
203
  }
165
204
  }
166
205
  // 4. Text injection scan --------------------------------------------------
167
- if (!this.allowUnsafeText && (action === "type" || action === "type_text" || action === "type_in_element" || action === "set_value")) {
206
+ if (!this.allowUnsafeText && (action === "type" || action === "type_text" || action === "type_in_element" || action === "set_value" || action === "clipboard_write")) {
168
207
  const text = typeof params.text === "string"
169
208
  ? params.text
170
209
  : typeof params.value === "string"
@@ -191,8 +230,8 @@ export class SafetyGuard {
191
230
  };
192
231
  }
193
232
  this.lastActionTime = now;
194
- // 6. User activity pause --------------------------------------------------
195
- if (this.isUserActivityPauseActive()) {
233
+ // 6. User activity pause (skipped for observe-class actions) -----------------
234
+ if (!options.skipUserActivityPause && this.isUserActivityPauseActive()) {
196
235
  return {
197
236
  allowed: false,
198
237
  reason: `User activity detected — pausing automation for ${this.userActivityPauseMs}ms`,
@@ -34,6 +34,12 @@ export declare class PermissionError extends UcuError {
34
34
  export declare class WindowNotFoundError extends UcuError {
35
35
  constructor(windowId: string);
36
36
  }
37
+ /**
38
+ * Active target window is no longer available.
39
+ */
40
+ export declare class TargetStaleError extends UcuError {
41
+ constructor(windowId: string);
42
+ }
37
43
  /**
38
44
  * Requested accessibility element ID no longer resolves.
39
45
  */
@@ -68,6 +68,14 @@ export class WindowNotFoundError extends UcuError {
68
68
  super(`Window ${windowId} not found. It may have been closed. Run list_windows to get fresh IDs.`, "WINDOW_NOT_FOUND", false);
69
69
  }
70
70
  }
71
/**
 * Raised when the active target window can no longer be found.
 * The error is constructed with the code "TARGET_STALE".
 */
export class TargetStaleError extends UcuError {
  /**
   * @param windowId ID of the window that disappeared; embedded in the message.
   */
  constructor(windowId) {
    const message = `Active target window ${windowId} is no longer available. Run focus_app or list_windows to refresh.`;
    super(message, "TARGET_STALE", false);
  }
}
71
79
  /**
72
80
  * Requested accessibility element ID no longer resolves.
73
81
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ucu-mcp",
3
- "version": "0.2.0",
3
+ "version": "0.3.0",
4
4
  "description": "MCP server for Universal Computer Use — desktop automation for AI agents via Model Context Protocol",
5
5
  "type": "module",
6
6
  "bin": {
@@ -38,7 +38,7 @@
38
38
  ],
39
39
  "repository": {
40
40
  "type": "git",
41
- "url": "git+https://github.com/2876674942/ucu-mcp-backup.git"
41
+ "url": "git+https://github.com/kaguyaluna2333/ucu-mcp-backup.git"
42
42
  },
43
43
  "license": "MIT",
44
44
  "dependencies": {