ucu-mcp 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -1
- package/README.md +68 -12
- package/dist/src/mcp/server.js +18 -2
- package/dist/src/mcp/tools.d.ts +1 -0
- package/dist/src/mcp/tools.js +173 -65
- package/dist/src/platform/macos.d.ts +4 -0
- package/dist/src/platform/macos.js +355 -215
- package/dist/src/util/errors.d.ts +6 -0
- package/dist/src/util/errors.js +8 -0
- package/dist/src/utils/input.js +88 -18
- package/native/cgevent/cgevent-helper +0 -0
- package/native/cgevent/main.swift +126 -0
- package/native/ocr/main.swift +89 -0
- package/native/ocr/ocr-helper +0 -0
- package/package.json +6 -3
|
@@ -34,6 +34,12 @@ export declare class PermissionError extends UcuError {
|
|
|
34
34
|
export declare class WindowNotFoundError extends UcuError {
|
|
35
35
|
constructor(windowId: string);
|
|
36
36
|
}
|
|
37
|
+
/**
|
|
38
|
+
* Thrown when a requested accessibility element ID no longer resolves; the error message advises running find_element to obtain a fresh ID.
|
|
39
|
+
*/
|
|
40
|
+
export declare class ElementNotFoundError extends UcuError {
|
|
41
|
+
constructor(elementId: string);
|
|
42
|
+
}
|
|
37
43
|
/**
|
|
38
44
|
* Click/scroll target is outside screen bounds.
|
|
39
45
|
*/
|
package/dist/src/util/errors.js
CHANGED
|
@@ -68,6 +68,14 @@ export class WindowNotFoundError extends UcuError {
|
|
|
68
68
|
super(`Window ${windowId} not found. It may have been closed. Run list_windows to get fresh IDs.`, "WINDOW_NOT_FOUND", false);
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
|
+
/**
 * Raised when an accessibility element ID can no longer be resolved.
 *
 * @param elementId - ID that failed to resolve; it is interpolated into the message.
 */
export class ElementNotFoundError extends UcuError {
    constructor(elementId) {
        const message = `Element ${elementId} not found. It may have been removed or invalidated. Run find_element to get a fresh ID.`;
        // Same argument shape (message, code, flag) as the sibling WindowNotFoundError.
        super(message, "ELEMENT_NOT_FOUND", false);
    }
}
|
|
71
79
|
// ---------------------------------------------------------------------------
|
|
72
80
|
// Input Errors
|
|
73
81
|
// ---------------------------------------------------------------------------
|
package/dist/src/utils/input.js
CHANGED
|
@@ -9,10 +9,51 @@
|
|
|
9
9
|
* Windows: Uses SendInput (stub).
|
|
10
10
|
* Linux: Uses xdotool (stub).
|
|
11
11
|
*/
|
|
12
|
-
import { execFile } from "node:child_process";
|
|
12
|
+
import { execFile, execFileSync } from "node:child_process";
|
|
13
13
|
import { promisify } from "node:util";
|
|
14
|
+
import { join, dirname } from "node:path";
|
|
15
|
+
import { fileURLToPath } from "node:url";
|
|
14
16
|
import { logger } from "../util/logger.js";
|
|
15
17
|
const execFileAsync = promisify(execFile);
|
|
18
|
+
// ── Native CGEvent helper (macOS) ──────────────────────────────────────
// JXA (osascript -l JavaScript) cannot call CGEventPost without segfault,
// so a small Swift binary is shipped to do the native CGEvent injection.
import { existsSync } from "node:fs";
const __dirname = dirname(fileURLToPath(import.meta.url));
// Location of the helper relative to whichever root directory contains `native/`.
const NATIVE_HELPER_SEGMENTS = ["native", "cgevent", "cgevent-helper"];
// Published layout (dist/src/utils/input.js): three levels up is the package
// root, which is where the `native/` directory is shipped.
const nativeHelperPath = join(__dirname, "..", "..", "..", ...NATIVE_HELPER_SEGMENTS);
// Source layout (src/utils/input.ts): the project root is only two levels up.
const nativeHelperPathAlt = join(__dirname, "..", "..", ...NATIVE_HELPER_SEGMENTS);
// Prefer the primary location; fall back to the alternate one without checking
// it — a missing binary is detected later when the helper is first pinged.
const resolvedNativePath = existsSync(nativeHelperPath) ? nativeHelperPath : nativeHelperPathAlt;
|
|
29
|
+
/** Memoized probe result; stays `undefined` until the helper has been pinged once. */
let _nativeAvailable;
/**
 * Reports whether the native CGEvent helper binary can be used.
 *
 * The first call runs the helper synchronously with a `ping` command (3 s
 * timeout) and caches the outcome; later calls return the cached value.
 *
 * @returns `true` when the helper's output contains `"ok"`, otherwise `false`.
 */
function isNativeAvailable() {
    if (_nativeAvailable === undefined) {
        let reply = "";
        try {
            reply = execFileSync(resolvedNativePath, [], {
                input: '{"command":"ping"}',
                encoding: "utf8",
                timeout: 3000,
            });
        }
        catch {
            // Any failure to run the helper simply means "not available".
            reply = "";
        }
        _nativeAvailable = reply.includes('"ok"');
    }
    return _nativeAvailable;
}
|
|
46
|
+
/**
 * Sends one command to the native CGEvent helper and throws unless it succeeds.
 *
 * The payload is written to the helper's stdin as JSON and the helper's stdout
 * is parsed as a JSON object. The call is synchronous.
 *
 * @param payload - Command object, e.g. `{ command: "click", x, y, button }`.
 * @throws Error when the helper cannot be run or times out, when its output is
 *   not a JSON object, or when the response carries an `error` field.
 */
function runNativeChecked(payload) {
    // A drag spends roughly `durationMs` sleeping inside the helper. Add that to
    // the base budget so a long drag is not killed after mouse-down was posted.
    const gestureMs = Number.isFinite(payload?.durationMs) && payload.durationMs > 0
        ? payload.durationMs
        : 0;
    const raw = execFileSync(resolvedNativePath, [], {
        input: JSON.stringify(payload),
        encoding: "utf8",
        timeout: 10000 + gestureMs,
    }).trim();
    let resp;
    try {
        resp = JSON.parse(raw);
    }
    catch {
        // Keep the prefix used for helper-reported errors and show what came back.
        throw new Error(`native helper error: unparseable response ${JSON.stringify(raw.slice(0, 200))}`);
    }
    if (resp === null || typeof resp !== "object") {
        throw new Error(`native helper error: unexpected response ${JSON.stringify(raw.slice(0, 200))}`);
    }
    if (resp.error) {
        throw new Error(`native helper error: ${resp.error}`);
    }
}
|
|
16
57
|
// ── Dry-run mode ──────────────────────────────────────────────────────────
|
|
17
58
|
const isDryRun = () => process.env.UCU_DRY_RUN === "true";
|
|
18
59
|
function logDryRun(action, details) {
|
|
@@ -63,6 +104,10 @@ export async function click(x, y, button = "left", _platform = process.platform)
|
|
|
63
104
|
return;
|
|
64
105
|
}
|
|
65
106
|
if (_platform === "darwin") {
|
|
107
|
+
if (isNativeAvailable()) {
|
|
108
|
+
runNativeChecked({ command: "click", x, y, button });
|
|
109
|
+
return;
|
|
110
|
+
}
|
|
66
111
|
const btnType = { left: 0, right: 1, middle: 2 }[button];
|
|
67
112
|
await runJXA(`
|
|
68
113
|
ObjC.import('CoreGraphics');
|
|
@@ -91,6 +136,10 @@ export async function doubleClick(x, y, button = "left", _platform = process.pla
|
|
|
91
136
|
return;
|
|
92
137
|
}
|
|
93
138
|
if (_platform === "darwin") {
|
|
139
|
+
if (isNativeAvailable()) {
|
|
140
|
+
runNativeChecked({ command: "doubleClick", x, y, button });
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
94
143
|
const btnType = { left: 0, right: 1, middle: 2 }[button];
|
|
95
144
|
await runJXA(`
|
|
96
145
|
ObjC.import('CoreGraphics');
|
|
@@ -125,6 +174,10 @@ export async function move(x, y, _platform = process.platform) {
|
|
|
125
174
|
return;
|
|
126
175
|
}
|
|
127
176
|
if (_platform === "darwin") {
|
|
177
|
+
if (isNativeAvailable()) {
|
|
178
|
+
runNativeChecked({ command: "move", x, y });
|
|
179
|
+
return;
|
|
180
|
+
}
|
|
128
181
|
await runJXA(`
|
|
129
182
|
ObjC.import('CoreGraphics');
|
|
130
183
|
var loc = $.CGPointMake(${x}, ${y});
|
|
@@ -146,6 +199,10 @@ export async function drag(fromX, fromY, toX, toY, button = "left", duration = 3
|
|
|
146
199
|
return;
|
|
147
200
|
}
|
|
148
201
|
if (_platform === "darwin") {
|
|
202
|
+
if (isNativeAvailable()) {
|
|
203
|
+
runNativeChecked({ command: "drag", fromX, fromY, toX, toY, button, durationMs: duration });
|
|
204
|
+
return;
|
|
205
|
+
}
|
|
149
206
|
const btnType = { left: 0, right: 1, middle: 2 }[button];
|
|
150
207
|
const steps = Math.max(2, Math.min(60, Math.ceil(duration / 16)));
|
|
151
208
|
const delayMicros = Math.max(0, Math.floor((duration * 1000) / steps));
|
|
@@ -192,6 +249,10 @@ export async function scroll(x, y, deltaX, deltaY, _platform = process.platform)
|
|
|
192
249
|
return;
|
|
193
250
|
}
|
|
194
251
|
if (_platform === "darwin") {
|
|
252
|
+
if (isNativeAvailable()) {
|
|
253
|
+
runNativeChecked({ command: "scroll", x, y, deltaX, deltaY });
|
|
254
|
+
return;
|
|
255
|
+
}
|
|
195
256
|
const verticalDelta = -deltaY;
|
|
196
257
|
const horizontalDelta = deltaX;
|
|
197
258
|
await runJXA(`
|
|
@@ -314,23 +375,28 @@ export async function typeText(text, delay = 20, _platform = process.platform) {
|
|
|
314
375
|
// Process each batch
|
|
315
376
|
for (const batch of batches) {
|
|
316
377
|
if (batch.cgEvent && Array.isArray(batch.chars)) {
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
378
|
+
if (isNativeAvailable()) {
|
|
379
|
+
runNativeChecked({ command: "typeBatch", keys: batch.chars });
|
|
380
|
+
}
|
|
381
|
+
else {
|
|
382
|
+
// Build a single JXA script that types all chars in this CGEvent batch
|
|
383
|
+
const keyStatements = batch.chars.map(({ code, shift }) => {
|
|
384
|
+
const flags = shift ? SHIFT_FLAG : 0;
|
|
385
|
+
return `
|
|
386
|
+
kd = $.CGEventCreateKeyboardEvent(null, ${code}, true);
|
|
387
|
+
ku = $.CGEventCreateKeyboardEvent(null, ${code}, false);
|
|
388
|
+
if (${flags}) { $.CGEventSetFlags(kd, ${flags}); $.CGEventSetFlags(ku, ${flags}); }
|
|
389
|
+
$.CGEventPost(0, kd);
|
|
390
|
+
$.CGEventPost(0, ku);
|
|
391
|
+
$.CFRelease(kd);
|
|
392
|
+
$.CFRelease(ku);`;
|
|
393
|
+
}).join("\n");
|
|
394
|
+
await runJXA(`
|
|
395
|
+
ObjC.import('CoreGraphics');
|
|
396
|
+
var kd, ku;
|
|
397
|
+
${keyStatements}
|
|
398
|
+
`);
|
|
399
|
+
}
|
|
334
400
|
}
|
|
335
401
|
else {
|
|
336
402
|
// Fallback: use osascript keystroke for unsupported chars (emoji, CJK, etc.)
|
|
@@ -369,6 +435,10 @@ export async function pressKey(key, modifiers = [], _platform = process.platform
|
|
|
369
435
|
}
|
|
370
436
|
flags |= flag;
|
|
371
437
|
}
|
|
438
|
+
if (isNativeAvailable()) {
|
|
439
|
+
runNativeChecked({ command: "pressKey", keyCode, flags });
|
|
440
|
+
return;
|
|
441
|
+
}
|
|
372
442
|
await runJXA(`
|
|
373
443
|
ObjC.import('CoreGraphics');
|
|
374
444
|
var flags = ${flags};
|
|
Binary file
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import CoreGraphics
|
|
2
|
+
import Foundation
|
|
3
|
+
|
|
4
|
+
/// JSON command read from stdin.
///
/// One flat struct covers every command (this avoids recursive enum decoding
/// issues), so every field except `command` is optional and is only read by
/// the handlers that need it.
struct Input: Decodable {
    /// One key of a `typeBatch` command: a virtual key code and an optional shift flag.
    struct KeyEntry: Decodable {
        let code: Int
        let shift: Bool?
    }

    /// Name of the command to run, e.g. "click" or "ping".
    let command: String

    // Pointer position for click, doubleClick and move.
    let x: Double?
    let y: Double?

    // Start and end points of a drag.
    let fromX: Double?
    let fromY: Double?
    let toX: Double?
    let toY: Double?

    /// Mouse button name; see `btn` for the accepted values.
    let button: String?
    /// Drag duration in milliseconds.
    let durationMs: Int?

    // Scroll deltas.
    let deltaX: Int?
    let deltaY: Int?

    // Key press: virtual key code plus raw CGEventFlags bits.
    let keyCode: Int?
    let flags: Int64?

    /// Keys for `typeBatch`.
    let keys: [KeyEntry]?
}
|
|
22
|
+
|
|
23
|
+
/// Writes `json` followed by a newline to standard output, then flushes stdio.
func out(_ json: String) {
    let line = Data((json + "\n").utf8)
    FileHandle.standardOutput.write(line)
    fflush(stdout)
}
|
|
24
|
+
/// Posts `event` at the HID event tap.
func post(_ event: CGEvent) {
    event.post(tap: .cghidEventTap)
}
|
|
25
|
+
|
|
26
|
+
/// Maps a button name from the JSON payload to a `CGMouseButton`.
///
/// "right" and "middle" are recognised; any other value, including `nil`,
/// selects the left button.
func btn(_ s: String?) -> CGMouseButton {
    if s == "right" { return .right }
    if s == "middle" { return .center }
    return .left
}
|
|
29
|
+
/// Returns the mouse-down event type for button `b` (left for anything but right/center).
func downT(_ b: CGMouseButton) -> CGEventType {
    if b == .right { return .rightMouseDown }
    if b == .center { return .otherMouseDown }
    return .leftMouseDown
}
|
|
32
|
+
/// Returns the mouse-up event type for button `b` (left for anything but right/center).
func upT(_ b: CGMouseButton) -> CGEventType {
    let isRight = (b == .right)
    let isCenter = (b == .center)
    return isRight ? .rightMouseUp : (isCenter ? .otherMouseUp : .leftMouseUp)
}
|
|
35
|
+
/// Returns the mouse-dragged event type for button `b` (left for anything but right/center).
func dragT(_ b: CGMouseButton) -> CGEventType {
    switch b {
    case .right:
        return .rightMouseDragged
    case .center:
        return .otherMouseDragged
    default:
        return .leftMouseDragged
    }
}
|
|
38
|
+
|
|
39
|
+
/// Posts a mouse-down / mouse-up pair for the requested button at (`x`, `y`).
///
/// - Parameter p: Decoded command; reads `x`, `y` and `button`.
/// - Returns: `{"ok":true}` once both events are posted, otherwise a JSON
///   object with an `error` field.
func doClick(_ p: Input) -> String {
    // Refuse to guess a position: falling back to (0, 0) would click at the
    // coordinate origin instead of reporting the malformed command.
    guard let x = p.x, let y = p.y else { return "{\"error\":\"missing x/y\"}" }
    let loc = CGPoint(x: x, y: y)
    let b = btn(p.button)
    guard let dn = CGEvent(mouseEventSource: nil, mouseType: downT(b), mouseCursorPosition: loc, mouseButton: b),
          let up = CGEvent(mouseEventSource: nil, mouseType: upT(b), mouseCursorPosition: loc, mouseButton: b)
    else { return "{\"error\":\"fail\"}" }
    post(dn)
    post(up)
    return "{\"ok\":true}"
}
|
|
46
|
+
|
|
47
|
+
/// Posts two down/up pairs at (`x`, `y`), tagged with click states 1 and 2 so
/// that the second pair is recognised as a double click.
///
/// - Parameter p: Decoded command; reads `x`, `y` and `button`.
/// - Returns: `{"ok":true}` once all four events are posted, otherwise a JSON
///   object with an `error` field.
func doDoubleClick(_ p: Input) -> String {
    // Refuse to guess a position: falling back to (0, 0) would click at the
    // coordinate origin instead of reporting the malformed command.
    guard let x = p.x, let y = p.y else { return "{\"error\":\"missing x/y\"}" }
    let loc = CGPoint(x: x, y: y)
    let b = btn(p.button)
    guard let d1 = CGEvent(mouseEventSource: nil, mouseType: downT(b), mouseCursorPosition: loc, mouseButton: b),
          let u1 = CGEvent(mouseEventSource: nil, mouseType: upT(b), mouseCursorPosition: loc, mouseButton: b),
          let d2 = CGEvent(mouseEventSource: nil, mouseType: downT(b), mouseCursorPosition: loc, mouseButton: b),
          let u2 = CGEvent(mouseEventSource: nil, mouseType: upT(b), mouseCursorPosition: loc, mouseButton: b)
    else { return "{\"error\":\"fail\"}" }
    // First pair is click 1, second pair is click 2.
    d1.setIntegerValueField(.mouseEventClickState, value: 1)
    u1.setIntegerValueField(.mouseEventClickState, value: 1)
    d2.setIntegerValueField(.mouseEventClickState, value: 2)
    u2.setIntegerValueField(.mouseEventClickState, value: 2)
    post(d1)
    post(u1)
    post(d2)
    post(u2)
    return "{\"ok\":true}"
}
|
|
58
|
+
|
|
59
|
+
/// Posts a mouse-moved event to (`x`, `y`).
///
/// - Parameter p: Decoded command; reads `x` and `y`.
/// - Returns: `{"ok":true}` once the event is posted, otherwise a JSON object
///   with an `error` field.
func doMove(_ p: Input) -> String {
    // Refuse to guess a position: falling back to (0, 0) would move the
    // pointer to the coordinate origin instead of reporting the malformed command.
    guard let x = p.x, let y = p.y else { return "{\"error\":\"missing x/y\"}" }
    let loc = CGPoint(x: x, y: y)
    guard let ev = CGEvent(mouseEventSource: nil, mouseType: .mouseMoved, mouseCursorPosition: loc, mouseButton: .left)
    else { return "{\"error\":\"fail\"}" }
    post(ev)
    return "{\"ok\":true}"
}
|
|
65
|
+
|
|
66
|
+
/// Presses the button at the start point, posts interpolated drag events
/// towards the end point, then releases the button there.
///
/// The path is split into 2–60 steps (about one per 16 ms of `durationMs`,
/// default 300 ms) and the helper sleeps between steps.
///
/// - Parameter p: Decoded command; reads `fromX`, `fromY`, `toX`, `toY`,
///   `button` and `durationMs`.
/// - Returns: `{"ok":true}` after the release event is posted, otherwise a JSON
///   object with an `error` field.
func doDrag(_ p: Input) -> String {
    // All four coordinates are required; defaulting any of them to 0 would
    // silently drag from or to the coordinate origin.
    guard let fromX = p.fromX, let fromY = p.fromY, let toX = p.toX, let toY = p.toY
    else { return "{\"error\":\"missing drag coordinates\"}" }
    let from = CGPoint(x: fromX, y: fromY)
    let to = CGPoint(x: toX, y: toY)
    let b = btn(p.button)
    let ms = max(0, p.durationMs ?? 300)
    let steps = max(2, min(60, Int(ceil(Double(ms) / 16.0))))
    // Per-step pause in microseconds, clamped so an absurd duration cannot
    // trap on overflow or on the conversion to usleep's UInt32 argument.
    let (totalMicros, overflowed) = ms.multipliedReportingOverflow(by: 1000)
    let delay = overflowed ? UInt32.max : UInt32(clamping: totalMicros / steps)
    guard let dn = CGEvent(mouseEventSource: nil, mouseType: downT(b), mouseCursorPosition: from, mouseButton: b)
    else { return "{\"error\":\"fail\"}" }
    post(dn)
    for n in 1...steps {
        let t = Double(n) / Double(steps)
        let pt = CGPoint(x: from.x + (to.x - from.x) * t, y: from.y + (to.y - from.y) * t)
        if let ev = CGEvent(mouseEventSource: nil, mouseType: dragT(b), mouseCursorPosition: pt, mouseButton: b) { post(ev) }
        // No pause after the final step: the release follows immediately.
        if delay > 0 && n < steps { usleep(delay) }
    }
    // Report a failed release instead of claiming success with the button held.
    guard let up = CGEvent(mouseEventSource: nil, mouseType: upT(b), mouseCursorPosition: to, mouseButton: b)
    else { return "{\"error\":\"fail\"}" }
    post(up)
    return "{\"ok\":true}"
}
|
|
84
|
+
|
|
85
|
+
/// Posts a pixel-unit scroll-wheel event built from `deltaX` / `deltaY`.
///
/// `deltaY` is negated before being passed as wheel 1; `deltaX` is passed
/// unchanged as wheel 2. Missing deltas count as 0.
///
/// - Parameter p: Decoded command; reads `deltaX` and `deltaY`.
/// - Returns: `{"ok":true}` once the event is posted, otherwise a JSON object
///   with an `error` field.
func doScroll(_ p: Input) -> String {
    // NOTE(review): `x` / `y` from the payload are not used here, so the event is
    // not positioned at the requested point — confirm whether that is intended.
    // Clamp to the symmetric Int32 range so oversized deltas (and the negation)
    // cannot trap and kill the helper without a JSON reply.
    let limit = Int(Int32.max)
    let dy = Int32(-min(max(p.deltaY ?? 0, -limit), limit))
    let dx = Int32(min(max(p.deltaX ?? 0, -limit), limit))
    guard let ev = CGEvent(scrollWheelEvent2Source: nil, units: .pixel, wheelCount: 2, wheel1: dy, wheel2: dx, wheel3: 0)
    else { return "{\"error\":\"fail\"}" }
    post(ev)
    return "{\"ok\":true}"
}
|
|
91
|
+
|
|
92
|
+
/// Posts a key-down / key-up pair for `keyCode` with the given modifier flags.
///
/// - Parameter p: Decoded command; reads `keyCode` and `flags` (raw
///   `CGEventFlags` bits, 0 when absent).
/// - Returns: `{"ok":true}` once both events are posted, otherwise a JSON
///   object with an `error` field.
func doPressKey(_ p: Input) -> String {
    // A missing key code used to fall back to key 0, and an out-of-range one
    // trapped in the UInt16 conversion; both are now reported as errors.
    guard let rawCode = p.keyCode, let code = UInt16(exactly: rawCode)
    else { return "{\"error\":\"missing or invalid keyCode\"}" }
    // Negative flag values cannot be represented as CGEventFlags bits.
    guard let rawFlags = UInt64(exactly: p.flags ?? 0)
    else { return "{\"error\":\"invalid flags\"}" }
    let flags = CGEventFlags(rawValue: rawFlags)
    guard let dn = CGEvent(keyboardEventSource: nil, virtualKey: code, keyDown: true),
          let up = CGEvent(keyboardEventSource: nil, virtualKey: code, keyDown: false)
    else { return "{\"error\":\"fail\"}" }
    dn.flags = flags
    up.flags = flags
    post(dn)
    post(up)
    return "{\"ok\":true}"
}
|
|
99
|
+
|
|
100
|
+
/// Types a sequence of keys, each as a key-down / key-up pair, optionally with
/// the shift flag set.
///
/// Every key code is validated before anything is posted, so a bad entry
/// rejects the whole batch instead of crashing part-way through it.
///
/// - Parameter p: Decoded command; reads `keys`.
/// - Returns: `{"ok":true}` after the batch has been processed, otherwise a
///   JSON object with an `error` field.
func doTypeBatch(_ p: Input) -> String {
    guard let keys = p.keys else { return "{\"error\":\"missing keys\"}" }
    var codes: [UInt16] = []
    codes.reserveCapacity(keys.count)
    for entry in keys {
        // An out-of-range code would otherwise trap in the UInt16 conversion.
        guard let code = UInt16(exactly: entry.code) else { return "{\"error\":\"invalid key code\"}" }
        codes.append(code)
    }
    let SHIFT = CGEventFlags(rawValue: 0x00020000)
    for (entry, code) in zip(keys, codes) {
        let flags: CGEventFlags = (entry.shift ?? false) ? SHIFT : []
        // Best effort, as before: a key whose events cannot be created is skipped.
        guard let dn = CGEvent(keyboardEventSource: nil, virtualKey: code, keyDown: true),
              let up = CGEvent(keyboardEventSource: nil, virtualKey: code, keyDown: false) else { continue }
        dn.flags = flags
        up.flags = flags
        post(dn)
        post(up)
    }
    return "{\"ok\":true}"
}
|
|
111
|
+
|
|
112
|
+
// Entry point: read one line of JSON from stdin, run the named command and
// print its JSON result. Undecodable input prints an error and exits with 1.
guard let line = readLine(),
      let data = line.data(using: .utf8),
      let input = try? JSONDecoder().decode(Input.self, from: data)
else {
    out("{\"error\":\"invalid JSON\"}")
    exit(1)
}

// Pick the reply first, then emit it once.
let reply: String
switch input.command {
case "click":
    reply = doClick(input)
case "doubleClick":
    reply = doDoubleClick(input)
case "move":
    reply = doMove(input)
case "drag":
    reply = doDrag(input)
case "scroll":
    reply = doScroll(input)
case "pressKey":
    reply = doPressKey(input)
case "typeBatch":
    reply = doTypeBatch(input)
case "ping":
    // Availability probe: no event is posted.
    reply = "{\"ok\":true}"
default:
    reply = "{\"error\":\"unknown\"}"
}
out(reply)
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import Vision
|
|
3
|
+
import AppKit
|
|
4
|
+
|
|
5
|
+
/// Request decoded from the JSON on stdin.
struct OCRInput: Decodable {
    /// Filesystem path of the image to run text recognition on.
    let imagePath: String
}
|
|
8
|
+
|
|
9
|
+
/// One recognised piece of text with its bounding box and confidence.
struct OCRElement: Encodable {
    /// Recognised string.
    let text: String
    /// Bounding box in image pixels; `y` is measured from the top edge.
    let x, y, width, height: Int
    /// Confidence reported for the recognised string.
    let confidence: Double
}
|
|
17
|
+
|
|
18
|
+
/// Response written to stdout as JSON.
struct OCROutput: Encodable {
    /// Recognised text elements; empty in error responses.
    let elements: [OCRElement]
    /// Text of all elements joined with newlines; empty in error responses.
    let fullText: String
    /// Failure description, or `nil` when recognition succeeded.
    let error: String?
}
|
|
23
|
+
|
|
24
|
+
/// Writes an `OCROutput` that carries only `message` to stdout and exits with status 1.
func exitWithError(_ message: String) -> Never {
    let failure = OCROutput(elements: [], fullText: "", error: message)
    FileHandle.standardOutput.write(try! JSONEncoder().encode(failure))
    exit(1)
}

// Read all of stdin as JSON
let stdinData = FileHandle.standardInput.readDataToEndOfFile()
guard let input = try? JSONDecoder().decode(OCRInput.self, from: stdinData) else {
    exitWithError("Failed to decode input JSON")
}

// Load the image and obtain a CGImage for Vision.
let url = URL(fileURLWithPath: input.imagePath)
guard let image = NSImage(contentsOf: url), image.isValid else {
    exitWithError("Failed to load image: \(input.imagePath)")
}

var proposedRect = NSRect.zero
guard let cgImage = image.cgImage(forProposedRect: &proposedRect, context: nil, hints: nil) else {
    exitWithError("Failed to get CGImage")
}

// Run text recognition at the accurate level with language correction.
let request = VNRecognizeTextRequest()
request.recognitionLevel = .accurate
request.usesLanguageCorrection = true

let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])

do {
    try handler.perform([request])
} catch {
    exitWithError("OCR failed: \(error.localizedDescription)")
}

guard let observations = request.results else {
    exitWithError("No OCR results")
}

let imgWidth = CGFloat(cgImage.width)
let imgHeight = CGFloat(cgImage.height)
var elements: [OCRElement] = []
var fullTextParts: [String] = []

for obs in observations {
    // Observations without any candidate string are skipped.
    guard let candidate = obs.topCandidates(1).first else { continue }
    let bbox = obs.boundingBox
    // Scale the bounding box by the image size and flip the y axis so that
    // `by` is measured from the top edge of the image.
    let bx = Int(bbox.origin.x * imgWidth)
    let by = Int((1 - bbox.origin.y - bbox.height) * imgHeight)
    let bw = Int(bbox.width * imgWidth)
    let bh = Int(bbox.height * imgHeight)
    elements.append(OCRElement(text: candidate.string, x: bx, y: by, width: bw, height: bh, confidence: Double(candidate.confidence)))
    fullTextParts.append(candidate.string)
}

let output = OCROutput(elements: elements, fullText: fullTextParts.joined(separator: "\n"), error: nil)
let encoded = try! JSONEncoder().encode(output)
FileHandle.standardOutput.write(encoded)
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ucu-mcp",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "MCP server for Universal Computer Use — desktop automation for AI agents via Model Context Protocol",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"ucu-mcp": "dist/bin/ucu-mcp.js"
|
|
8
8
|
},
|
|
9
9
|
"files": [
|
|
10
|
+
"native/",
|
|
10
11
|
"dist/bin/",
|
|
11
12
|
"dist/src/",
|
|
12
13
|
"dist/index.js",
|
|
@@ -17,13 +18,15 @@
|
|
|
17
18
|
"main": "./dist/index.js",
|
|
18
19
|
"types": "./dist/index.d.ts",
|
|
19
20
|
"scripts": {
|
|
20
|
-
"build": "tsc",
|
|
21
|
+
"build": "tsc && npm run build:native",
|
|
21
22
|
"start": "node dist/bin/ucu-mcp.js",
|
|
22
23
|
"dev": "tsx bin/ucu-mcp.ts",
|
|
23
24
|
"test": "vitest run",
|
|
24
25
|
"test:watch": "vitest",
|
|
25
26
|
"test:integration": "vitest run tests/integration/",
|
|
26
|
-
"test:macos-gui": "UCU_MACOS_GUI_SMOKE=1 vitest run tests/integration/macos-gui-smoke.test.ts"
|
|
27
|
+
"test:macos-gui": "UCU_MACOS_GUI_SMOKE=1 vitest run tests/integration/macos-gui-smoke.test.ts",
|
|
28
|
+
"test:client-cli": "UCU_CLIENT_CLI_SMOKE=1 vitest run tests/integration/client-cli-smoke.test.ts",
|
|
29
|
+
"build:native": "cd native/cgevent && swiftc -O -o cgevent-helper main.swift -framework CoreGraphics -framework Foundation && cd ../ocr && swiftc -O -o ocr-helper main.swift -framework Vision -framework AppKit"
|
|
27
30
|
},
|
|
28
31
|
"keywords": [
|
|
29
32
|
"mcp",
|