junis 0.3.16 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +127 -20
- package/dist/server/mcp.js +127 -20
- package/dist/server/stdio.js +9 -0
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -525,13 +525,19 @@ var toolPermissions = {
|
|
|
525
525
|
desktop_click: "confirm",
|
|
526
526
|
desktop_type: "confirm",
|
|
527
527
|
desktop_hotkey: "confirm",
|
|
528
|
+
desktop_press: "confirm",
|
|
528
529
|
desktop_scroll: "confirm",
|
|
529
530
|
desktop_move: "confirm",
|
|
531
|
+
desktop_drag: "confirm",
|
|
530
532
|
desktop_menu: "confirm",
|
|
531
533
|
desktop_paste: "confirm",
|
|
534
|
+
desktop_clipboard: "confirm",
|
|
535
|
+
desktop_dialog: "confirm",
|
|
532
536
|
desktop_screenshot: "confirm",
|
|
533
537
|
desktop_open_app: "auto",
|
|
534
538
|
desktop_open_url: "auto",
|
|
539
|
+
desktop_app_quit: "confirm",
|
|
540
|
+
desktop_window: "confirm",
|
|
535
541
|
cron_create: "confirm",
|
|
536
542
|
cron_delete: "confirm",
|
|
537
543
|
edit_block: "confirm",
|
|
@@ -564,6 +570,9 @@ var FilesystemTools = class {
|
|
|
564
570
|
"- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
|
|
565
571
|
"- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
|
|
566
572
|
"- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
|
|
573
|
+
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
|
|
574
|
+
" Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
|
|
575
|
+
" This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
|
|
567
576
|
"",
|
|
568
577
|
"BEHAVIOR:",
|
|
569
578
|
"- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
|
|
@@ -2095,15 +2104,81 @@ function checkBlacklist(app) {
|
|
|
2095
2104
|
function json(data) {
|
|
2096
2105
|
return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
|
|
2097
2106
|
}
|
|
2107
|
+
async function searchProcessElements(processName, query) {
|
|
2108
|
+
const safeApp = processName.replace(/[\\"]/g, "\\$&");
|
|
2109
|
+
const safeQuery = query.replace(/[\\"]/g, "\\$&");
|
|
2110
|
+
const script = `
|
|
2111
|
+
tell application "System Events"
|
|
2112
|
+
tell process "${safeApp}"
|
|
2113
|
+
set topElems to UI elements
|
|
2114
|
+
repeat with elem in topElems
|
|
2115
|
+
try
|
|
2116
|
+
if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
|
|
2117
|
+
set pos to position of elem
|
|
2118
|
+
set sz to size of elem
|
|
2119
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
2120
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
2121
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
2122
|
+
end if
|
|
2123
|
+
end try
|
|
2124
|
+
end repeat
|
|
2125
|
+
repeat with parent in topElems
|
|
2126
|
+
try
|
|
2127
|
+
repeat with elem in UI elements of parent
|
|
2128
|
+
try
|
|
2129
|
+
if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
|
|
2130
|
+
set pos to position of elem
|
|
2131
|
+
set sz to size of elem
|
|
2132
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
2133
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
2134
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
2135
|
+
end if
|
|
2136
|
+
end try
|
|
2137
|
+
end repeat
|
|
2138
|
+
end try
|
|
2139
|
+
end repeat
|
|
2140
|
+
end tell
|
|
2141
|
+
end tell
|
|
2142
|
+
return "NOT_FOUND"
|
|
2143
|
+
`;
|
|
2144
|
+
try {
|
|
2145
|
+
const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
|
|
2146
|
+
const result = stdout.trim();
|
|
2147
|
+
if (result === "NOT_FOUND" || !result.includes(",")) return null;
|
|
2148
|
+
return result;
|
|
2149
|
+
} catch {
|
|
2150
|
+
return null;
|
|
2151
|
+
}
|
|
2152
|
+
}
|
|
2153
|
+
async function resolveElementCoords(query, app) {
|
|
2154
|
+
let targetApp = app;
|
|
2155
|
+
if (!targetApp) {
|
|
2156
|
+
try {
|
|
2157
|
+
const { stdout } = await execa("osascript", [
|
|
2158
|
+
"-e",
|
|
2159
|
+
'tell application "System Events" to get name of first application process whose frontmost is true'
|
|
2160
|
+
]);
|
|
2161
|
+
targetApp = stdout.trim();
|
|
2162
|
+
} catch {
|
|
2163
|
+
return null;
|
|
2164
|
+
}
|
|
2165
|
+
}
|
|
2166
|
+
const result = await searchProcessElements(targetApp, query);
|
|
2167
|
+
if (result) return result;
|
|
2168
|
+
if (targetApp !== "Dock") {
|
|
2169
|
+
return await searchProcessElements("Dock", query);
|
|
2170
|
+
}
|
|
2171
|
+
return null;
|
|
2172
|
+
}
|
|
2098
2173
|
var DesktopTools = class {
|
|
2099
2174
|
register(server) {
|
|
2100
2175
|
server.tool(
|
|
2101
2176
|
"desktop_see",
|
|
2102
2177
|
[
|
|
2103
|
-
"Capture UI element tree
|
|
2104
|
-
"
|
|
2105
|
-
"
|
|
2106
|
-
"
|
|
2178
|
+
"Capture native UI element tree. Returns snapshot ID + elements with id/role/label/description.",
|
|
2179
|
+
"Useful for simple/moderate apps. May timeout on complex apps (100+ elements) \u2014 use desktop_click(query, app) which auto-resolves coordinates without needing desktop_see.",
|
|
2180
|
+
"IMPORTANT: Only sees native macOS UI. Web page content inside browsers is invisible \u2014 use browser_* tools.",
|
|
2181
|
+
"If timeout, use desktop_screenshot for visual context + desktop_click(query, app) or desktop_click(coords) to interact."
|
|
2107
2182
|
].join("\n"),
|
|
2108
2183
|
{
|
|
2109
2184
|
app: z5.string().optional().describe("App name, 'frontmost', or 'menubar'. Omit for frontmost."),
|
|
@@ -2125,7 +2200,7 @@ var DesktopTools = class {
|
|
|
2125
2200
|
id: e.id,
|
|
2126
2201
|
role: e.role,
|
|
2127
2202
|
label: e.label,
|
|
2128
|
-
|
|
2203
|
+
description: e.description
|
|
2129
2204
|
})) ?? [];
|
|
2130
2205
|
return json({ snapshotId, elements });
|
|
2131
2206
|
}
|
|
@@ -2133,9 +2208,10 @@ var DesktopTools = class {
|
|
|
2133
2208
|
server.tool(
|
|
2134
2209
|
"desktop_screenshot",
|
|
2135
2210
|
[
|
|
2136
|
-
"Take a screenshot. Returns base64 image.",
|
|
2137
|
-
"Use
|
|
2138
|
-
"
|
|
2211
|
+
"Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
|
|
2212
|
+
"Use for visual context or to verify UI state ONLY.",
|
|
2213
|
+
"WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
|
|
2214
|
+
"Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
|
|
2139
2215
|
].join("\n"),
|
|
2140
2216
|
{
|
|
2141
2217
|
app: z5.string().optional().describe("Capture specific app window"),
|
|
@@ -2172,9 +2248,13 @@ var DesktopTools = class {
|
|
|
2172
2248
|
server.tool(
|
|
2173
2249
|
"desktop_click",
|
|
2174
2250
|
[
|
|
2175
|
-
"Click a UI element
|
|
2176
|
-
"
|
|
2177
|
-
"
|
|
2251
|
+
"Click a UI element by text label (query), coordinates (coords), or element ID (on).",
|
|
2252
|
+
"BEST: Use query with app name \u2014 auto-resolves to exact screen coords via accessibility API. No desktop_see needed.",
|
|
2253
|
+
"GOOD: Use coords 'x,y' \u2014 MUST be from osascript position+size center calculation, NEVER from visual screenshot estimation.",
|
|
2254
|
+
"Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots.",
|
|
2255
|
+
"CAUTION: 'on' (element ID) has known offset bug \u2014 use coords or query instead when possible.",
|
|
2256
|
+
"If query not found: use execute_command + osascript to get element position+size, then center = (x + w/2, y + h/2). Or try desktop_menu, desktop_hotkey.",
|
|
2257
|
+
"NOTE: Web page elements (inside browser) are invisible to desktop tools. Use browser_* tools instead."
|
|
2178
2258
|
].join("\n"),
|
|
2179
2259
|
{
|
|
2180
2260
|
query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
|
|
@@ -2190,9 +2270,18 @@ var DesktopTools = class {
|
|
|
2190
2270
|
checkBlacklist(app);
|
|
2191
2271
|
if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
|
|
2192
2272
|
const args = ["click"];
|
|
2193
|
-
if (coords)
|
|
2194
|
-
|
|
2195
|
-
else if (query)
|
|
2273
|
+
if (coords) {
|
|
2274
|
+
args.push("--coords", coords);
|
|
2275
|
+
} else if (query) {
|
|
2276
|
+
const resolved = await resolveElementCoords(query, app);
|
|
2277
|
+
if (resolved) {
|
|
2278
|
+
args.push("--coords", resolved);
|
|
2279
|
+
} else {
|
|
2280
|
+
args.push(query);
|
|
2281
|
+
}
|
|
2282
|
+
} else if (on) {
|
|
2283
|
+
args.push("--on", on);
|
|
2284
|
+
}
|
|
2196
2285
|
if (app) args.push("--app", app);
|
|
2197
2286
|
if (snapshot) args.push("--snapshot", snapshot);
|
|
2198
2287
|
if (doubleClick) args.push("--double");
|
|
@@ -2230,6 +2319,7 @@ var DesktopTools = class {
|
|
|
2230
2319
|
"desktop_paste",
|
|
2231
2320
|
[
|
|
2232
2321
|
"Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
|
|
2322
|
+
"IMPORTANT: Focus the target field first (click it with desktop_click) before pasting.",
|
|
2233
2323
|
"Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
|
|
2234
2324
|
"Can also paste file contents via filePath."
|
|
2235
2325
|
].join("\n"),
|
|
@@ -2312,7 +2402,10 @@ var DesktopTools = class {
|
|
|
2312
2402
|
);
|
|
2313
2403
|
server.tool(
|
|
2314
2404
|
"desktop_move",
|
|
2315
|
-
|
|
2405
|
+
[
|
|
2406
|
+
"Move mouse cursor without clicking. Use before scroll or to hover.",
|
|
2407
|
+
"Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
|
|
2408
|
+
].join("\n"),
|
|
2316
2409
|
{
|
|
2317
2410
|
coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
|
|
2318
2411
|
to: z5.string().optional().describe("Element text/label to move to"),
|
|
@@ -2325,9 +2418,18 @@ var DesktopTools = class {
|
|
|
2325
2418
|
checkBlacklist(app);
|
|
2326
2419
|
if (!coords && !to && !id) throw new Error("Provide coords, to, or id.");
|
|
2327
2420
|
const args = ["move"];
|
|
2328
|
-
if (coords)
|
|
2329
|
-
|
|
2330
|
-
else if (to)
|
|
2421
|
+
if (coords) {
|
|
2422
|
+
args.push(coords);
|
|
2423
|
+
} else if (to) {
|
|
2424
|
+
const resolved = await resolveElementCoords(to, app);
|
|
2425
|
+
if (resolved) {
|
|
2426
|
+
args.push(resolved);
|
|
2427
|
+
} else {
|
|
2428
|
+
args.push("--to", to);
|
|
2429
|
+
}
|
|
2430
|
+
} else if (id) {
|
|
2431
|
+
args.push("--id", id);
|
|
2432
|
+
}
|
|
2331
2433
|
if (app) args.push("--app", app);
|
|
2332
2434
|
if (snapshot) args.push("--snapshot", snapshot);
|
|
2333
2435
|
if (smooth) args.push("--smooth");
|
|
@@ -2338,7 +2440,8 @@ var DesktopTools = class {
|
|
|
2338
2440
|
"desktop_drag",
|
|
2339
2441
|
[
|
|
2340
2442
|
"Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
|
|
2341
|
-
"
|
|
2443
|
+
"Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
|
|
2444
|
+
"Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
|
|
2342
2445
|
].join("\n"),
|
|
2343
2446
|
{
|
|
2344
2447
|
from: z5.string().optional().describe("Source element ID from desktop_see"),
|
|
@@ -2368,7 +2471,10 @@ var DesktopTools = class {
|
|
|
2368
2471
|
);
|
|
2369
2472
|
server.tool(
|
|
2370
2473
|
"desktop_open_app",
|
|
2371
|
-
|
|
2474
|
+
[
|
|
2475
|
+
"Launch or activate a macOS app. Already running apps are brought to front. Terminal/iTerm/Finder blocked.",
|
|
2476
|
+
"After launch, wait briefly then use desktop_click(query, app) to interact. desktop_see may timeout on complex apps \u2014 use desktop_screenshot as visual fallback."
|
|
2477
|
+
].join("\n"),
|
|
2372
2478
|
{
|
|
2373
2479
|
app: z5.string().describe("App name (e.g. 'Safari', 'KakaoTalk', 'Slack')")
|
|
2374
2480
|
},
|
|
@@ -2532,6 +2638,7 @@ var DesktopTools = class {
|
|
|
2532
2638
|
app: z5.string().optional().describe("App to open with")
|
|
2533
2639
|
},
|
|
2534
2640
|
async ({ url, app }) => {
|
|
2641
|
+
checkBlacklist(app);
|
|
2535
2642
|
const args = ["open", url];
|
|
2536
2643
|
if (app) args.push("--app", app);
|
|
2537
2644
|
return json(await peekaboo(args));
|
package/dist/server/mcp.js
CHANGED
|
@@ -41,13 +41,19 @@ var toolPermissions = {
|
|
|
41
41
|
desktop_click: "confirm",
|
|
42
42
|
desktop_type: "confirm",
|
|
43
43
|
desktop_hotkey: "confirm",
|
|
44
|
+
desktop_press: "confirm",
|
|
44
45
|
desktop_scroll: "confirm",
|
|
45
46
|
desktop_move: "confirm",
|
|
47
|
+
desktop_drag: "confirm",
|
|
46
48
|
desktop_menu: "confirm",
|
|
47
49
|
desktop_paste: "confirm",
|
|
50
|
+
desktop_clipboard: "confirm",
|
|
51
|
+
desktop_dialog: "confirm",
|
|
48
52
|
desktop_screenshot: "confirm",
|
|
49
53
|
desktop_open_app: "auto",
|
|
50
54
|
desktop_open_url: "auto",
|
|
55
|
+
desktop_app_quit: "confirm",
|
|
56
|
+
desktop_window: "confirm",
|
|
51
57
|
cron_create: "confirm",
|
|
52
58
|
cron_delete: "confirm",
|
|
53
59
|
edit_block: "confirm",
|
|
@@ -80,6 +86,9 @@ var FilesystemTools = class {
|
|
|
80
86
|
"- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
|
|
81
87
|
"- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
|
|
82
88
|
"- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
|
|
89
|
+
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
|
|
90
|
+
" Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
|
|
91
|
+
" This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
|
|
83
92
|
"",
|
|
84
93
|
"BEHAVIOR:",
|
|
85
94
|
"- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
|
|
@@ -1611,15 +1620,81 @@ function checkBlacklist(app) {
|
|
|
1611
1620
|
function json(data) {
|
|
1612
1621
|
return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
|
|
1613
1622
|
}
|
|
1623
|
+
async function searchProcessElements(processName, query) {
|
|
1624
|
+
const safeApp = processName.replace(/[\\"]/g, "\\$&");
|
|
1625
|
+
const safeQuery = query.replace(/[\\"]/g, "\\$&");
|
|
1626
|
+
const script = `
|
|
1627
|
+
tell application "System Events"
|
|
1628
|
+
tell process "${safeApp}"
|
|
1629
|
+
set topElems to UI elements
|
|
1630
|
+
repeat with elem in topElems
|
|
1631
|
+
try
|
|
1632
|
+
if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
|
|
1633
|
+
set pos to position of elem
|
|
1634
|
+
set sz to size of elem
|
|
1635
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
1636
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
1637
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
1638
|
+
end if
|
|
1639
|
+
end try
|
|
1640
|
+
end repeat
|
|
1641
|
+
repeat with parent in topElems
|
|
1642
|
+
try
|
|
1643
|
+
repeat with elem in UI elements of parent
|
|
1644
|
+
try
|
|
1645
|
+
if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
|
|
1646
|
+
set pos to position of elem
|
|
1647
|
+
set sz to size of elem
|
|
1648
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
1649
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
1650
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
1651
|
+
end if
|
|
1652
|
+
end try
|
|
1653
|
+
end repeat
|
|
1654
|
+
end try
|
|
1655
|
+
end repeat
|
|
1656
|
+
end tell
|
|
1657
|
+
end tell
|
|
1658
|
+
return "NOT_FOUND"
|
|
1659
|
+
`;
|
|
1660
|
+
try {
|
|
1661
|
+
const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
|
|
1662
|
+
const result = stdout.trim();
|
|
1663
|
+
if (result === "NOT_FOUND" || !result.includes(",")) return null;
|
|
1664
|
+
return result;
|
|
1665
|
+
} catch {
|
|
1666
|
+
return null;
|
|
1667
|
+
}
|
|
1668
|
+
}
|
|
1669
|
+
async function resolveElementCoords(query, app) {
|
|
1670
|
+
let targetApp = app;
|
|
1671
|
+
if (!targetApp) {
|
|
1672
|
+
try {
|
|
1673
|
+
const { stdout } = await execa("osascript", [
|
|
1674
|
+
"-e",
|
|
1675
|
+
'tell application "System Events" to get name of first application process whose frontmost is true'
|
|
1676
|
+
]);
|
|
1677
|
+
targetApp = stdout.trim();
|
|
1678
|
+
} catch {
|
|
1679
|
+
return null;
|
|
1680
|
+
}
|
|
1681
|
+
}
|
|
1682
|
+
const result = await searchProcessElements(targetApp, query);
|
|
1683
|
+
if (result) return result;
|
|
1684
|
+
if (targetApp !== "Dock") {
|
|
1685
|
+
return await searchProcessElements("Dock", query);
|
|
1686
|
+
}
|
|
1687
|
+
return null;
|
|
1688
|
+
}
|
|
1614
1689
|
var DesktopTools = class {
|
|
1615
1690
|
register(server) {
|
|
1616
1691
|
server.tool(
|
|
1617
1692
|
"desktop_see",
|
|
1618
1693
|
[
|
|
1619
|
-
"Capture UI element tree
|
|
1620
|
-
"
|
|
1621
|
-
"
|
|
1622
|
-
"
|
|
1694
|
+
"Capture native UI element tree. Returns snapshot ID + elements with id/role/label/description.",
|
|
1695
|
+
"Useful for simple/moderate apps. May timeout on complex apps (100+ elements) \u2014 use desktop_click(query, app) which auto-resolves coordinates without needing desktop_see.",
|
|
1696
|
+
"IMPORTANT: Only sees native macOS UI. Web page content inside browsers is invisible \u2014 use browser_* tools.",
|
|
1697
|
+
"If timeout, use desktop_screenshot for visual context + desktop_click(query, app) or desktop_click(coords) to interact."
|
|
1623
1698
|
].join("\n"),
|
|
1624
1699
|
{
|
|
1625
1700
|
app: z5.string().optional().describe("App name, 'frontmost', or 'menubar'. Omit for frontmost."),
|
|
@@ -1641,7 +1716,7 @@ var DesktopTools = class {
|
|
|
1641
1716
|
id: e.id,
|
|
1642
1717
|
role: e.role,
|
|
1643
1718
|
label: e.label,
|
|
1644
|
-
|
|
1719
|
+
description: e.description
|
|
1645
1720
|
})) ?? [];
|
|
1646
1721
|
return json({ snapshotId, elements });
|
|
1647
1722
|
}
|
|
@@ -1649,9 +1724,10 @@ var DesktopTools = class {
|
|
|
1649
1724
|
server.tool(
|
|
1650
1725
|
"desktop_screenshot",
|
|
1651
1726
|
[
|
|
1652
|
-
"Take a screenshot. Returns base64 image.",
|
|
1653
|
-
"Use
|
|
1654
|
-
"
|
|
1727
|
+
"Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
|
|
1728
|
+
"Use for visual context or to verify UI state ONLY.",
|
|
1729
|
+
"WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
|
|
1730
|
+
"Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
|
|
1655
1731
|
].join("\n"),
|
|
1656
1732
|
{
|
|
1657
1733
|
app: z5.string().optional().describe("Capture specific app window"),
|
|
@@ -1688,9 +1764,13 @@ var DesktopTools = class {
|
|
|
1688
1764
|
server.tool(
|
|
1689
1765
|
"desktop_click",
|
|
1690
1766
|
[
|
|
1691
|
-
"Click a UI element
|
|
1692
|
-
"
|
|
1693
|
-
"
|
|
1767
|
+
"Click a UI element by text label (query), coordinates (coords), or element ID (on).",
|
|
1768
|
+
"BEST: Use query with app name \u2014 auto-resolves to exact screen coords via accessibility API. No desktop_see needed.",
|
|
1769
|
+
"GOOD: Use coords 'x,y' \u2014 MUST be from osascript position+size center calculation, NEVER from visual screenshot estimation.",
|
|
1770
|
+
"Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots.",
|
|
1771
|
+
"CAUTION: 'on' (element ID) has known offset bug \u2014 use coords or query instead when possible.",
|
|
1772
|
+
"If query not found: use execute_command + osascript to get element position+size, then center = (x + w/2, y + h/2). Or try desktop_menu, desktop_hotkey.",
|
|
1773
|
+
"NOTE: Web page elements (inside browser) are invisible to desktop tools. Use browser_* tools instead."
|
|
1694
1774
|
].join("\n"),
|
|
1695
1775
|
{
|
|
1696
1776
|
query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
|
|
@@ -1706,9 +1786,18 @@ var DesktopTools = class {
|
|
|
1706
1786
|
checkBlacklist(app);
|
|
1707
1787
|
if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
|
|
1708
1788
|
const args = ["click"];
|
|
1709
|
-
if (coords)
|
|
1710
|
-
|
|
1711
|
-
else if (query)
|
|
1789
|
+
if (coords) {
|
|
1790
|
+
args.push("--coords", coords);
|
|
1791
|
+
} else if (query) {
|
|
1792
|
+
const resolved = await resolveElementCoords(query, app);
|
|
1793
|
+
if (resolved) {
|
|
1794
|
+
args.push("--coords", resolved);
|
|
1795
|
+
} else {
|
|
1796
|
+
args.push(query);
|
|
1797
|
+
}
|
|
1798
|
+
} else if (on) {
|
|
1799
|
+
args.push("--on", on);
|
|
1800
|
+
}
|
|
1712
1801
|
if (app) args.push("--app", app);
|
|
1713
1802
|
if (snapshot) args.push("--snapshot", snapshot);
|
|
1714
1803
|
if (doubleClick) args.push("--double");
|
|
@@ -1746,6 +1835,7 @@ var DesktopTools = class {
|
|
|
1746
1835
|
"desktop_paste",
|
|
1747
1836
|
[
|
|
1748
1837
|
"Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
|
|
1838
|
+
"IMPORTANT: Focus the target field first (click it with desktop_click) before pasting.",
|
|
1749
1839
|
"Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
|
|
1750
1840
|
"Can also paste file contents via filePath."
|
|
1751
1841
|
].join("\n"),
|
|
@@ -1828,7 +1918,10 @@ var DesktopTools = class {
|
|
|
1828
1918
|
);
|
|
1829
1919
|
server.tool(
|
|
1830
1920
|
"desktop_move",
|
|
1831
|
-
|
|
1921
|
+
[
|
|
1922
|
+
"Move mouse cursor without clicking. Use before scroll or to hover.",
|
|
1923
|
+
"Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
|
|
1924
|
+
].join("\n"),
|
|
1832
1925
|
{
|
|
1833
1926
|
coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
|
|
1834
1927
|
to: z5.string().optional().describe("Element text/label to move to"),
|
|
@@ -1841,9 +1934,18 @@ var DesktopTools = class {
|
|
|
1841
1934
|
checkBlacklist(app);
|
|
1842
1935
|
if (!coords && !to && !id) throw new Error("Provide coords, to, or id.");
|
|
1843
1936
|
const args = ["move"];
|
|
1844
|
-
if (coords)
|
|
1845
|
-
|
|
1846
|
-
else if (to)
|
|
1937
|
+
if (coords) {
|
|
1938
|
+
args.push(coords);
|
|
1939
|
+
} else if (to) {
|
|
1940
|
+
const resolved = await resolveElementCoords(to, app);
|
|
1941
|
+
if (resolved) {
|
|
1942
|
+
args.push(resolved);
|
|
1943
|
+
} else {
|
|
1944
|
+
args.push("--to", to);
|
|
1945
|
+
}
|
|
1946
|
+
} else if (id) {
|
|
1947
|
+
args.push("--id", id);
|
|
1948
|
+
}
|
|
1847
1949
|
if (app) args.push("--app", app);
|
|
1848
1950
|
if (snapshot) args.push("--snapshot", snapshot);
|
|
1849
1951
|
if (smooth) args.push("--smooth");
|
|
@@ -1854,7 +1956,8 @@ var DesktopTools = class {
|
|
|
1854
1956
|
"desktop_drag",
|
|
1855
1957
|
[
|
|
1856
1958
|
"Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
|
|
1857
|
-
"
|
|
1959
|
+
"Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
|
|
1960
|
+
"Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
|
|
1858
1961
|
].join("\n"),
|
|
1859
1962
|
{
|
|
1860
1963
|
from: z5.string().optional().describe("Source element ID from desktop_see"),
|
|
@@ -1884,7 +1987,10 @@ var DesktopTools = class {
|
|
|
1884
1987
|
);
|
|
1885
1988
|
server.tool(
|
|
1886
1989
|
"desktop_open_app",
|
|
1887
|
-
|
|
1990
|
+
[
|
|
1991
|
+
"Launch or activate a macOS app. Already running apps are brought to front. Terminal/iTerm/Finder blocked.",
|
|
1992
|
+
"After launch, wait briefly then use desktop_click(query, app) to interact. desktop_see may timeout on complex apps \u2014 use desktop_screenshot as visual fallback."
|
|
1993
|
+
].join("\n"),
|
|
1888
1994
|
{
|
|
1889
1995
|
app: z5.string().describe("App name (e.g. 'Safari', 'KakaoTalk', 'Slack')")
|
|
1890
1996
|
},
|
|
@@ -2048,6 +2154,7 @@ var DesktopTools = class {
|
|
|
2048
2154
|
app: z5.string().optional().describe("App to open with")
|
|
2049
2155
|
},
|
|
2050
2156
|
async ({ url, app }) => {
|
|
2157
|
+
checkBlacklist(app);
|
|
2051
2158
|
const args = ["open", url];
|
|
2052
2159
|
if (app) args.push("--app", app);
|
|
2053
2160
|
return json(await peekaboo(args));
|
package/dist/server/stdio.js
CHANGED
|
@@ -42,13 +42,19 @@ var toolPermissions = {
|
|
|
42
42
|
desktop_click: "confirm",
|
|
43
43
|
desktop_type: "confirm",
|
|
44
44
|
desktop_hotkey: "confirm",
|
|
45
|
+
desktop_press: "confirm",
|
|
45
46
|
desktop_scroll: "confirm",
|
|
46
47
|
desktop_move: "confirm",
|
|
48
|
+
desktop_drag: "confirm",
|
|
47
49
|
desktop_menu: "confirm",
|
|
48
50
|
desktop_paste: "confirm",
|
|
51
|
+
desktop_clipboard: "confirm",
|
|
52
|
+
desktop_dialog: "confirm",
|
|
49
53
|
desktop_screenshot: "confirm",
|
|
50
54
|
desktop_open_app: "auto",
|
|
51
55
|
desktop_open_url: "auto",
|
|
56
|
+
desktop_app_quit: "confirm",
|
|
57
|
+
desktop_window: "confirm",
|
|
52
58
|
cron_create: "confirm",
|
|
53
59
|
cron_delete: "confirm",
|
|
54
60
|
edit_block: "confirm",
|
|
@@ -81,6 +87,9 @@ var FilesystemTools = class {
|
|
|
81
87
|
"- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
|
|
82
88
|
"- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
|
|
83
89
|
"- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
|
|
90
|
+
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
|
|
91
|
+
" Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
|
|
92
|
+
" This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
|
|
84
93
|
"",
|
|
85
94
|
"BEHAVIOR:",
|
|
86
95
|
"- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
|