junis 0.4.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +130 -21
- package/dist/server/mcp.js +130 -21
- package/dist/server/stdio.js +2 -2
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -570,9 +570,9 @@ var FilesystemTools = class {
|
|
|
570
570
|
"- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
|
|
571
571
|
"- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
|
|
572
572
|
"- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
|
|
573
|
-
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size
|
|
573
|
+
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size for click/move/drag.",
|
|
574
574
|
" Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
|
|
575
|
-
" This is the
|
|
575
|
+
" This is the most reliable way to get pixel-accurate coordinates on macOS.",
|
|
576
576
|
"",
|
|
577
577
|
"BEHAVIOR:",
|
|
578
578
|
"- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
|
|
@@ -2137,6 +2137,28 @@ tell application "System Events"
|
|
|
2137
2137
|
end repeat
|
|
2138
2138
|
end try
|
|
2139
2139
|
end repeat
|
|
2140
|
+
repeat with parent in topElems
|
|
2141
|
+
try
|
|
2142
|
+
repeat with child in UI elements of parent
|
|
2143
|
+
try
|
|
2144
|
+
set childRole to role of child
|
|
2145
|
+
if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
|
|
2146
|
+
repeat with gc in UI elements of child
|
|
2147
|
+
try
|
|
2148
|
+
if (name of gc contains "${safeQuery}") or (description of gc contains "${safeQuery}") then
|
|
2149
|
+
set pos to position of gc
|
|
2150
|
+
set sz to size of gc
|
|
2151
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
2152
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
2153
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
2154
|
+
end if
|
|
2155
|
+
end try
|
|
2156
|
+
end repeat
|
|
2157
|
+
end if
|
|
2158
|
+
end try
|
|
2159
|
+
end repeat
|
|
2160
|
+
end try
|
|
2161
|
+
end repeat
|
|
2140
2162
|
end tell
|
|
2141
2163
|
end tell
|
|
2142
2164
|
return "NOT_FOUND"
|
|
@@ -2170,6 +2192,86 @@ async function resolveElementCoords(query, app) {
|
|
|
2170
2192
|
}
|
|
2171
2193
|
return null;
|
|
2172
2194
|
}
|
|
2195
|
+
async function findFirstByRole(role, app) {
|
|
2196
|
+
let targetApp = app;
|
|
2197
|
+
if (!targetApp) {
|
|
2198
|
+
try {
|
|
2199
|
+
const { stdout } = await execa("osascript", [
|
|
2200
|
+
"-e",
|
|
2201
|
+
'tell application "System Events" to get name of first application process whose frontmost is true'
|
|
2202
|
+
]);
|
|
2203
|
+
targetApp = stdout.trim();
|
|
2204
|
+
} catch {
|
|
2205
|
+
return null;
|
|
2206
|
+
}
|
|
2207
|
+
}
|
|
2208
|
+
const safeApp = targetApp.replace(/[\\"]/g, "\\$&");
|
|
2209
|
+
const safeRole = role.replace(/[\\"]/g, "\\$&");
|
|
2210
|
+
const script = `
|
|
2211
|
+
tell application "System Events"
|
|
2212
|
+
tell process "${safeApp}"
|
|
2213
|
+
set topElems to UI elements
|
|
2214
|
+
repeat with elem in topElems
|
|
2215
|
+
try
|
|
2216
|
+
if role of elem is "${safeRole}" then
|
|
2217
|
+
set pos to position of elem
|
|
2218
|
+
set sz to size of elem
|
|
2219
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
2220
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
2221
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
2222
|
+
end if
|
|
2223
|
+
end try
|
|
2224
|
+
end repeat
|
|
2225
|
+
repeat with parent in topElems
|
|
2226
|
+
try
|
|
2227
|
+
repeat with elem in UI elements of parent
|
|
2228
|
+
try
|
|
2229
|
+
if role of elem is "${safeRole}" then
|
|
2230
|
+
set pos to position of elem
|
|
2231
|
+
set sz to size of elem
|
|
2232
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
2233
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
2234
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
2235
|
+
end if
|
|
2236
|
+
end try
|
|
2237
|
+
end repeat
|
|
2238
|
+
end try
|
|
2239
|
+
end repeat
|
|
2240
|
+
repeat with parent in topElems
|
|
2241
|
+
try
|
|
2242
|
+
repeat with child in UI elements of parent
|
|
2243
|
+
try
|
|
2244
|
+
set childRole to role of child
|
|
2245
|
+
if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
|
|
2246
|
+
repeat with gc in UI elements of child
|
|
2247
|
+
try
|
|
2248
|
+
if role of gc is "${safeRole}" then
|
|
2249
|
+
set pos to position of gc
|
|
2250
|
+
set sz to size of gc
|
|
2251
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
2252
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
2253
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
2254
|
+
end if
|
|
2255
|
+
end try
|
|
2256
|
+
end repeat
|
|
2257
|
+
end if
|
|
2258
|
+
end try
|
|
2259
|
+
end repeat
|
|
2260
|
+
end try
|
|
2261
|
+
end repeat
|
|
2262
|
+
end tell
|
|
2263
|
+
end tell
|
|
2264
|
+
return "NOT_FOUND"
|
|
2265
|
+
`;
|
|
2266
|
+
try {
|
|
2267
|
+
const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
|
|
2268
|
+
const result = stdout.trim();
|
|
2269
|
+
if (result === "NOT_FOUND" || !result.includes(",")) return null;
|
|
2270
|
+
return result;
|
|
2271
|
+
} catch {
|
|
2272
|
+
return null;
|
|
2273
|
+
}
|
|
2274
|
+
}
|
|
2173
2275
|
var DesktopTools = class {
|
|
2174
2276
|
register(server) {
|
|
2175
2277
|
server.tool(
|
|
@@ -2208,10 +2310,9 @@ var DesktopTools = class {
|
|
|
2208
2310
|
server.tool(
|
|
2209
2311
|
"desktop_screenshot",
|
|
2210
2312
|
[
|
|
2211
|
-
"Take a screenshot. Returns base64 image at logical resolution (
|
|
2212
|
-
"Use for visual context
|
|
2213
|
-
"
|
|
2214
|
-
"Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
|
|
2313
|
+
"Take a screenshot. Returns base64 image at logical resolution (1:1 with click coordinate system).",
|
|
2314
|
+
"Use for visual context, verify UI state, or locate elements when query-based methods fail.",
|
|
2315
|
+
"For clicking, prefer desktop_click(query, app) which auto-resolves coords. If that fails, use osascript or visual estimation from this image as fallback."
|
|
2215
2316
|
].join("\n"),
|
|
2216
2317
|
{
|
|
2217
2318
|
app: z5.string().optional().describe("Capture specific app window"),
|
|
@@ -2248,30 +2349,38 @@ var DesktopTools = class {
|
|
|
2248
2349
|
server.tool(
|
|
2249
2350
|
"desktop_click",
|
|
2250
2351
|
[
|
|
2251
|
-
"Click a UI element by text label (query), coordinates (coords), or element ID (on).",
|
|
2252
|
-
"BEST: Use query with app
|
|
2253
|
-
"
|
|
2254
|
-
"
|
|
2255
|
-
"CAUTION: 'on' (element ID) has known offset bug \u2014 use
|
|
2256
|
-
"If
|
|
2257
|
-
"NOTE: Web page elements (inside browser) are invisible
|
|
2352
|
+
"Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
|
|
2353
|
+
"BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
|
|
2354
|
+
"ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
|
|
2355
|
+
"GOOD: Use coords 'x,y' \u2014 best from osascript center calculation (position + size/2), or from screenshot if osascript is unavailable.",
|
|
2356
|
+
"CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
|
|
2357
|
+
"If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
|
|
2358
|
+
"NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
|
|
2258
2359
|
].join("\n"),
|
|
2259
2360
|
{
|
|
2260
|
-
query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
|
|
2361
|
+
query: z5.string().optional().describe("Text/label to click (case-insensitive). Works for deeply nested elements."),
|
|
2362
|
+
role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
|
|
2261
2363
|
on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
|
|
2262
2364
|
coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
|
|
2263
|
-
app: z5.string().optional().describe("App name"),
|
|
2365
|
+
app: z5.string().optional().describe("App name (always specify for faster resolution)"),
|
|
2264
2366
|
snapshot: z5.string().optional().describe("Snapshot ID from desktop_see"),
|
|
2265
2367
|
doubleClick: z5.boolean().optional().default(false).describe("Double-click"),
|
|
2266
2368
|
rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
|
|
2267
2369
|
waitFor: z5.number().optional().describe("Max ms to wait for element to appear (default 5000)")
|
|
2268
2370
|
},
|
|
2269
|
-
async ({ query, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
|
|
2371
|
+
async ({ query, role, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
|
|
2270
2372
|
checkBlacklist(app);
|
|
2271
|
-
if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
|
|
2373
|
+
if (!query && !role && !on && !coords) throw new Error("Provide query, role, on, or coords.");
|
|
2272
2374
|
const args = ["click"];
|
|
2273
2375
|
if (coords) {
|
|
2274
2376
|
args.push("--coords", coords);
|
|
2377
|
+
} else if (role) {
|
|
2378
|
+
const resolved = await findFirstByRole(role, app);
|
|
2379
|
+
if (resolved) {
|
|
2380
|
+
args.push("--coords", resolved);
|
|
2381
|
+
} else {
|
|
2382
|
+
throw new Error(`No element with role '${role}' found in ${app ?? "frontmost app"}.`);
|
|
2383
|
+
}
|
|
2275
2384
|
} else if (query) {
|
|
2276
2385
|
const resolved = await resolveElementCoords(query, app);
|
|
2277
2386
|
if (resolved) {
|
|
@@ -2294,7 +2403,7 @@ var DesktopTools = class {
|
|
|
2294
2403
|
"desktop_type",
|
|
2295
2404
|
[
|
|
2296
2405
|
"Type text via keyboard. Supports \\n (return), \\t (tab) escape sequences.",
|
|
2297
|
-
"IMPORTANT: Focus the target field first
|
|
2406
|
+
"IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
|
|
2298
2407
|
"For Korean/Japanese/Chinese/emoji, use desktop_paste instead (keyboard sim is ASCII only).",
|
|
2299
2408
|
"Use clear=true to replace existing text (Cmd+A \u2192 Delete before typing)."
|
|
2300
2409
|
].join("\n"),
|
|
@@ -2319,7 +2428,7 @@ var DesktopTools = class {
|
|
|
2319
2428
|
"desktop_paste",
|
|
2320
2429
|
[
|
|
2321
2430
|
"Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
|
|
2322
|
-
"IMPORTANT: Focus the target field first (
|
|
2431
|
+
"IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
|
|
2323
2432
|
"Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
|
|
2324
2433
|
"Can also paste file contents via filePath."
|
|
2325
2434
|
].join("\n"),
|
|
@@ -2404,7 +2513,7 @@ var DesktopTools = class {
|
|
|
2404
2513
|
"desktop_move",
|
|
2405
2514
|
[
|
|
2406
2515
|
"Move mouse cursor without clicking. Use before scroll or to hover.",
|
|
2407
|
-
"
|
|
2516
|
+
"For coords, prefer osascript (position + size/2) or use 'to' with text label for auto-resolution."
|
|
2408
2517
|
].join("\n"),
|
|
2409
2518
|
{
|
|
2410
2519
|
coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
|
|
@@ -2441,7 +2550,7 @@ var DesktopTools = class {
|
|
|
2441
2550
|
[
|
|
2442
2551
|
"Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
|
|
2443
2552
|
"Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
|
|
2444
|
-
"
|
|
2553
|
+
"For coords, prefer osascript (position + size/2). If unavailable, screenshot-based estimation is acceptable."
|
|
2445
2554
|
].join("\n"),
|
|
2446
2555
|
{
|
|
2447
2556
|
from: z5.string().optional().describe("Source element ID from desktop_see"),
|
package/dist/server/mcp.js
CHANGED
|
@@ -86,9 +86,9 @@ var FilesystemTools = class {
|
|
|
86
86
|
"- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
|
|
87
87
|
"- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
|
|
88
88
|
"- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
|
|
89
|
-
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size
|
|
89
|
+
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size for click/move/drag.",
|
|
90
90
|
" Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
|
|
91
|
-
" This is the
|
|
91
|
+
" This is the most reliable way to get pixel-accurate coordinates on macOS.",
|
|
92
92
|
"",
|
|
93
93
|
"BEHAVIOR:",
|
|
94
94
|
"- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
|
|
@@ -1653,6 +1653,28 @@ tell application "System Events"
|
|
|
1653
1653
|
end repeat
|
|
1654
1654
|
end try
|
|
1655
1655
|
end repeat
|
|
1656
|
+
repeat with parent in topElems
|
|
1657
|
+
try
|
|
1658
|
+
repeat with child in UI elements of parent
|
|
1659
|
+
try
|
|
1660
|
+
set childRole to role of child
|
|
1661
|
+
if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
|
|
1662
|
+
repeat with gc in UI elements of child
|
|
1663
|
+
try
|
|
1664
|
+
if (name of gc contains "${safeQuery}") or (description of gc contains "${safeQuery}") then
|
|
1665
|
+
set pos to position of gc
|
|
1666
|
+
set sz to size of gc
|
|
1667
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
1668
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
1669
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
1670
|
+
end if
|
|
1671
|
+
end try
|
|
1672
|
+
end repeat
|
|
1673
|
+
end if
|
|
1674
|
+
end try
|
|
1675
|
+
end repeat
|
|
1676
|
+
end try
|
|
1677
|
+
end repeat
|
|
1656
1678
|
end tell
|
|
1657
1679
|
end tell
|
|
1658
1680
|
return "NOT_FOUND"
|
|
@@ -1686,6 +1708,86 @@ async function resolveElementCoords(query, app) {
|
|
|
1686
1708
|
}
|
|
1687
1709
|
return null;
|
|
1688
1710
|
}
|
|
1711
|
+
async function findFirstByRole(role, app) {
|
|
1712
|
+
let targetApp = app;
|
|
1713
|
+
if (!targetApp) {
|
|
1714
|
+
try {
|
|
1715
|
+
const { stdout } = await execa("osascript", [
|
|
1716
|
+
"-e",
|
|
1717
|
+
'tell application "System Events" to get name of first application process whose frontmost is true'
|
|
1718
|
+
]);
|
|
1719
|
+
targetApp = stdout.trim();
|
|
1720
|
+
} catch {
|
|
1721
|
+
return null;
|
|
1722
|
+
}
|
|
1723
|
+
}
|
|
1724
|
+
const safeApp = targetApp.replace(/[\\"]/g, "\\$&");
|
|
1725
|
+
const safeRole = role.replace(/[\\"]/g, "\\$&");
|
|
1726
|
+
const script = `
|
|
1727
|
+
tell application "System Events"
|
|
1728
|
+
tell process "${safeApp}"
|
|
1729
|
+
set topElems to UI elements
|
|
1730
|
+
repeat with elem in topElems
|
|
1731
|
+
try
|
|
1732
|
+
if role of elem is "${safeRole}" then
|
|
1733
|
+
set pos to position of elem
|
|
1734
|
+
set sz to size of elem
|
|
1735
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
1736
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
1737
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
1738
|
+
end if
|
|
1739
|
+
end try
|
|
1740
|
+
end repeat
|
|
1741
|
+
repeat with parent in topElems
|
|
1742
|
+
try
|
|
1743
|
+
repeat with elem in UI elements of parent
|
|
1744
|
+
try
|
|
1745
|
+
if role of elem is "${safeRole}" then
|
|
1746
|
+
set pos to position of elem
|
|
1747
|
+
set sz to size of elem
|
|
1748
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
1749
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
1750
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
1751
|
+
end if
|
|
1752
|
+
end try
|
|
1753
|
+
end repeat
|
|
1754
|
+
end try
|
|
1755
|
+
end repeat
|
|
1756
|
+
repeat with parent in topElems
|
|
1757
|
+
try
|
|
1758
|
+
repeat with child in UI elements of parent
|
|
1759
|
+
try
|
|
1760
|
+
set childRole to role of child
|
|
1761
|
+
if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
|
|
1762
|
+
repeat with gc in UI elements of child
|
|
1763
|
+
try
|
|
1764
|
+
if role of gc is "${safeRole}" then
|
|
1765
|
+
set pos to position of gc
|
|
1766
|
+
set sz to size of gc
|
|
1767
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
1768
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
1769
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
1770
|
+
end if
|
|
1771
|
+
end try
|
|
1772
|
+
end repeat
|
|
1773
|
+
end if
|
|
1774
|
+
end try
|
|
1775
|
+
end repeat
|
|
1776
|
+
end try
|
|
1777
|
+
end repeat
|
|
1778
|
+
end tell
|
|
1779
|
+
end tell
|
|
1780
|
+
return "NOT_FOUND"
|
|
1781
|
+
`;
|
|
1782
|
+
try {
|
|
1783
|
+
const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
|
|
1784
|
+
const result = stdout.trim();
|
|
1785
|
+
if (result === "NOT_FOUND" || !result.includes(",")) return null;
|
|
1786
|
+
return result;
|
|
1787
|
+
} catch {
|
|
1788
|
+
return null;
|
|
1789
|
+
}
|
|
1790
|
+
}
|
|
1689
1791
|
var DesktopTools = class {
|
|
1690
1792
|
register(server) {
|
|
1691
1793
|
server.tool(
|
|
@@ -1724,10 +1826,9 @@ var DesktopTools = class {
|
|
|
1724
1826
|
server.tool(
|
|
1725
1827
|
"desktop_screenshot",
|
|
1726
1828
|
[
|
|
1727
|
-
"Take a screenshot. Returns base64 image at logical resolution (
|
|
1728
|
-
"Use for visual context
|
|
1729
|
-
"
|
|
1730
|
-
"Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
|
|
1829
|
+
"Take a screenshot. Returns base64 image at logical resolution (1:1 with click coordinate system).",
|
|
1830
|
+
"Use for visual context, verify UI state, or locate elements when query-based methods fail.",
|
|
1831
|
+
"For clicking, prefer desktop_click(query, app) which auto-resolves coords. If that fails, use osascript or visual estimation from this image as fallback."
|
|
1731
1832
|
].join("\n"),
|
|
1732
1833
|
{
|
|
1733
1834
|
app: z5.string().optional().describe("Capture specific app window"),
|
|
@@ -1764,30 +1865,38 @@ var DesktopTools = class {
|
|
|
1764
1865
|
server.tool(
|
|
1765
1866
|
"desktop_click",
|
|
1766
1867
|
[
|
|
1767
|
-
"Click a UI element by text label (query), coordinates (coords), or element ID (on).",
|
|
1768
|
-
"BEST: Use query with app
|
|
1769
|
-
"
|
|
1770
|
-
"
|
|
1771
|
-
"CAUTION: 'on' (element ID) has known offset bug \u2014 use
|
|
1772
|
-
"If
|
|
1773
|
-
"NOTE: Web page elements (inside browser) are invisible
|
|
1868
|
+
"Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
|
|
1869
|
+
"BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
|
|
1870
|
+
"ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
|
|
1871
|
+
"GOOD: Use coords 'x,y' \u2014 best from osascript center calculation (position + size/2), or from screenshot if osascript is unavailable.",
|
|
1872
|
+
"CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
|
|
1873
|
+
"If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
|
|
1874
|
+
"NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
|
|
1774
1875
|
].join("\n"),
|
|
1775
1876
|
{
|
|
1776
|
-
query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
|
|
1877
|
+
query: z5.string().optional().describe("Text/label to click (case-insensitive). Works for deeply nested elements."),
|
|
1878
|
+
role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
|
|
1777
1879
|
on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
|
|
1778
1880
|
coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
|
|
1779
|
-
app: z5.string().optional().describe("App name"),
|
|
1881
|
+
app: z5.string().optional().describe("App name (always specify for faster resolution)"),
|
|
1780
1882
|
snapshot: z5.string().optional().describe("Snapshot ID from desktop_see"),
|
|
1781
1883
|
doubleClick: z5.boolean().optional().default(false).describe("Double-click"),
|
|
1782
1884
|
rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
|
|
1783
1885
|
waitFor: z5.number().optional().describe("Max ms to wait for element to appear (default 5000)")
|
|
1784
1886
|
},
|
|
1785
|
-
async ({ query, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
|
|
1887
|
+
async ({ query, role, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
|
|
1786
1888
|
checkBlacklist(app);
|
|
1787
|
-
if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
|
|
1889
|
+
if (!query && !role && !on && !coords) throw new Error("Provide query, role, on, or coords.");
|
|
1788
1890
|
const args = ["click"];
|
|
1789
1891
|
if (coords) {
|
|
1790
1892
|
args.push("--coords", coords);
|
|
1893
|
+
} else if (role) {
|
|
1894
|
+
const resolved = await findFirstByRole(role, app);
|
|
1895
|
+
if (resolved) {
|
|
1896
|
+
args.push("--coords", resolved);
|
|
1897
|
+
} else {
|
|
1898
|
+
throw new Error(`No element with role '${role}' found in ${app ?? "frontmost app"}.`);
|
|
1899
|
+
}
|
|
1791
1900
|
} else if (query) {
|
|
1792
1901
|
const resolved = await resolveElementCoords(query, app);
|
|
1793
1902
|
if (resolved) {
|
|
@@ -1810,7 +1919,7 @@ var DesktopTools = class {
|
|
|
1810
1919
|
"desktop_type",
|
|
1811
1920
|
[
|
|
1812
1921
|
"Type text via keyboard. Supports \\n (return), \\t (tab) escape sequences.",
|
|
1813
|
-
"IMPORTANT: Focus the target field first
|
|
1922
|
+
"IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
|
|
1814
1923
|
"For Korean/Japanese/Chinese/emoji, use desktop_paste instead (keyboard sim is ASCII only).",
|
|
1815
1924
|
"Use clear=true to replace existing text (Cmd+A \u2192 Delete before typing)."
|
|
1816
1925
|
].join("\n"),
|
|
@@ -1835,7 +1944,7 @@ var DesktopTools = class {
|
|
|
1835
1944
|
"desktop_paste",
|
|
1836
1945
|
[
|
|
1837
1946
|
"Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
|
|
1838
|
-
"IMPORTANT: Focus the target field first (
|
|
1947
|
+
"IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
|
|
1839
1948
|
"Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
|
|
1840
1949
|
"Can also paste file contents via filePath."
|
|
1841
1950
|
].join("\n"),
|
|
@@ -1920,7 +2029,7 @@ var DesktopTools = class {
|
|
|
1920
2029
|
"desktop_move",
|
|
1921
2030
|
[
|
|
1922
2031
|
"Move mouse cursor without clicking. Use before scroll or to hover.",
|
|
1923
|
-
"
|
|
2032
|
+
"For coords, prefer osascript (position + size/2) or use 'to' with text label for auto-resolution."
|
|
1924
2033
|
].join("\n"),
|
|
1925
2034
|
{
|
|
1926
2035
|
coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
|
|
@@ -1957,7 +2066,7 @@ var DesktopTools = class {
|
|
|
1957
2066
|
[
|
|
1958
2067
|
"Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
|
|
1959
2068
|
"Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
|
|
1960
|
-
"
|
|
2069
|
+
"For coords, prefer osascript (position + size/2). If unavailable, screenshot-based estimation is acceptable."
|
|
1961
2070
|
].join("\n"),
|
|
1962
2071
|
{
|
|
1963
2072
|
from: z5.string().optional().describe("Source element ID from desktop_see"),
|
package/dist/server/stdio.js
CHANGED
|
@@ -87,9 +87,9 @@ var FilesystemTools = class {
|
|
|
87
87
|
"- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
|
|
88
88
|
"- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
|
|
89
89
|
"- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
|
|
90
|
-
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size
|
|
90
|
+
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size for click/move/drag.",
|
|
91
91
|
" Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
|
|
92
|
-
" This is the
|
|
92
|
+
" This is the most reliable way to get pixel-accurate coordinates on macOS.",
|
|
93
93
|
"",
|
|
94
94
|
"BEHAVIOR:",
|
|
95
95
|
"- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
|