junis 0.3.17 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +150 -20
- package/dist/server/mcp.js +150 -20
- package/dist/server/stdio.js +3 -0
- package/package.json +1 -1
package/dist/cli/index.js
CHANGED
|
@@ -570,6 +570,9 @@ var FilesystemTools = class {
|
|
|
570
570
|
"- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
|
|
571
571
|
"- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
|
|
572
572
|
"- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
|
|
573
|
+
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
|
|
574
|
+
" Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
|
|
575
|
+
" This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
|
|
573
576
|
"",
|
|
574
577
|
"BEHAVIOR:",
|
|
575
578
|
"- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
|
|
@@ -2101,27 +2104,116 @@ function checkBlacklist(app) {
|
|
|
2101
2104
|
/**
 * Wrap a value in a tool-result envelope: a single "text" content entry whose
 * text is the value serialised as 2-space-indented JSON.
 *
 * @param {*} data - Any JSON-serialisable value.
 * @returns {{content: Array<{type: string, text: string}>}} The result envelope.
 */
function json(data) {
  const text = JSON.stringify(data, null, 2);
  const entry = { type: "text", text };
  return { content: [entry] };
}
|
|
2107
|
+
/**
 * Search one process's accessibility tree (via System Events) for the first UI
 * element whose name or description contains `query`, and return the centre of
 * that element.
 *
 * The generated AppleScript looks at three levels, in order:
 *   1. the process's top-level UI elements,
 *   2. the children of each top-level element,
 *   3. the grandchildren, skipping children whose role is AXTable, AXOutline
 *      or AXList.
 *
 * @param {string} processName - Name of the System Events process to inspect.
 * @param {string} query - Text to look for in an element's name/description.
 * @returns {Promise<string|null>} "x,y" (integer centre coordinates), or null
 *   when nothing matched, the input is blank, or osascript failed / timed out.
 */
async function searchProcessElements(processName, query) {
  // A blank process name or query can never identify an element; bail out
  // before paying for an osascript round trip (and avoid a TypeError on
  // non-string input).
  if (typeof processName !== "string" || typeof query !== "string") return null;
  if (processName.trim() === "" || query.trim() === "") return null;

  // Escape backslashes and double quotes so the values stay inside their
  // AppleScript string literals.
  const safeApp = processName.replace(/[\\"]/g, "\\$&");
  const safeQuery = query.replace(/[\\"]/g, "\\$&");

  // AppleScript fragment shared by all three search levels: test one element
  // reference and, on a match, return its centre (position + size / 2).
  const matchAndReturnCenter = (elemVar) => [
    "try",
    `if (name of ${elemVar} contains "${safeQuery}") or (description of ${elemVar} contains "${safeQuery}") then`,
    `set pos to position of ${elemVar}`,
    `set sz to size of ${elemVar}`,
    "set cx to (item 1 of pos) + (item 1 of sz) / 2",
    "set cy to (item 2 of pos) + (item 2 of sz) / 2",
    'return ((cx as integer) as text) & "," & ((cy as integer) as text)',
    "end if",
    "end try"
  ].join("\n");

  const script = `
tell application "System Events"
tell process "${safeApp}"
set topElems to UI elements
repeat with elem in topElems
${matchAndReturnCenter("elem")}
end repeat
repeat with parent in topElems
try
repeat with elem in UI elements of parent
${matchAndReturnCenter("elem")}
end repeat
end try
end repeat
repeat with parent in topElems
try
repeat with child in UI elements of parent
try
set childRole to role of child
if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
repeat with gc in UI elements of child
${matchAndReturnCenter("gc")}
end repeat
end if
end try
end repeat
end try
end repeat
end tell
end tell
return "NOT_FOUND"
`;

  try {
    const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
    const result = stdout.trim();
    // Accept only a well-formed "x,y" integer pair; this also rejects the
    // "NOT_FOUND" sentinel and any unexpected output.
    return /^-?\d+,-?\d+$/.test(result) ? result : null;
  } catch {
    // Best effort: a failed or timed-out lookup is reported as "not found".
    return null;
  }
}
|
|
2104
2175
|
/**
 * Resolve the centre coordinates of the first UI element matching `query`.
 *
 * The search runs against `app` when given, otherwise against the frontmost
 * application process as reported by System Events. If that search finds
 * nothing and the target was not already "Dock", the Dock process is searched
 * as a fallback.
 *
 * @param {string} query - Text to look for in element names/descriptions.
 * @param {string} [app] - Process name to search; defaults to the frontmost app.
 * @returns {Promise<string|null>} "x,y" coordinates, or null when the
 *   frontmost app could not be determined or no element matched.
 */
async function resolveElementCoords(query, app) {
  let targetApp = app;
  if (!targetApp) {
    try {
      // Bounded like the element search itself, so a stuck System Events call
      // cannot hang the tool invocation indefinitely.
      const { stdout } = await execa(
        "osascript",
        [
          "-e",
          'tell application "System Events" to get name of first application process whose frontmost is true'
        ],
        { timeout: 1e4 }
      );
      targetApp = stdout.trim();
    } catch {
      return null;
    }
  }

  const result = await searchProcessElements(targetApp, query);
  if (result) return result;

  // Fall back to the Dock process unless it was the one just searched.
  if (targetApp !== "Dock") {
    return searchProcessElements("Dock", query);
  }
  return null;
}
|
|
2195
|
+
async function findFirstByRole(role, app) {
|
|
2196
|
+
let targetApp = app;
|
|
2197
|
+
if (!targetApp) {
|
|
2198
|
+
try {
|
|
2199
|
+
const { stdout } = await execa("osascript", [
|
|
2200
|
+
"-e",
|
|
2201
|
+
'tell application "System Events" to get name of first application process whose frontmost is true'
|
|
2202
|
+
]);
|
|
2203
|
+
targetApp = stdout.trim();
|
|
2204
|
+
} catch {
|
|
2205
|
+
return null;
|
|
2206
|
+
}
|
|
2207
|
+
}
|
|
2208
|
+
const safeApp = targetApp.replace(/[\\"]/g, "\\$&");
|
|
2209
|
+
const safeRole = role.replace(/[\\"]/g, "\\$&");
|
|
2118
2210
|
const script = `
|
|
2119
2211
|
tell application "System Events"
|
|
2120
2212
|
tell process "${safeApp}"
|
|
2121
2213
|
set topElems to UI elements
|
|
2122
2214
|
repeat with elem in topElems
|
|
2123
2215
|
try
|
|
2124
|
-
if
|
|
2216
|
+
if role of elem is "${safeRole}" then
|
|
2125
2217
|
set pos to position of elem
|
|
2126
2218
|
set sz to size of elem
|
|
2127
2219
|
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
@@ -2134,7 +2226,7 @@ tell application "System Events"
|
|
|
2134
2226
|
try
|
|
2135
2227
|
repeat with elem in UI elements of parent
|
|
2136
2228
|
try
|
|
2137
|
-
if
|
|
2229
|
+
if role of elem is "${safeRole}" then
|
|
2138
2230
|
set pos to position of elem
|
|
2139
2231
|
set sz to size of elem
|
|
2140
2232
|
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
@@ -2145,6 +2237,28 @@ tell application "System Events"
|
|
|
2145
2237
|
end repeat
|
|
2146
2238
|
end try
|
|
2147
2239
|
end repeat
|
|
2240
|
+
repeat with parent in topElems
|
|
2241
|
+
try
|
|
2242
|
+
repeat with child in UI elements of parent
|
|
2243
|
+
try
|
|
2244
|
+
set childRole to role of child
|
|
2245
|
+
if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
|
|
2246
|
+
repeat with gc in UI elements of child
|
|
2247
|
+
try
|
|
2248
|
+
if role of gc is "${safeRole}" then
|
|
2249
|
+
set pos to position of gc
|
|
2250
|
+
set sz to size of gc
|
|
2251
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
2252
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
2253
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
2254
|
+
end if
|
|
2255
|
+
end try
|
|
2256
|
+
end repeat
|
|
2257
|
+
end if
|
|
2258
|
+
end try
|
|
2259
|
+
end repeat
|
|
2260
|
+
end try
|
|
2261
|
+
end repeat
|
|
2148
2262
|
end tell
|
|
2149
2263
|
end tell
|
|
2150
2264
|
return "NOT_FOUND"
|
|
@@ -2197,8 +2311,9 @@ var DesktopTools = class {
|
|
|
2197
2311
|
"desktop_screenshot",
|
|
2198
2312
|
[
|
|
2199
2313
|
"Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
|
|
2200
|
-
"Use for visual context or to verify UI state
|
|
2201
|
-
"
|
|
2314
|
+
"Use for visual context or to verify UI state ONLY.",
|
|
2315
|
+
"WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
|
|
2316
|
+
"Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
|
|
2202
2317
|
].join("\n"),
|
|
2203
2318
|
{
|
|
2204
2319
|
app: z5.string().optional().describe("Capture specific app window"),
|
|
@@ -2235,15 +2350,17 @@ var DesktopTools = class {
|
|
|
2235
2350
|
server.tool(
|
|
2236
2351
|
"desktop_click",
|
|
2237
2352
|
[
|
|
2238
|
-
"Click a UI element by text label (query), coordinates (coords), or element ID (on).",
|
|
2239
|
-
"BEST: Use query with app
|
|
2240
|
-
"
|
|
2241
|
-
"
|
|
2242
|
-
"
|
|
2243
|
-
"
|
|
2353
|
+
"Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
|
|
2354
|
+
"BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
|
|
2355
|
+
"ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
|
|
2356
|
+
"GOOD: Use coords 'x,y' \u2014 MUST be from osascript center calculation (position + size/2). NEVER estimate from screenshots.",
|
|
2357
|
+
"CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
|
|
2358
|
+
"If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
|
|
2359
|
+
"NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
|
|
2244
2360
|
].join("\n"),
|
|
2245
2361
|
{
|
|
2246
|
-
query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
|
|
2362
|
+
query: z5.string().optional().describe("Text/label to click (case-insensitive). Works for deeply nested elements."),
|
|
2363
|
+
role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
|
|
2247
2364
|
on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
|
|
2248
2365
|
coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
|
|
2249
2366
|
app: z5.string().optional().describe("App name"),
|
|
@@ -2252,12 +2369,19 @@ var DesktopTools = class {
|
|
|
2252
2369
|
rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
|
|
2253
2370
|
waitFor: z5.number().optional().describe("Max ms to wait for element to appear (default 5000)")
|
|
2254
2371
|
},
|
|
2255
|
-
async ({ query, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
|
|
2372
|
+
async ({ query, role, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
|
|
2256
2373
|
checkBlacklist(app);
|
|
2257
|
-
if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
|
|
2374
|
+
if (!query && !role && !on && !coords) throw new Error("Provide query, role, on, or coords.");
|
|
2258
2375
|
const args = ["click"];
|
|
2259
2376
|
if (coords) {
|
|
2260
2377
|
args.push("--coords", coords);
|
|
2378
|
+
} else if (role) {
|
|
2379
|
+
const resolved = await findFirstByRole(role, app);
|
|
2380
|
+
if (resolved) {
|
|
2381
|
+
args.push("--coords", resolved);
|
|
2382
|
+
} else {
|
|
2383
|
+
throw new Error(`No element with role '${role}' found in ${app ?? "frontmost app"}.`);
|
|
2384
|
+
}
|
|
2261
2385
|
} else if (query) {
|
|
2262
2386
|
const resolved = await resolveElementCoords(query, app);
|
|
2263
2387
|
if (resolved) {
|
|
@@ -2280,7 +2404,7 @@ var DesktopTools = class {
|
|
|
2280
2404
|
"desktop_type",
|
|
2281
2405
|
[
|
|
2282
2406
|
"Type text via keyboard. Supports \\n (return), \\t (tab) escape sequences.",
|
|
2283
|
-
"IMPORTANT: Focus the target field first
|
|
2407
|
+
"IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
|
|
2284
2408
|
"For Korean/Japanese/Chinese/emoji, use desktop_paste instead (keyboard sim is ASCII only).",
|
|
2285
2409
|
"Use clear=true to replace existing text (Cmd+A \u2192 Delete before typing)."
|
|
2286
2410
|
].join("\n"),
|
|
@@ -2305,6 +2429,7 @@ var DesktopTools = class {
|
|
|
2305
2429
|
"desktop_paste",
|
|
2306
2430
|
[
|
|
2307
2431
|
"Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
|
|
2432
|
+
"IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
|
|
2308
2433
|
"Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
|
|
2309
2434
|
"Can also paste file contents via filePath."
|
|
2310
2435
|
].join("\n"),
|
|
@@ -2387,7 +2512,10 @@ var DesktopTools = class {
|
|
|
2387
2512
|
);
|
|
2388
2513
|
server.tool(
|
|
2389
2514
|
"desktop_move",
|
|
2390
|
-
|
|
2515
|
+
[
|
|
2516
|
+
"Move mouse cursor without clicking. Use before scroll or to hover.",
|
|
2517
|
+
"Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
|
|
2518
|
+
].join("\n"),
|
|
2391
2519
|
{
|
|
2392
2520
|
coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
|
|
2393
2521
|
to: z5.string().optional().describe("Element text/label to move to"),
|
|
@@ -2422,7 +2550,8 @@ var DesktopTools = class {
|
|
|
2422
2550
|
"desktop_drag",
|
|
2423
2551
|
[
|
|
2424
2552
|
"Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
|
|
2425
|
-
"
|
|
2553
|
+
"Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
|
|
2554
|
+
"Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
|
|
2426
2555
|
].join("\n"),
|
|
2427
2556
|
{
|
|
2428
2557
|
from: z5.string().optional().describe("Source element ID from desktop_see"),
|
|
@@ -2619,6 +2748,7 @@ var DesktopTools = class {
|
|
|
2619
2748
|
app: z5.string().optional().describe("App to open with")
|
|
2620
2749
|
},
|
|
2621
2750
|
async ({ url, app }) => {
|
|
2751
|
+
checkBlacklist(app);
|
|
2622
2752
|
const args = ["open", url];
|
|
2623
2753
|
if (app) args.push("--app", app);
|
|
2624
2754
|
return json(await peekaboo(args));
|
package/dist/server/mcp.js
CHANGED
|
@@ -86,6 +86,9 @@ var FilesystemTools = class {
|
|
|
86
86
|
"- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
|
|
87
87
|
"- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
|
|
88
88
|
"- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
|
|
89
|
+
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
|
|
90
|
+
" Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
|
|
91
|
+
" This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
|
|
89
92
|
"",
|
|
90
93
|
"BEHAVIOR:",
|
|
91
94
|
"- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
|
|
@@ -1617,27 +1620,116 @@ function checkBlacklist(app) {
|
|
|
1617
1620
|
/**
 * Wrap a value in a tool-result envelope: a single "text" content entry whose
 * text is the value serialised as 2-space-indented JSON.
 *
 * @param {*} data - Any JSON-serialisable value.
 * @returns {{content: Array<{type: string, text: string}>}} The result envelope.
 */
function json(data) {
  const text = JSON.stringify(data, null, 2);
  const entry = { type: "text", text };
  return { content: [entry] };
}
|
|
1623
|
+
/**
 * Search one process's accessibility tree (via System Events) for the first UI
 * element whose name or description contains `query`, and return the centre of
 * that element.
 *
 * The generated AppleScript looks at three levels, in order:
 *   1. the process's top-level UI elements,
 *   2. the children of each top-level element,
 *   3. the grandchildren, skipping children whose role is AXTable, AXOutline
 *      or AXList.
 *
 * @param {string} processName - Name of the System Events process to inspect.
 * @param {string} query - Text to look for in an element's name/description.
 * @returns {Promise<string|null>} "x,y" (integer centre coordinates), or null
 *   when nothing matched, the input is blank, or osascript failed / timed out.
 */
async function searchProcessElements(processName, query) {
  // A blank process name or query can never identify an element; bail out
  // before paying for an osascript round trip (and avoid a TypeError on
  // non-string input).
  if (typeof processName !== "string" || typeof query !== "string") return null;
  if (processName.trim() === "" || query.trim() === "") return null;

  // Escape backslashes and double quotes so the values stay inside their
  // AppleScript string literals.
  const safeApp = processName.replace(/[\\"]/g, "\\$&");
  const safeQuery = query.replace(/[\\"]/g, "\\$&");

  // AppleScript fragment shared by all three search levels: test one element
  // reference and, on a match, return its centre (position + size / 2).
  const matchAndReturnCenter = (elemVar) => [
    "try",
    `if (name of ${elemVar} contains "${safeQuery}") or (description of ${elemVar} contains "${safeQuery}") then`,
    `set pos to position of ${elemVar}`,
    `set sz to size of ${elemVar}`,
    "set cx to (item 1 of pos) + (item 1 of sz) / 2",
    "set cy to (item 2 of pos) + (item 2 of sz) / 2",
    'return ((cx as integer) as text) & "," & ((cy as integer) as text)',
    "end if",
    "end try"
  ].join("\n");

  const script = `
tell application "System Events"
tell process "${safeApp}"
set topElems to UI elements
repeat with elem in topElems
${matchAndReturnCenter("elem")}
end repeat
repeat with parent in topElems
try
repeat with elem in UI elements of parent
${matchAndReturnCenter("elem")}
end repeat
end try
end repeat
repeat with parent in topElems
try
repeat with child in UI elements of parent
try
set childRole to role of child
if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
repeat with gc in UI elements of child
${matchAndReturnCenter("gc")}
end repeat
end if
end try
end repeat
end try
end repeat
end tell
end tell
return "NOT_FOUND"
`;

  try {
    const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
    const result = stdout.trim();
    // Accept only a well-formed "x,y" integer pair; this also rejects the
    // "NOT_FOUND" sentinel and any unexpected output.
    return /^-?\d+,-?\d+$/.test(result) ? result : null;
  } catch {
    // Best effort: a failed or timed-out lookup is reported as "not found".
    return null;
  }
}
|
|
1620
1691
|
/**
 * Resolve the centre coordinates of the first UI element matching `query`.
 *
 * The search runs against `app` when given, otherwise against the frontmost
 * application process as reported by System Events. If that search finds
 * nothing and the target was not already "Dock", the Dock process is searched
 * as a fallback.
 *
 * @param {string} query - Text to look for in element names/descriptions.
 * @param {string} [app] - Process name to search; defaults to the frontmost app.
 * @returns {Promise<string|null>} "x,y" coordinates, or null when the
 *   frontmost app could not be determined or no element matched.
 */
async function resolveElementCoords(query, app) {
  let targetApp = app;
  if (!targetApp) {
    try {
      // Bounded like the element search itself, so a stuck System Events call
      // cannot hang the tool invocation indefinitely.
      const { stdout } = await execa(
        "osascript",
        [
          "-e",
          'tell application "System Events" to get name of first application process whose frontmost is true'
        ],
        { timeout: 1e4 }
      );
      targetApp = stdout.trim();
    } catch {
      return null;
    }
  }

  const result = await searchProcessElements(targetApp, query);
  if (result) return result;

  // Fall back to the Dock process unless it was the one just searched.
  if (targetApp !== "Dock") {
    return searchProcessElements("Dock", query);
  }
  return null;
}
|
|
1711
|
+
async function findFirstByRole(role, app) {
|
|
1712
|
+
let targetApp = app;
|
|
1713
|
+
if (!targetApp) {
|
|
1714
|
+
try {
|
|
1715
|
+
const { stdout } = await execa("osascript", [
|
|
1716
|
+
"-e",
|
|
1717
|
+
'tell application "System Events" to get name of first application process whose frontmost is true'
|
|
1718
|
+
]);
|
|
1719
|
+
targetApp = stdout.trim();
|
|
1720
|
+
} catch {
|
|
1721
|
+
return null;
|
|
1722
|
+
}
|
|
1723
|
+
}
|
|
1724
|
+
const safeApp = targetApp.replace(/[\\"]/g, "\\$&");
|
|
1725
|
+
const safeRole = role.replace(/[\\"]/g, "\\$&");
|
|
1634
1726
|
const script = `
|
|
1635
1727
|
tell application "System Events"
|
|
1636
1728
|
tell process "${safeApp}"
|
|
1637
1729
|
set topElems to UI elements
|
|
1638
1730
|
repeat with elem in topElems
|
|
1639
1731
|
try
|
|
1640
|
-
if
|
|
1732
|
+
if role of elem is "${safeRole}" then
|
|
1641
1733
|
set pos to position of elem
|
|
1642
1734
|
set sz to size of elem
|
|
1643
1735
|
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
@@ -1650,7 +1742,7 @@ tell application "System Events"
|
|
|
1650
1742
|
try
|
|
1651
1743
|
repeat with elem in UI elements of parent
|
|
1652
1744
|
try
|
|
1653
|
-
if
|
|
1745
|
+
if role of elem is "${safeRole}" then
|
|
1654
1746
|
set pos to position of elem
|
|
1655
1747
|
set sz to size of elem
|
|
1656
1748
|
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
@@ -1661,6 +1753,28 @@ tell application "System Events"
|
|
|
1661
1753
|
end repeat
|
|
1662
1754
|
end try
|
|
1663
1755
|
end repeat
|
|
1756
|
+
repeat with parent in topElems
|
|
1757
|
+
try
|
|
1758
|
+
repeat with child in UI elements of parent
|
|
1759
|
+
try
|
|
1760
|
+
set childRole to role of child
|
|
1761
|
+
if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
|
|
1762
|
+
repeat with gc in UI elements of child
|
|
1763
|
+
try
|
|
1764
|
+
if role of gc is "${safeRole}" then
|
|
1765
|
+
set pos to position of gc
|
|
1766
|
+
set sz to size of gc
|
|
1767
|
+
set cx to (item 1 of pos) + (item 1 of sz) / 2
|
|
1768
|
+
set cy to (item 2 of pos) + (item 2 of sz) / 2
|
|
1769
|
+
return ((cx as integer) as text) & "," & ((cy as integer) as text)
|
|
1770
|
+
end if
|
|
1771
|
+
end try
|
|
1772
|
+
end repeat
|
|
1773
|
+
end if
|
|
1774
|
+
end try
|
|
1775
|
+
end repeat
|
|
1776
|
+
end try
|
|
1777
|
+
end repeat
|
|
1664
1778
|
end tell
|
|
1665
1779
|
end tell
|
|
1666
1780
|
return "NOT_FOUND"
|
|
@@ -1713,8 +1827,9 @@ var DesktopTools = class {
|
|
|
1713
1827
|
"desktop_screenshot",
|
|
1714
1828
|
[
|
|
1715
1829
|
"Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
|
|
1716
|
-
"Use for visual context or to verify UI state
|
|
1717
|
-
"
|
|
1830
|
+
"Use for visual context or to verify UI state ONLY.",
|
|
1831
|
+
"WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
|
|
1832
|
+
"Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
|
|
1718
1833
|
].join("\n"),
|
|
1719
1834
|
{
|
|
1720
1835
|
app: z5.string().optional().describe("Capture specific app window"),
|
|
@@ -1751,15 +1866,17 @@ var DesktopTools = class {
|
|
|
1751
1866
|
server.tool(
|
|
1752
1867
|
"desktop_click",
|
|
1753
1868
|
[
|
|
1754
|
-
"Click a UI element by text label (query), coordinates (coords), or element ID (on).",
|
|
1755
|
-
"BEST: Use query with app
|
|
1756
|
-
"
|
|
1757
|
-
"
|
|
1758
|
-
"
|
|
1759
|
-
"
|
|
1869
|
+
"Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
|
|
1870
|
+
"BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
|
|
1871
|
+
"ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
|
|
1872
|
+
"GOOD: Use coords 'x,y' \u2014 MUST be from osascript center calculation (position + size/2). NEVER estimate from screenshots.",
|
|
1873
|
+
"CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
|
|
1874
|
+
"If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
|
|
1875
|
+
"NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
|
|
1760
1876
|
].join("\n"),
|
|
1761
1877
|
{
|
|
1762
|
-
query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
|
|
1878
|
+
query: z5.string().optional().describe("Text/label to click (case-insensitive). Works for deeply nested elements."),
|
|
1879
|
+
role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
|
|
1763
1880
|
on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
|
|
1764
1881
|
coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
|
|
1765
1882
|
app: z5.string().optional().describe("App name"),
|
|
@@ -1768,12 +1885,19 @@ var DesktopTools = class {
|
|
|
1768
1885
|
rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
|
|
1769
1886
|
waitFor: z5.number().optional().describe("Max ms to wait for element to appear (default 5000)")
|
|
1770
1887
|
},
|
|
1771
|
-
async ({ query, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
|
|
1888
|
+
async ({ query, role, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
|
|
1772
1889
|
checkBlacklist(app);
|
|
1773
|
-
if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
|
|
1890
|
+
if (!query && !role && !on && !coords) throw new Error("Provide query, role, on, or coords.");
|
|
1774
1891
|
const args = ["click"];
|
|
1775
1892
|
if (coords) {
|
|
1776
1893
|
args.push("--coords", coords);
|
|
1894
|
+
} else if (role) {
|
|
1895
|
+
const resolved = await findFirstByRole(role, app);
|
|
1896
|
+
if (resolved) {
|
|
1897
|
+
args.push("--coords", resolved);
|
|
1898
|
+
} else {
|
|
1899
|
+
throw new Error(`No element with role '${role}' found in ${app ?? "frontmost app"}.`);
|
|
1900
|
+
}
|
|
1777
1901
|
} else if (query) {
|
|
1778
1902
|
const resolved = await resolveElementCoords(query, app);
|
|
1779
1903
|
if (resolved) {
|
|
@@ -1796,7 +1920,7 @@ var DesktopTools = class {
|
|
|
1796
1920
|
"desktop_type",
|
|
1797
1921
|
[
|
|
1798
1922
|
"Type text via keyboard. Supports \\n (return), \\t (tab) escape sequences.",
|
|
1799
|
-
"IMPORTANT: Focus the target field first
|
|
1923
|
+
"IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
|
|
1800
1924
|
"For Korean/Japanese/Chinese/emoji, use desktop_paste instead (keyboard sim is ASCII only).",
|
|
1801
1925
|
"Use clear=true to replace existing text (Cmd+A \u2192 Delete before typing)."
|
|
1802
1926
|
].join("\n"),
|
|
@@ -1821,6 +1945,7 @@ var DesktopTools = class {
|
|
|
1821
1945
|
"desktop_paste",
|
|
1822
1946
|
[
|
|
1823
1947
|
"Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
|
|
1948
|
+
"IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
|
|
1824
1949
|
"Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
|
|
1825
1950
|
"Can also paste file contents via filePath."
|
|
1826
1951
|
].join("\n"),
|
|
@@ -1903,7 +2028,10 @@ var DesktopTools = class {
|
|
|
1903
2028
|
);
|
|
1904
2029
|
server.tool(
|
|
1905
2030
|
"desktop_move",
|
|
1906
|
-
|
|
2031
|
+
[
|
|
2032
|
+
"Move mouse cursor without clicking. Use before scroll or to hover.",
|
|
2033
|
+
"Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
|
|
2034
|
+
].join("\n"),
|
|
1907
2035
|
{
|
|
1908
2036
|
coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
|
|
1909
2037
|
to: z5.string().optional().describe("Element text/label to move to"),
|
|
@@ -1938,7 +2066,8 @@ var DesktopTools = class {
|
|
|
1938
2066
|
"desktop_drag",
|
|
1939
2067
|
[
|
|
1940
2068
|
"Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
|
|
1941
|
-
"
|
|
2069
|
+
"Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
|
|
2070
|
+
"Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
|
|
1942
2071
|
].join("\n"),
|
|
1943
2072
|
{
|
|
1944
2073
|
from: z5.string().optional().describe("Source element ID from desktop_see"),
|
|
@@ -2135,6 +2264,7 @@ var DesktopTools = class {
|
|
|
2135
2264
|
app: z5.string().optional().describe("App to open with")
|
|
2136
2265
|
},
|
|
2137
2266
|
async ({ url, app }) => {
|
|
2267
|
+
checkBlacklist(app);
|
|
2138
2268
|
const args = ["open", url];
|
|
2139
2269
|
if (app) args.push("--app", app);
|
|
2140
2270
|
return json(await peekaboo(args));
|
package/dist/server/stdio.js
CHANGED
|
@@ -87,6 +87,9 @@ var FilesystemTools = class {
|
|
|
87
87
|
"- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
|
|
88
88
|
"- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
|
|
89
89
|
"- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
|
|
90
|
+
"- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
|
|
91
|
+
" Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
|
|
92
|
+
" This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
|
|
90
93
|
"",
|
|
91
94
|
"BEHAVIOR:",
|
|
92
95
|
"- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
|