junis 0.3.17 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -570,6 +570,9 @@ var FilesystemTools = class {
570
570
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
571
571
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
572
572
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
573
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
574
+ " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
575
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
573
576
  "",
574
577
  "BEHAVIOR:",
575
578
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -2101,27 +2104,116 @@ function checkBlacklist(app) {
2101
2104
  function json(data) {
2102
2105
  return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
2103
2106
  }
2107
+ async function searchProcessElements(processName, query) {
2108
+ const safeApp = processName.replace(/[\\"]/g, "\\$&");
2109
+ const safeQuery = query.replace(/[\\"]/g, "\\$&");
2110
+ const script = `
2111
+ tell application "System Events"
2112
+ tell process "${safeApp}"
2113
+ set topElems to UI elements
2114
+ repeat with elem in topElems
2115
+ try
2116
+ if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
2117
+ set pos to position of elem
2118
+ set sz to size of elem
2119
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
2120
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
2121
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
2122
+ end if
2123
+ end try
2124
+ end repeat
2125
+ repeat with parent in topElems
2126
+ try
2127
+ repeat with elem in UI elements of parent
2128
+ try
2129
+ if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
2130
+ set pos to position of elem
2131
+ set sz to size of elem
2132
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
2133
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
2134
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
2135
+ end if
2136
+ end try
2137
+ end repeat
2138
+ end try
2139
+ end repeat
2140
+ repeat with parent in topElems
2141
+ try
2142
+ repeat with child in UI elements of parent
2143
+ try
2144
+ set childRole to role of child
2145
+ if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
2146
+ repeat with gc in UI elements of child
2147
+ try
2148
+ if (name of gc contains "${safeQuery}") or (description of gc contains "${safeQuery}") then
2149
+ set pos to position of gc
2150
+ set sz to size of gc
2151
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
2152
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
2153
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
2154
+ end if
2155
+ end try
2156
+ end repeat
2157
+ end if
2158
+ end try
2159
+ end repeat
2160
+ end try
2161
+ end repeat
2162
+ end tell
2163
+ end tell
2164
+ return "NOT_FOUND"
2165
+ `;
2166
+ try {
2167
+ const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
2168
+ const result = stdout.trim();
2169
+ if (result === "NOT_FOUND" || !result.includes(",")) return null;
2170
+ return result;
2171
+ } catch {
2172
+ return null;
2173
+ }
2174
+ }
2104
2175
  async function resolveElementCoords(query, app) {
2105
- if (!app) {
2176
+ let targetApp = app;
2177
+ if (!targetApp) {
2106
2178
  try {
2107
2179
  const { stdout } = await execa("osascript", [
2108
2180
  "-e",
2109
2181
  'tell application "System Events" to get name of first application process whose frontmost is true'
2110
2182
  ]);
2111
- app = stdout.trim();
2183
+ targetApp = stdout.trim();
2112
2184
  } catch {
2113
2185
  return null;
2114
2186
  }
2115
2187
  }
2116
- const safeApp = app.replace(/[\\"]/g, "\\$&");
2117
- const safeQuery = query.replace(/[\\"]/g, "\\$&");
2188
+ const result = await searchProcessElements(targetApp, query);
2189
+ if (result) return result;
2190
+ if (targetApp !== "Dock") {
2191
+ return await searchProcessElements("Dock", query);
2192
+ }
2193
+ return null;
2194
+ }
2195
+ async function findFirstByRole(role, app) {
2196
+ let targetApp = app;
2197
+ if (!targetApp) {
2198
+ try {
2199
+ const { stdout } = await execa("osascript", [
2200
+ "-e",
2201
+ 'tell application "System Events" to get name of first application process whose frontmost is true'
2202
+ ]);
2203
+ targetApp = stdout.trim();
2204
+ } catch {
2205
+ return null;
2206
+ }
2207
+ }
2208
+ const safeApp = targetApp.replace(/[\\"]/g, "\\$&");
2209
+ const safeRole = role.replace(/[\\"]/g, "\\$&");
2118
2210
  const script = `
2119
2211
  tell application "System Events"
2120
2212
  tell process "${safeApp}"
2121
2213
  set topElems to UI elements
2122
2214
  repeat with elem in topElems
2123
2215
  try
2124
- if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
2216
+ if role of elem is "${safeRole}" then
2125
2217
  set pos to position of elem
2126
2218
  set sz to size of elem
2127
2219
  set cx to (item 1 of pos) + (item 1 of sz) / 2
@@ -2134,7 +2226,7 @@ tell application "System Events"
2134
2226
  try
2135
2227
  repeat with elem in UI elements of parent
2136
2228
  try
2137
- if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
2229
+ if role of elem is "${safeRole}" then
2138
2230
  set pos to position of elem
2139
2231
  set sz to size of elem
2140
2232
  set cx to (item 1 of pos) + (item 1 of sz) / 2
@@ -2145,6 +2237,28 @@ tell application "System Events"
2145
2237
  end repeat
2146
2238
  end try
2147
2239
  end repeat
2240
+ repeat with parent in topElems
2241
+ try
2242
+ repeat with child in UI elements of parent
2243
+ try
2244
+ set childRole to role of child
2245
+ if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
2246
+ repeat with gc in UI elements of child
2247
+ try
2248
+ if role of gc is "${safeRole}" then
2249
+ set pos to position of gc
2250
+ set sz to size of gc
2251
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
2252
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
2253
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
2254
+ end if
2255
+ end try
2256
+ end repeat
2257
+ end if
2258
+ end try
2259
+ end repeat
2260
+ end try
2261
+ end repeat
2148
2262
  end tell
2149
2263
  end tell
2150
2264
  return "NOT_FOUND"
@@ -2197,8 +2311,9 @@ var DesktopTools = class {
2197
2311
  "desktop_screenshot",
2198
2312
  [
2199
2313
  "Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
2200
- "Use for visual context or to verify UI state. Screenshot pixel coordinates map directly to desktop_click(coords).",
2201
- "For clicking, prefer desktop_click(query, app) which auto-resolves coordinates. Use screenshot coords as fallback."
2314
+ "Use for visual context or to verify UI state ONLY.",
2315
+ "WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
2316
+ "Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
2202
2317
  ].join("\n"),
2203
2318
  {
2204
2319
  app: z5.string().optional().describe("Capture specific app window"),
@@ -2235,15 +2350,17 @@ var DesktopTools = class {
2235
2350
  server.tool(
2236
2351
  "desktop_click",
2237
2352
  [
2238
- "Click a UI element by text label (query), coordinates (coords), or element ID (on).",
2239
- "BEST: Use query with app name \u2014 auto-resolves to exact screen coords via accessibility API. No desktop_see needed.",
2240
- "GOOD: Use coords 'x,y' from desktop_screenshot for pixel-perfect accuracy.",
2241
- "CAUTION: 'on' (element ID) has known offset bug \u2014 use coords or query instead when possible.",
2242
- "If not found, try: desktop_screenshot to find coords, desktop_menu for menu items, or desktop_hotkey for shortcuts.",
2243
- "NOTE: Web page elements (inside browser) are invisible to desktop tools. Use browser_* tools instead."
2353
+ "Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
2354
+ "BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
2355
+ "ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
2356
+ "GOOD: Use coords 'x,y' \u2014 MUST be from osascript center calculation (position + size/2). NEVER estimate from screenshots.",
2357
+ "CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
2358
+ "If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
2359
+ "NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
2244
2360
  ].join("\n"),
2245
2361
  {
2246
- query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
2362
+ query: z5.string().optional().describe("Text/label to click (case-insensitive). Works for deeply nested elements."),
2363
+ role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
2247
2364
  on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
2248
2365
  coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
2249
2366
  app: z5.string().optional().describe("App name"),
@@ -2252,12 +2369,19 @@ var DesktopTools = class {
2252
2369
  rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
2253
2370
  waitFor: z5.number().optional().describe("Max ms to wait for element to appear (default 5000)")
2254
2371
  },
2255
- async ({ query, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
2372
+ async ({ query, role, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
2256
2373
  checkBlacklist(app);
2257
- if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
2374
+ if (!query && !role && !on && !coords) throw new Error("Provide query, role, on, or coords.");
2258
2375
  const args = ["click"];
2259
2376
  if (coords) {
2260
2377
  args.push("--coords", coords);
2378
+ } else if (role) {
2379
+ const resolved = await findFirstByRole(role, app);
2380
+ if (resolved) {
2381
+ args.push("--coords", resolved);
2382
+ } else {
2383
+ throw new Error(`No element with role '${role}' found in ${app ?? "frontmost app"}.`);
2384
+ }
2261
2385
  } else if (query) {
2262
2386
  const resolved = await resolveElementCoords(query, app);
2263
2387
  if (resolved) {
@@ -2280,7 +2404,7 @@ var DesktopTools = class {
2280
2404
  "desktop_type",
2281
2405
  [
2282
2406
  "Type text via keyboard. Supports \\n (return), \\t (tab) escape sequences.",
2283
- "IMPORTANT: Focus the target field first (click it with desktop_click) before typing. Types at current keyboard focus.",
2407
+ "IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
2284
2408
  "For Korean/Japanese/Chinese/emoji, use desktop_paste instead (keyboard sim is ASCII only).",
2285
2409
  "Use clear=true to replace existing text (Cmd+A \u2192 Delete before typing)."
2286
2410
  ].join("\n"),
@@ -2305,6 +2429,7 @@ var DesktopTools = class {
2305
2429
  "desktop_paste",
2306
2430
  [
2307
2431
  "Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
2432
+ "IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
2308
2433
  "Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
2309
2434
  "Can also paste file contents via filePath."
2310
2435
  ].join("\n"),
@@ -2387,7 +2512,10 @@ var DesktopTools = class {
2387
2512
  );
2388
2513
  server.tool(
2389
2514
  "desktop_move",
2390
- "Move mouse cursor without clicking. Use before scroll or to hover.",
2515
+ [
2516
+ "Move mouse cursor without clicking. Use before scroll or to hover.",
2517
+ "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2518
+ ].join("\n"),
2391
2519
  {
2392
2520
  coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
2393
2521
  to: z5.string().optional().describe("Element text/label to move to"),
@@ -2422,7 +2550,8 @@ var DesktopTools = class {
2422
2550
  "desktop_drag",
2423
2551
  [
2424
2552
  "Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
2425
- "Use element IDs from desktop_see or raw coordinates."
2553
+ "Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
2554
+ "Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2426
2555
  ].join("\n"),
2427
2556
  {
2428
2557
  from: z5.string().optional().describe("Source element ID from desktop_see"),
@@ -2619,6 +2748,7 @@ var DesktopTools = class {
2619
2748
  app: z5.string().optional().describe("App to open with")
2620
2749
  },
2621
2750
  async ({ url, app }) => {
2751
+ checkBlacklist(app);
2622
2752
  const args = ["open", url];
2623
2753
  if (app) args.push("--app", app);
2624
2754
  return json(await peekaboo(args));
@@ -86,6 +86,9 @@ var FilesystemTools = class {
86
86
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
87
87
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
88
88
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
89
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
90
+ " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
91
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
89
92
  "",
90
93
  "BEHAVIOR:",
91
94
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -1617,27 +1620,116 @@ function checkBlacklist(app) {
1617
1620
  function json(data) {
1618
1621
  return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
1619
1622
  }
1623
+ async function searchProcessElements(processName, query) {
1624
+ const safeApp = processName.replace(/[\\"]/g, "\\$&");
1625
+ const safeQuery = query.replace(/[\\"]/g, "\\$&");
1626
+ const script = `
1627
+ tell application "System Events"
1628
+ tell process "${safeApp}"
1629
+ set topElems to UI elements
1630
+ repeat with elem in topElems
1631
+ try
1632
+ if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
1633
+ set pos to position of elem
1634
+ set sz to size of elem
1635
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
1636
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
1637
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
1638
+ end if
1639
+ end try
1640
+ end repeat
1641
+ repeat with parent in topElems
1642
+ try
1643
+ repeat with elem in UI elements of parent
1644
+ try
1645
+ if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
1646
+ set pos to position of elem
1647
+ set sz to size of elem
1648
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
1649
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
1650
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
1651
+ end if
1652
+ end try
1653
+ end repeat
1654
+ end try
1655
+ end repeat
1656
+ repeat with parent in topElems
1657
+ try
1658
+ repeat with child in UI elements of parent
1659
+ try
1660
+ set childRole to role of child
1661
+ if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
1662
+ repeat with gc in UI elements of child
1663
+ try
1664
+ if (name of gc contains "${safeQuery}") or (description of gc contains "${safeQuery}") then
1665
+ set pos to position of gc
1666
+ set sz to size of gc
1667
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
1668
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
1669
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
1670
+ end if
1671
+ end try
1672
+ end repeat
1673
+ end if
1674
+ end try
1675
+ end repeat
1676
+ end try
1677
+ end repeat
1678
+ end tell
1679
+ end tell
1680
+ return "NOT_FOUND"
1681
+ `;
1682
+ try {
1683
+ const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
1684
+ const result = stdout.trim();
1685
+ if (result === "NOT_FOUND" || !result.includes(",")) return null;
1686
+ return result;
1687
+ } catch {
1688
+ return null;
1689
+ }
1690
+ }
1620
1691
  async function resolveElementCoords(query, app) {
1621
- if (!app) {
1692
+ let targetApp = app;
1693
+ if (!targetApp) {
1622
1694
  try {
1623
1695
  const { stdout } = await execa("osascript", [
1624
1696
  "-e",
1625
1697
  'tell application "System Events" to get name of first application process whose frontmost is true'
1626
1698
  ]);
1627
- app = stdout.trim();
1699
+ targetApp = stdout.trim();
1628
1700
  } catch {
1629
1701
  return null;
1630
1702
  }
1631
1703
  }
1632
- const safeApp = app.replace(/[\\"]/g, "\\$&");
1633
- const safeQuery = query.replace(/[\\"]/g, "\\$&");
1704
+ const result = await searchProcessElements(targetApp, query);
1705
+ if (result) return result;
1706
+ if (targetApp !== "Dock") {
1707
+ return await searchProcessElements("Dock", query);
1708
+ }
1709
+ return null;
1710
+ }
1711
+ async function findFirstByRole(role, app) {
1712
+ let targetApp = app;
1713
+ if (!targetApp) {
1714
+ try {
1715
+ const { stdout } = await execa("osascript", [
1716
+ "-e",
1717
+ 'tell application "System Events" to get name of first application process whose frontmost is true'
1718
+ ]);
1719
+ targetApp = stdout.trim();
1720
+ } catch {
1721
+ return null;
1722
+ }
1723
+ }
1724
+ const safeApp = targetApp.replace(/[\\"]/g, "\\$&");
1725
+ const safeRole = role.replace(/[\\"]/g, "\\$&");
1634
1726
  const script = `
1635
1727
  tell application "System Events"
1636
1728
  tell process "${safeApp}"
1637
1729
  set topElems to UI elements
1638
1730
  repeat with elem in topElems
1639
1731
  try
1640
- if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
1732
+ if role of elem is "${safeRole}" then
1641
1733
  set pos to position of elem
1642
1734
  set sz to size of elem
1643
1735
  set cx to (item 1 of pos) + (item 1 of sz) / 2
@@ -1650,7 +1742,7 @@ tell application "System Events"
1650
1742
  try
1651
1743
  repeat with elem in UI elements of parent
1652
1744
  try
1653
- if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
1745
+ if role of elem is "${safeRole}" then
1654
1746
  set pos to position of elem
1655
1747
  set sz to size of elem
1656
1748
  set cx to (item 1 of pos) + (item 1 of sz) / 2
@@ -1661,6 +1753,28 @@ tell application "System Events"
1661
1753
  end repeat
1662
1754
  end try
1663
1755
  end repeat
1756
+ repeat with parent in topElems
1757
+ try
1758
+ repeat with child in UI elements of parent
1759
+ try
1760
+ set childRole to role of child
1761
+ if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
1762
+ repeat with gc in UI elements of child
1763
+ try
1764
+ if role of gc is "${safeRole}" then
1765
+ set pos to position of gc
1766
+ set sz to size of gc
1767
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
1768
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
1769
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
1770
+ end if
1771
+ end try
1772
+ end repeat
1773
+ end if
1774
+ end try
1775
+ end repeat
1776
+ end try
1777
+ end repeat
1664
1778
  end tell
1665
1779
  end tell
1666
1780
  return "NOT_FOUND"
@@ -1713,8 +1827,9 @@ var DesktopTools = class {
1713
1827
  "desktop_screenshot",
1714
1828
  [
1715
1829
  "Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
1716
- "Use for visual context or to verify UI state. Screenshot pixel coordinates map directly to desktop_click(coords).",
1717
- "For clicking, prefer desktop_click(query, app) which auto-resolves coordinates. Use screenshot coords as fallback."
1830
+ "Use for visual context or to verify UI state ONLY.",
1831
+ "WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
1832
+ "Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
1718
1833
  ].join("\n"),
1719
1834
  {
1720
1835
  app: z5.string().optional().describe("Capture specific app window"),
@@ -1751,15 +1866,17 @@ var DesktopTools = class {
1751
1866
  server.tool(
1752
1867
  "desktop_click",
1753
1868
  [
1754
- "Click a UI element by text label (query), coordinates (coords), or element ID (on).",
1755
- "BEST: Use query with app name \u2014 auto-resolves to exact screen coords via accessibility API. No desktop_see needed.",
1756
- "GOOD: Use coords 'x,y' from desktop_screenshot for pixel-perfect accuracy.",
1757
- "CAUTION: 'on' (element ID) has known offset bug \u2014 use coords or query instead when possible.",
1758
- "If not found, try: desktop_screenshot to find coords, desktop_menu for menu items, or desktop_hotkey for shortcuts.",
1759
- "NOTE: Web page elements (inside browser) are invisible to desktop tools. Use browser_* tools instead."
1869
+ "Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
1870
+ "BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
1871
+ "ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
1872
+ "GOOD: Use coords 'x,y' \u2014 MUST be from osascript center calculation (position + size/2). NEVER estimate from screenshots.",
1873
+ "CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
1874
+ "If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
1875
+ "NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
1760
1876
  ].join("\n"),
1761
1877
  {
1762
- query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
1878
+ query: z5.string().optional().describe("Text/label to click (case-insensitive). Works for deeply nested elements."),
1879
+ role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
1763
1880
  on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
1764
1881
  coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
1765
1882
  app: z5.string().optional().describe("App name"),
@@ -1768,12 +1885,19 @@ var DesktopTools = class {
1768
1885
  rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
1769
1886
  waitFor: z5.number().optional().describe("Max ms to wait for element to appear (default 5000)")
1770
1887
  },
1771
- async ({ query, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
1888
+ async ({ query, role, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
1772
1889
  checkBlacklist(app);
1773
- if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
1890
+ if (!query && !role && !on && !coords) throw new Error("Provide query, role, on, or coords.");
1774
1891
  const args = ["click"];
1775
1892
  if (coords) {
1776
1893
  args.push("--coords", coords);
1894
+ } else if (role) {
1895
+ const resolved = await findFirstByRole(role, app);
1896
+ if (resolved) {
1897
+ args.push("--coords", resolved);
1898
+ } else {
1899
+ throw new Error(`No element with role '${role}' found in ${app ?? "frontmost app"}.`);
1900
+ }
1777
1901
  } else if (query) {
1778
1902
  const resolved = await resolveElementCoords(query, app);
1779
1903
  if (resolved) {
@@ -1796,7 +1920,7 @@ var DesktopTools = class {
1796
1920
  "desktop_type",
1797
1921
  [
1798
1922
  "Type text via keyboard. Supports \\n (return), \\t (tab) escape sequences.",
1799
- "IMPORTANT: Focus the target field first (click it with desktop_click) before typing. Types at current keyboard focus.",
1923
+ "IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
1800
1924
  "For Korean/Japanese/Chinese/emoji, use desktop_paste instead (keyboard sim is ASCII only).",
1801
1925
  "Use clear=true to replace existing text (Cmd+A \u2192 Delete before typing)."
1802
1926
  ].join("\n"),
@@ -1821,6 +1945,7 @@ var DesktopTools = class {
1821
1945
  "desktop_paste",
1822
1946
  [
1823
1947
  "Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
1948
+ "IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
1824
1949
  "Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
1825
1950
  "Can also paste file contents via filePath."
1826
1951
  ].join("\n"),
@@ -1903,7 +2028,10 @@ var DesktopTools = class {
1903
2028
  );
1904
2029
  server.tool(
1905
2030
  "desktop_move",
1906
- "Move mouse cursor without clicking. Use before scroll or to hover.",
2031
+ [
2032
+ "Move mouse cursor without clicking. Use before scroll or to hover.",
2033
+ "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2034
+ ].join("\n"),
1907
2035
  {
1908
2036
  coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
1909
2037
  to: z5.string().optional().describe("Element text/label to move to"),
@@ -1938,7 +2066,8 @@ var DesktopTools = class {
1938
2066
  "desktop_drag",
1939
2067
  [
1940
2068
  "Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
1941
- "Use element IDs from desktop_see or raw coordinates."
2069
+ "Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
2070
+ "Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
1942
2071
  ].join("\n"),
1943
2072
  {
1944
2073
  from: z5.string().optional().describe("Source element ID from desktop_see"),
@@ -2135,6 +2264,7 @@ var DesktopTools = class {
2135
2264
  app: z5.string().optional().describe("App to open with")
2136
2265
  },
2137
2266
  async ({ url, app }) => {
2267
+ checkBlacklist(app);
2138
2268
  const args = ["open", url];
2139
2269
  if (app) args.push("--app", app);
2140
2270
  return json(await peekaboo(args));
@@ -87,6 +87,9 @@ var FilesystemTools = class {
87
87
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
88
88
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
89
89
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
90
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
91
+ " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
92
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
90
93
  "",
91
94
  "BEHAVIOR:",
92
95
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "junis",
3
- "version": "0.3.17",
3
+ "version": "0.4.1",
4
4
  "description": "One-line device control for AI agents",
5
5
  "type": "module",
6
6
  "bin": {