junis 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -525,13 +525,19 @@ var toolPermissions = {
525
525
  desktop_click: "confirm",
526
526
  desktop_type: "confirm",
527
527
  desktop_hotkey: "confirm",
528
+ desktop_press: "confirm",
528
529
  desktop_scroll: "confirm",
529
530
  desktop_move: "confirm",
531
+ desktop_drag: "confirm",
530
532
  desktop_menu: "confirm",
531
533
  desktop_paste: "confirm",
534
+ desktop_clipboard: "confirm",
535
+ desktop_dialog: "confirm",
532
536
  desktop_screenshot: "confirm",
533
537
  desktop_open_app: "auto",
534
538
  desktop_open_url: "auto",
539
+ desktop_app_quit: "confirm",
540
+ desktop_window: "confirm",
535
541
  cron_create: "confirm",
536
542
  cron_delete: "confirm",
537
543
  edit_block: "confirm",
@@ -564,6 +570,9 @@ var FilesystemTools = class {
564
570
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
565
571
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
566
572
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
573
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
574
+ " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
575
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
567
576
  "",
568
577
  "BEHAVIOR:",
569
578
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -2095,15 +2104,81 @@ function checkBlacklist(app) {
2095
2104
  function json(data) {
2096
2105
  return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
2097
2106
  }
2107
/**
 * Find a UI element of a macOS process by label and return the centre of its frame.
 *
 * Runs an AppleScript through `osascript` that asks System Events for the
 * process's top-level UI elements and then for their direct children, and
 * reports the first element whose `name` or `description` contains `query`.
 * Only those two levels of the hierarchy are inspected.
 *
 * @param {string} processName - System Events process name (e.g. "Safari").
 * @param {string} query - Text to look for in an element's name or description.
 * @returns {Promise<string|null>} Centre point as "x,y" (integers), or `null`
 *   when nothing matched, the inputs are unusable, or osascript failed/timed out.
 */
async function searchProcessElements(processName, query) {
  // Reject unusable input before spawning a process: a non-string would throw
  // on `.replace`, and an empty needle cannot identify a specific element.
  if (typeof processName !== "string" || typeof query !== "string") return null;
  if (processName === "" || query === "") return null;

  // Escape backslashes and double quotes so the values stay inside their
  // AppleScript string literals.
  const quoteForAppleScript = (text) => text.replace(/[\\"]/g, "\\$&");
  const safeApp = quoteForAppleScript(processName);
  const safeQuery = quoteForAppleScript(query);

  // Shared by both search levels: if `elem` matches, return the centre of its
  // frame. The inner `try` skips elements that lack one of the properties.
  const matchAndReturnCenter = `
        try
          if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
            set pos to position of elem
            set sz to size of elem
            set cx to (item 1 of pos) + (item 1 of sz) / 2
            set cy to (item 2 of pos) + (item 2 of sz) / 2
            return ((cx as integer) as text) & "," & ((cy as integer) as text)
          end if
        end try`;

  // The outer loop variable is deliberately not called `parent`, which is also
  // the name of AppleScript's built-in script-object property.
  const script = `
tell application "System Events"
  tell process "${safeApp}"
    set topElems to UI elements
    repeat with elem in topElems${matchAndReturnCenter}
    end repeat
    repeat with containerElem in topElems
      try
        repeat with elem in UI elements of containerElem${matchAndReturnCenter}
        end repeat
      end try
    end repeat
  end tell
end tell
return "NOT_FOUND"
`;

  try {
    const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
    const result = stdout.trim();
    // Accept only a well-formed "x,y" pair (coordinates may be negative on
    // secondary displays); anything else must never be forwarded as coords.
    return /^-?\d+,-?\d+$/.test(result) ? result : null;
  } catch {
    // Best effort by design: callers fall back to passing the raw query on
    // when this lookup yields nothing.
    return null;
  }
}
2153
/**
 * Resolve a text label to "x,y" screen coordinates through the accessibility tree.
 *
 * Searches `app` (or, when `app` is not given, the frontmost application
 * process as reported by System Events). If nothing matches there and the
 * target is not the Dock itself, the Dock process is searched as a fallback.
 *
 * @param {string} query - Label text to look for.
 * @param {string} [app] - Process name to search; omit to use the frontmost app.
 * @returns {Promise<string|null>} "x,y" on success, otherwise `null` (including
 *   when the frontmost app cannot be determined).
 */
async function resolveElementCoords(query, app) {
  // Bound the lookup so a stalled osascript cannot hang the tool call.
  const FRONTMOST_LOOKUP_TIMEOUT_MS = 5e3;

  let targetApp = app;
  if (!targetApp) {
    try {
      const { stdout } = await execa(
        "osascript",
        [
          "-e",
          'tell application "System Events" to get name of first application process whose frontmost is true'
        ],
        { timeout: FRONTMOST_LOOKUP_TIMEOUT_MS }
      );
      targetApp = stdout.trim();
    } catch {
      // Best effort: without a target process there is nothing to search.
      return null;
    }
    // An empty name would otherwise be searched as process "".
    if (!targetApp) return null;
  }

  const result = await searchProcessElements(targetApp, query);
  if (result) return result;

  // Dock fallback; skipped when the Dock was already the target.
  if (targetApp !== "Dock") {
    return await searchProcessElements("Dock", query);
  }
  return null;
}
2098
2173
  var DesktopTools = class {
2099
2174
  register(server) {
2100
2175
  server.tool(
2101
2176
  "desktop_see",
2102
2177
  [
2103
- "Capture UI element tree of an app. Returns snapshot ID + element IDs (B1 for buttons, T1 for text fields\u2026) with absolute screen coordinates.",
2104
- "ALWAYS call this before clicking or typing to get fresh element IDs. Snapshots are ephemeral \u2014 re-capture when stale.",
2105
- "If timeout on complex apps, use desktop_screenshot + desktop_click(coords) as fallback.",
2106
- "For CJK/emoji text input, use desktop_paste (not desktop_type)."
2178
+ "Capture native UI element tree. Returns snapshot ID + elements with id/role/label/description.",
2179
+ "Useful for simple/moderate apps. May timeout on complex apps (100+ elements) \u2014 use desktop_click(query, app) which auto-resolves coordinates without needing desktop_see.",
2180
+ "IMPORTANT: Only sees native macOS UI. Web page content inside browsers is invisible \u2014 use browser_* tools.",
2181
+ "If timeout, use desktop_screenshot for visual context + desktop_click(query, app) or desktop_click(coords) to interact."
2107
2182
  ].join("\n"),
2108
2183
  {
2109
2184
  app: z5.string().optional().describe("App name, 'frontmost', or 'menubar'. Omit for frontmost."),
@@ -2125,7 +2200,7 @@ var DesktopTools = class {
2125
2200
  id: e.id,
2126
2201
  role: e.role,
2127
2202
  label: e.label,
2128
- bounds: e.bounds
2203
+ description: e.description
2129
2204
  })) ?? [];
2130
2205
  return json({ snapshotId, elements });
2131
2206
  }
@@ -2133,9 +2208,10 @@ var DesktopTools = class {
2133
2208
  server.tool(
2134
2209
  "desktop_screenshot",
2135
2210
  [
2136
- "Take a screenshot. Returns base64 image.",
2137
- "Use when you need visual context or as fallback when desktop_see times out.",
2138
- "For automation, prefer desktop_see which returns actionable element IDs."
2211
+ "Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
2212
+ "Use for visual context or to verify UI state ONLY.",
2213
+ "WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
2214
+ "Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
2139
2215
  ].join("\n"),
2140
2216
  {
2141
2217
  app: z5.string().optional().describe("Capture specific app window"),
@@ -2172,9 +2248,13 @@ var DesktopTools = class {
2172
2248
  server.tool(
2173
2249
  "desktop_click",
2174
2250
  [
2175
- "Click a UI element. Provide one of: query (text search), on (element ID from desktop_see), or coords ('x,y').",
2176
- "Prefer element IDs from desktop_see for reliability. Clicks the center of the element.",
2177
- "If click fails or element not found, re-capture with desktop_see and try again. Alternatively try desktop_menu or desktop_hotkey."
2251
+ "Click a UI element by text label (query), coordinates (coords), or element ID (on).",
2252
+ "BEST: Use query with app name \u2014 auto-resolves to exact screen coords via accessibility API. No desktop_see needed.",
2253
+ "GOOD: Use coords 'x,y' \u2014 MUST be from osascript position+size center calculation, NEVER from visual screenshot estimation.",
2254
+ "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots.",
2255
+ "CAUTION: 'on' (element ID) has known offset bug \u2014 use coords or query instead when possible.",
2256
+ "If query not found: use execute_command + osascript to get element position+size, then center = (x + w/2, y + h/2). Or try desktop_menu, desktop_hotkey.",
2257
+ "NOTE: Web page elements (inside browser) are invisible to desktop tools. Use browser_* tools instead."
2178
2258
  ].join("\n"),
2179
2259
  {
2180
2260
  query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
@@ -2190,9 +2270,18 @@ var DesktopTools = class {
2190
2270
  checkBlacklist(app);
2191
2271
  if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
2192
2272
  const args = ["click"];
2193
- if (coords) args.push("--coords", coords);
2194
- else if (on) args.push("--on", on);
2195
- else if (query) args.push(query);
2273
+ if (coords) {
2274
+ args.push("--coords", coords);
2275
+ } else if (query) {
2276
+ const resolved = await resolveElementCoords(query, app);
2277
+ if (resolved) {
2278
+ args.push("--coords", resolved);
2279
+ } else {
2280
+ args.push(query);
2281
+ }
2282
+ } else if (on) {
2283
+ args.push("--on", on);
2284
+ }
2196
2285
  if (app) args.push("--app", app);
2197
2286
  if (snapshot) args.push("--snapshot", snapshot);
2198
2287
  if (doubleClick) args.push("--double");
@@ -2230,6 +2319,7 @@ var DesktopTools = class {
2230
2319
  "desktop_paste",
2231
2320
  [
2232
2321
  "Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
2322
+ "IMPORTANT: Focus the target field first (click it with desktop_click) before pasting.",
2233
2323
  "Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
2234
2324
  "Can also paste file contents via filePath."
2235
2325
  ].join("\n"),
@@ -2312,7 +2402,10 @@ var DesktopTools = class {
2312
2402
  );
2313
2403
  server.tool(
2314
2404
  "desktop_move",
2315
- "Move mouse cursor without clicking. Use before scroll or to hover.",
2405
+ [
2406
+ "Move mouse cursor without clicking. Use before scroll or to hover.",
2407
+ "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2408
+ ].join("\n"),
2316
2409
  {
2317
2410
  coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
2318
2411
  to: z5.string().optional().describe("Element text/label to move to"),
@@ -2325,9 +2418,18 @@ var DesktopTools = class {
2325
2418
  checkBlacklist(app);
2326
2419
  if (!coords && !to && !id) throw new Error("Provide coords, to, or id.");
2327
2420
  const args = ["move"];
2328
- if (coords) args.push(coords);
2329
- else if (id) args.push("--id", id);
2330
- else if (to) args.push("--to", to);
2421
+ if (coords) {
2422
+ args.push(coords);
2423
+ } else if (to) {
2424
+ const resolved = await resolveElementCoords(to, app);
2425
+ if (resolved) {
2426
+ args.push(resolved);
2427
+ } else {
2428
+ args.push("--to", to);
2429
+ }
2430
+ } else if (id) {
2431
+ args.push("--id", id);
2432
+ }
2331
2433
  if (app) args.push("--app", app);
2332
2434
  if (snapshot) args.push("--snapshot", snapshot);
2333
2435
  if (smooth) args.push("--smooth");
@@ -2338,7 +2440,8 @@ var DesktopTools = class {
2338
2440
  "desktop_drag",
2339
2441
  [
2340
2442
  "Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
2341
- "Use element IDs from desktop_see or raw coordinates."
2443
+ "Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
2444
+ "Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2342
2445
  ].join("\n"),
2343
2446
  {
2344
2447
  from: z5.string().optional().describe("Source element ID from desktop_see"),
@@ -2368,7 +2471,10 @@ var DesktopTools = class {
2368
2471
  );
2369
2472
  server.tool(
2370
2473
  "desktop_open_app",
2371
- "Launch or activate a macOS app. Already running apps are brought to front. After launch, call desktop_see to confirm UI is ready before automation. Terminal/iTerm/Finder blocked.",
2474
+ [
2475
+ "Launch or activate a macOS app. Already running apps are brought to front. Terminal/iTerm/Finder blocked.",
2476
+ "After launch, wait briefly then use desktop_click(query, app) to interact. desktop_see may timeout on complex apps \u2014 use desktop_screenshot as visual fallback."
2477
+ ].join("\n"),
2372
2478
  {
2373
2479
  app: z5.string().describe("App name (e.g. 'Safari', 'KakaoTalk', 'Slack')")
2374
2480
  },
@@ -2532,6 +2638,7 @@ var DesktopTools = class {
2532
2638
  app: z5.string().optional().describe("App to open with")
2533
2639
  },
2534
2640
  async ({ url, app }) => {
2641
+ checkBlacklist(app);
2535
2642
  const args = ["open", url];
2536
2643
  if (app) args.push("--app", app);
2537
2644
  return json(await peekaboo(args));
@@ -41,13 +41,19 @@ var toolPermissions = {
41
41
  desktop_click: "confirm",
42
42
  desktop_type: "confirm",
43
43
  desktop_hotkey: "confirm",
44
+ desktop_press: "confirm",
44
45
  desktop_scroll: "confirm",
45
46
  desktop_move: "confirm",
47
+ desktop_drag: "confirm",
46
48
  desktop_menu: "confirm",
47
49
  desktop_paste: "confirm",
50
+ desktop_clipboard: "confirm",
51
+ desktop_dialog: "confirm",
48
52
  desktop_screenshot: "confirm",
49
53
  desktop_open_app: "auto",
50
54
  desktop_open_url: "auto",
55
+ desktop_app_quit: "confirm",
56
+ desktop_window: "confirm",
51
57
  cron_create: "confirm",
52
58
  cron_delete: "confirm",
53
59
  edit_block: "confirm",
@@ -80,6 +86,9 @@ var FilesystemTools = class {
80
86
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
81
87
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
82
88
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
89
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
90
+ " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
91
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
83
92
  "",
84
93
  "BEHAVIOR:",
85
94
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -1611,15 +1620,81 @@ function checkBlacklist(app) {
1611
1620
  function json(data) {
1612
1621
  return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
1613
1622
  }
1623
/**
 * Find a UI element of a macOS process by label and return the centre of its frame.
 *
 * Runs an AppleScript through `osascript` that asks System Events for the
 * process's top-level UI elements and then for their direct children, and
 * reports the first element whose `name` or `description` contains `query`.
 * Only those two levels of the hierarchy are inspected.
 *
 * @param {string} processName - System Events process name (e.g. "Safari").
 * @param {string} query - Text to look for in an element's name or description.
 * @returns {Promise<string|null>} Centre point as "x,y" (integers), or `null`
 *   when nothing matched, the inputs are unusable, or osascript failed/timed out.
 */
async function searchProcessElements(processName, query) {
  // Reject unusable input before spawning a process: a non-string would throw
  // on `.replace`, and an empty needle cannot identify a specific element.
  if (typeof processName !== "string" || typeof query !== "string") return null;
  if (processName === "" || query === "") return null;

  // Escape backslashes and double quotes so the values stay inside their
  // AppleScript string literals.
  const quoteForAppleScript = (text) => text.replace(/[\\"]/g, "\\$&");
  const safeApp = quoteForAppleScript(processName);
  const safeQuery = quoteForAppleScript(query);

  // Shared by both search levels: if `elem` matches, return the centre of its
  // frame. The inner `try` skips elements that lack one of the properties.
  const matchAndReturnCenter = `
        try
          if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
            set pos to position of elem
            set sz to size of elem
            set cx to (item 1 of pos) + (item 1 of sz) / 2
            set cy to (item 2 of pos) + (item 2 of sz) / 2
            return ((cx as integer) as text) & "," & ((cy as integer) as text)
          end if
        end try`;

  // The outer loop variable is deliberately not called `parent`, which is also
  // the name of AppleScript's built-in script-object property.
  const script = `
tell application "System Events"
  tell process "${safeApp}"
    set topElems to UI elements
    repeat with elem in topElems${matchAndReturnCenter}
    end repeat
    repeat with containerElem in topElems
      try
        repeat with elem in UI elements of containerElem${matchAndReturnCenter}
        end repeat
      end try
    end repeat
  end tell
end tell
return "NOT_FOUND"
`;

  try {
    const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
    const result = stdout.trim();
    // Accept only a well-formed "x,y" pair (coordinates may be negative on
    // secondary displays); anything else must never be forwarded as coords.
    return /^-?\d+,-?\d+$/.test(result) ? result : null;
  } catch {
    // Best effort by design: callers fall back to passing the raw query on
    // when this lookup yields nothing.
    return null;
  }
}
1669
/**
 * Resolve a text label to "x,y" screen coordinates through the accessibility tree.
 *
 * Searches `app` (or, when `app` is not given, the frontmost application
 * process as reported by System Events). If nothing matches there and the
 * target is not the Dock itself, the Dock process is searched as a fallback.
 *
 * @param {string} query - Label text to look for.
 * @param {string} [app] - Process name to search; omit to use the frontmost app.
 * @returns {Promise<string|null>} "x,y" on success, otherwise `null` (including
 *   when the frontmost app cannot be determined).
 */
async function resolveElementCoords(query, app) {
  // Bound the lookup so a stalled osascript cannot hang the tool call.
  const FRONTMOST_LOOKUP_TIMEOUT_MS = 5e3;

  let targetApp = app;
  if (!targetApp) {
    try {
      const { stdout } = await execa(
        "osascript",
        [
          "-e",
          'tell application "System Events" to get name of first application process whose frontmost is true'
        ],
        { timeout: FRONTMOST_LOOKUP_TIMEOUT_MS }
      );
      targetApp = stdout.trim();
    } catch {
      // Best effort: without a target process there is nothing to search.
      return null;
    }
    // An empty name would otherwise be searched as process "".
    if (!targetApp) return null;
  }

  const result = await searchProcessElements(targetApp, query);
  if (result) return result;

  // Dock fallback; skipped when the Dock was already the target.
  if (targetApp !== "Dock") {
    return await searchProcessElements("Dock", query);
  }
  return null;
}
1614
1689
  var DesktopTools = class {
1615
1690
  register(server) {
1616
1691
  server.tool(
1617
1692
  "desktop_see",
1618
1693
  [
1619
- "Capture UI element tree of an app. Returns snapshot ID + element IDs (B1 for buttons, T1 for text fields\u2026) with absolute screen coordinates.",
1620
- "ALWAYS call this before clicking or typing to get fresh element IDs. Snapshots are ephemeral \u2014 re-capture when stale.",
1621
- "If timeout on complex apps, use desktop_screenshot + desktop_click(coords) as fallback.",
1622
- "For CJK/emoji text input, use desktop_paste (not desktop_type)."
1694
+ "Capture native UI element tree. Returns snapshot ID + elements with id/role/label/description.",
1695
+ "Useful for simple/moderate apps. May timeout on complex apps (100+ elements) \u2014 use desktop_click(query, app) which auto-resolves coordinates without needing desktop_see.",
1696
+ "IMPORTANT: Only sees native macOS UI. Web page content inside browsers is invisible \u2014 use browser_* tools.",
1697
+ "If timeout, use desktop_screenshot for visual context + desktop_click(query, app) or desktop_click(coords) to interact."
1623
1698
  ].join("\n"),
1624
1699
  {
1625
1700
  app: z5.string().optional().describe("App name, 'frontmost', or 'menubar'. Omit for frontmost."),
@@ -1641,7 +1716,7 @@ var DesktopTools = class {
1641
1716
  id: e.id,
1642
1717
  role: e.role,
1643
1718
  label: e.label,
1644
- bounds: e.bounds
1719
+ description: e.description
1645
1720
  })) ?? [];
1646
1721
  return json({ snapshotId, elements });
1647
1722
  }
@@ -1649,9 +1724,10 @@ var DesktopTools = class {
1649
1724
  server.tool(
1650
1725
  "desktop_screenshot",
1651
1726
  [
1652
- "Take a screenshot. Returns base64 image.",
1653
- "Use when you need visual context or as fallback when desktop_see times out.",
1654
- "For automation, prefer desktop_see which returns actionable element IDs."
1727
+ "Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
1728
+ "Use for visual context or to verify UI state ONLY.",
1729
+ "WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
1730
+ "Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
1655
1731
  ].join("\n"),
1656
1732
  {
1657
1733
  app: z5.string().optional().describe("Capture specific app window"),
@@ -1688,9 +1764,13 @@ var DesktopTools = class {
1688
1764
  server.tool(
1689
1765
  "desktop_click",
1690
1766
  [
1691
- "Click a UI element. Provide one of: query (text search), on (element ID from desktop_see), or coords ('x,y').",
1692
- "Prefer element IDs from desktop_see for reliability. Clicks the center of the element.",
1693
- "If click fails or element not found, re-capture with desktop_see and try again. Alternatively try desktop_menu or desktop_hotkey."
1767
+ "Click a UI element by text label (query), coordinates (coords), or element ID (on).",
1768
+ "BEST: Use query with app name \u2014 auto-resolves to exact screen coords via accessibility API. No desktop_see needed.",
1769
+ "GOOD: Use coords 'x,y' \u2014 MUST be from osascript position+size center calculation, NEVER from visual screenshot estimation.",
1770
+ "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots.",
1771
+ "CAUTION: 'on' (element ID) has known offset bug \u2014 use coords or query instead when possible.",
1772
+ "If query not found: use execute_command + osascript to get element position+size, then center = (x + w/2, y + h/2). Or try desktop_menu, desktop_hotkey.",
1773
+ "NOTE: Web page elements (inside browser) are invisible to desktop tools. Use browser_* tools instead."
1694
1774
  ].join("\n"),
1695
1775
  {
1696
1776
  query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
@@ -1706,9 +1786,18 @@ var DesktopTools = class {
1706
1786
  checkBlacklist(app);
1707
1787
  if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
1708
1788
  const args = ["click"];
1709
- if (coords) args.push("--coords", coords);
1710
- else if (on) args.push("--on", on);
1711
- else if (query) args.push(query);
1789
+ if (coords) {
1790
+ args.push("--coords", coords);
1791
+ } else if (query) {
1792
+ const resolved = await resolveElementCoords(query, app);
1793
+ if (resolved) {
1794
+ args.push("--coords", resolved);
1795
+ } else {
1796
+ args.push(query);
1797
+ }
1798
+ } else if (on) {
1799
+ args.push("--on", on);
1800
+ }
1712
1801
  if (app) args.push("--app", app);
1713
1802
  if (snapshot) args.push("--snapshot", snapshot);
1714
1803
  if (doubleClick) args.push("--double");
@@ -1746,6 +1835,7 @@ var DesktopTools = class {
1746
1835
  "desktop_paste",
1747
1836
  [
1748
1837
  "Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
1838
+ "IMPORTANT: Focus the target field first (click it with desktop_click) before pasting.",
1749
1839
  "Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
1750
1840
  "Can also paste file contents via filePath."
1751
1841
  ].join("\n"),
@@ -1828,7 +1918,10 @@ var DesktopTools = class {
1828
1918
  );
1829
1919
  server.tool(
1830
1920
  "desktop_move",
1831
- "Move mouse cursor without clicking. Use before scroll or to hover.",
1921
+ [
1922
+ "Move mouse cursor without clicking. Use before scroll or to hover.",
1923
+ "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
1924
+ ].join("\n"),
1832
1925
  {
1833
1926
  coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
1834
1927
  to: z5.string().optional().describe("Element text/label to move to"),
@@ -1841,9 +1934,18 @@ var DesktopTools = class {
1841
1934
  checkBlacklist(app);
1842
1935
  if (!coords && !to && !id) throw new Error("Provide coords, to, or id.");
1843
1936
  const args = ["move"];
1844
- if (coords) args.push(coords);
1845
- else if (id) args.push("--id", id);
1846
- else if (to) args.push("--to", to);
1937
+ if (coords) {
1938
+ args.push(coords);
1939
+ } else if (to) {
1940
+ const resolved = await resolveElementCoords(to, app);
1941
+ if (resolved) {
1942
+ args.push(resolved);
1943
+ } else {
1944
+ args.push("--to", to);
1945
+ }
1946
+ } else if (id) {
1947
+ args.push("--id", id);
1948
+ }
1847
1949
  if (app) args.push("--app", app);
1848
1950
  if (snapshot) args.push("--snapshot", snapshot);
1849
1951
  if (smooth) args.push("--smooth");
@@ -1854,7 +1956,8 @@ var DesktopTools = class {
1854
1956
  "desktop_drag",
1855
1957
  [
1856
1958
  "Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
1857
- "Use element IDs from desktop_see or raw coordinates."
1959
+ "Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
1960
+ "Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
1858
1961
  ].join("\n"),
1859
1962
  {
1860
1963
  from: z5.string().optional().describe("Source element ID from desktop_see"),
@@ -1884,7 +1987,10 @@ var DesktopTools = class {
1884
1987
  );
1885
1988
  server.tool(
1886
1989
  "desktop_open_app",
1887
- "Launch or activate a macOS app. Already running apps are brought to front. After launch, call desktop_see to confirm UI is ready before automation. Terminal/iTerm/Finder blocked.",
1990
+ [
1991
+ "Launch or activate a macOS app. Already running apps are brought to front. Terminal/iTerm/Finder blocked.",
1992
+ "After launch, wait briefly then use desktop_click(query, app) to interact. desktop_see may timeout on complex apps \u2014 use desktop_screenshot as visual fallback."
1993
+ ].join("\n"),
1888
1994
  {
1889
1995
  app: z5.string().describe("App name (e.g. 'Safari', 'KakaoTalk', 'Slack')")
1890
1996
  },
@@ -2048,6 +2154,7 @@ var DesktopTools = class {
2048
2154
  app: z5.string().optional().describe("App to open with")
2049
2155
  },
2050
2156
  async ({ url, app }) => {
2157
+ checkBlacklist(app);
2051
2158
  const args = ["open", url];
2052
2159
  if (app) args.push("--app", app);
2053
2160
  return json(await peekaboo(args));
@@ -42,13 +42,19 @@ var toolPermissions = {
42
42
  desktop_click: "confirm",
43
43
  desktop_type: "confirm",
44
44
  desktop_hotkey: "confirm",
45
+ desktop_press: "confirm",
45
46
  desktop_scroll: "confirm",
46
47
  desktop_move: "confirm",
48
+ desktop_drag: "confirm",
47
49
  desktop_menu: "confirm",
48
50
  desktop_paste: "confirm",
51
+ desktop_clipboard: "confirm",
52
+ desktop_dialog: "confirm",
49
53
  desktop_screenshot: "confirm",
50
54
  desktop_open_app: "auto",
51
55
  desktop_open_url: "auto",
56
+ desktop_app_quit: "confirm",
57
+ desktop_window: "confirm",
52
58
  cron_create: "confirm",
53
59
  cron_delete: "confirm",
54
60
  edit_block: "confirm",
@@ -81,6 +87,9 @@ var FilesystemTools = class {
81
87
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
82
88
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
83
89
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
90
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
91
+ " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
92
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
84
93
  "",
85
94
  "BEHAVIOR:",
86
95
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "junis",
3
- "version": "0.3.16",
3
+ "version": "0.4.0",
4
4
  "description": "One-line device control for AI agents",
5
5
  "type": "module",
6
6
  "bin": {