junis 0.3.16 → 0.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -525,13 +525,19 @@ var toolPermissions = {
525
525
  desktop_click: "confirm",
526
526
  desktop_type: "confirm",
527
527
  desktop_hotkey: "confirm",
528
+ desktop_press: "confirm",
528
529
  desktop_scroll: "confirm",
529
530
  desktop_move: "confirm",
531
+ desktop_drag: "confirm",
530
532
  desktop_menu: "confirm",
531
533
  desktop_paste: "confirm",
534
+ desktop_clipboard: "confirm",
535
+ desktop_dialog: "confirm",
532
536
  desktop_screenshot: "confirm",
533
537
  desktop_open_app: "auto",
534
538
  desktop_open_url: "auto",
539
+ desktop_app_quit: "confirm",
540
+ desktop_window: "confirm",
535
541
  cron_create: "confirm",
536
542
  cron_delete: "confirm",
537
543
  edit_block: "confirm",
@@ -2095,15 +2101,72 @@ function checkBlacklist(app) {
2095
2101
  function json(data) {
2096
2102
  return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
2097
2103
  }
2104
+ async function resolveElementCoords(query, app) {
2105
+ if (!app) {
2106
+ try {
2107
+ const { stdout } = await execa("osascript", [
2108
+ "-e",
2109
+ 'tell application "System Events" to get name of first application process whose frontmost is true'
2110
+ ]);
2111
+ app = stdout.trim();
2112
+ } catch {
2113
+ return null;
2114
+ }
2115
+ }
2116
+ const safeApp = app.replace(/[\\"]/g, "\\$&");
2117
+ const safeQuery = query.replace(/[\\"]/g, "\\$&");
2118
+ const script = `
2119
+ tell application "System Events"
2120
+ tell process "${safeApp}"
2121
+ set topElems to UI elements
2122
+ repeat with elem in topElems
2123
+ try
2124
+ if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
2125
+ set pos to position of elem
2126
+ set sz to size of elem
2127
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
2128
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
2129
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
2130
+ end if
2131
+ end try
2132
+ end repeat
2133
+ repeat with parent in topElems
2134
+ try
2135
+ repeat with elem in UI elements of parent
2136
+ try
2137
+ if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
2138
+ set pos to position of elem
2139
+ set sz to size of elem
2140
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
2141
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
2142
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
2143
+ end if
2144
+ end try
2145
+ end repeat
2146
+ end try
2147
+ end repeat
2148
+ end tell
2149
+ end tell
2150
+ return "NOT_FOUND"
2151
+ `;
2152
+ try {
2153
+ const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
2154
+ const result = stdout.trim();
2155
+ if (result === "NOT_FOUND" || !result.includes(",")) return null;
2156
+ return result;
2157
+ } catch {
2158
+ return null;
2159
+ }
2160
+ }
2098
2161
  var DesktopTools = class {
2099
2162
  register(server) {
2100
2163
  server.tool(
2101
2164
  "desktop_see",
2102
2165
  [
2103
- "Capture UI element tree of an app. Returns snapshot ID + element IDs (B1 for buttons, T1 for text fields\u2026) with absolute screen coordinates.",
2104
- "ALWAYS call this before clicking or typing to get fresh element IDs. Snapshots are ephemeral \u2014 re-capture when stale.",
2105
- "If timeout on complex apps, use desktop_screenshot + desktop_click(coords) as fallback.",
2106
- "For CJK/emoji text input, use desktop_paste (not desktop_type)."
2166
+ "Capture native UI element tree. Returns snapshot ID + elements with id/role/label/description.",
2167
+ "Useful for simple/moderate apps. May timeout on complex apps (100+ elements) \u2014 use desktop_click(query, app) which auto-resolves coordinates without needing desktop_see.",
2168
+ "IMPORTANT: Only sees native macOS UI. Web page content inside browsers is invisible \u2014 use browser_* tools.",
2169
+ "If timeout, use desktop_screenshot for visual context + desktop_click(query, app) or desktop_click(coords) to interact."
2107
2170
  ].join("\n"),
2108
2171
  {
2109
2172
  app: z5.string().optional().describe("App name, 'frontmost', or 'menubar'. Omit for frontmost."),
@@ -2125,7 +2188,7 @@ var DesktopTools = class {
2125
2188
  id: e.id,
2126
2189
  role: e.role,
2127
2190
  label: e.label,
2128
- bounds: e.bounds
2191
+ description: e.description
2129
2192
  })) ?? [];
2130
2193
  return json({ snapshotId, elements });
2131
2194
  }
@@ -2133,9 +2196,9 @@ var DesktopTools = class {
2133
2196
  server.tool(
2134
2197
  "desktop_screenshot",
2135
2198
  [
2136
- "Take a screenshot. Returns base64 image.",
2137
- "Use when you need visual context or as fallback when desktop_see times out.",
2138
- "For automation, prefer desktop_see which returns actionable element IDs."
2199
+ "Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
2200
+ "Use for visual context or to verify UI state. Screenshot pixel coordinates map directly to desktop_click(coords).",
2201
+ "For clicking, prefer desktop_click(query, app) which auto-resolves coordinates. Use screenshot coords as fallback."
2139
2202
  ].join("\n"),
2140
2203
  {
2141
2204
  app: z5.string().optional().describe("Capture specific app window"),
@@ -2172,9 +2235,12 @@ var DesktopTools = class {
2172
2235
  server.tool(
2173
2236
  "desktop_click",
2174
2237
  [
2175
- "Click a UI element. Provide one of: query (text search), on (element ID from desktop_see), or coords ('x,y').",
2176
- "Prefer element IDs from desktop_see for reliability. Clicks the center of the element.",
2177
- "If click fails or element not found, re-capture with desktop_see and try again. Alternatively try desktop_menu or desktop_hotkey."
2238
+ "Click a UI element by text label (query), coordinates (coords), or element ID (on).",
2239
+ "BEST: Use query with app name \u2014 auto-resolves to exact screen coords via accessibility API. No desktop_see needed.",
2240
+ "GOOD: Use coords 'x,y' from desktop_screenshot for pixel-perfect accuracy.",
2241
+ "CAUTION: 'on' (element ID) has known offset bug \u2014 use coords or query instead when possible.",
2242
+ "If not found, try: desktop_screenshot to find coords, desktop_menu for menu items, or desktop_hotkey for shortcuts.",
2243
+ "NOTE: Web page elements (inside browser) are invisible to desktop tools. Use browser_* tools instead."
2178
2244
  ].join("\n"),
2179
2245
  {
2180
2246
  query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
@@ -2190,9 +2256,18 @@ var DesktopTools = class {
2190
2256
  checkBlacklist(app);
2191
2257
  if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
2192
2258
  const args = ["click"];
2193
- if (coords) args.push("--coords", coords);
2194
- else if (on) args.push("--on", on);
2195
- else if (query) args.push(query);
2259
+ if (coords) {
2260
+ args.push("--coords", coords);
2261
+ } else if (query) {
2262
+ const resolved = await resolveElementCoords(query, app);
2263
+ if (resolved) {
2264
+ args.push("--coords", resolved);
2265
+ } else {
2266
+ args.push(query);
2267
+ }
2268
+ } else if (on) {
2269
+ args.push("--on", on);
2270
+ }
2196
2271
  if (app) args.push("--app", app);
2197
2272
  if (snapshot) args.push("--snapshot", snapshot);
2198
2273
  if (doubleClick) args.push("--double");
@@ -2325,9 +2400,18 @@ var DesktopTools = class {
2325
2400
  checkBlacklist(app);
2326
2401
  if (!coords && !to && !id) throw new Error("Provide coords, to, or id.");
2327
2402
  const args = ["move"];
2328
- if (coords) args.push(coords);
2329
- else if (id) args.push("--id", id);
2330
- else if (to) args.push("--to", to);
2403
+ if (coords) {
2404
+ args.push(coords);
2405
+ } else if (to) {
2406
+ const resolved = await resolveElementCoords(to, app);
2407
+ if (resolved) {
2408
+ args.push(resolved);
2409
+ } else {
2410
+ args.push("--to", to);
2411
+ }
2412
+ } else if (id) {
2413
+ args.push("--id", id);
2414
+ }
2331
2415
  if (app) args.push("--app", app);
2332
2416
  if (snapshot) args.push("--snapshot", snapshot);
2333
2417
  if (smooth) args.push("--smooth");
@@ -2368,7 +2452,10 @@ var DesktopTools = class {
2368
2452
  );
2369
2453
  server.tool(
2370
2454
  "desktop_open_app",
2371
- "Launch or activate a macOS app. Already running apps are brought to front. After launch, call desktop_see to confirm UI is ready before automation. Terminal/iTerm/Finder blocked.",
2455
+ [
2456
+ "Launch or activate a macOS app. Already running apps are brought to front. Terminal/iTerm/Finder blocked.",
2457
+ "After launch, wait briefly then use desktop_click(query, app) to interact. desktop_see may timeout on complex apps \u2014 use desktop_screenshot as visual fallback."
2458
+ ].join("\n"),
2372
2459
  {
2373
2460
  app: z5.string().describe("App name (e.g. 'Safari', 'KakaoTalk', 'Slack')")
2374
2461
  },
@@ -41,13 +41,19 @@ var toolPermissions = {
41
41
  desktop_click: "confirm",
42
42
  desktop_type: "confirm",
43
43
  desktop_hotkey: "confirm",
44
+ desktop_press: "confirm",
44
45
  desktop_scroll: "confirm",
45
46
  desktop_move: "confirm",
47
+ desktop_drag: "confirm",
46
48
  desktop_menu: "confirm",
47
49
  desktop_paste: "confirm",
50
+ desktop_clipboard: "confirm",
51
+ desktop_dialog: "confirm",
48
52
  desktop_screenshot: "confirm",
49
53
  desktop_open_app: "auto",
50
54
  desktop_open_url: "auto",
55
+ desktop_app_quit: "confirm",
56
+ desktop_window: "confirm",
51
57
  cron_create: "confirm",
52
58
  cron_delete: "confirm",
53
59
  edit_block: "confirm",
@@ -1611,15 +1617,72 @@ function checkBlacklist(app) {
1611
1617
  function json(data) {
1612
1618
  return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
1613
1619
  }
1620
+ async function resolveElementCoords(query, app) {
1621
+ if (!app) {
1622
+ try {
1623
+ const { stdout } = await execa("osascript", [
1624
+ "-e",
1625
+ 'tell application "System Events" to get name of first application process whose frontmost is true'
1626
+ ]);
1627
+ app = stdout.trim();
1628
+ } catch {
1629
+ return null;
1630
+ }
1631
+ }
1632
+ const safeApp = app.replace(/[\\"]/g, "\\$&");
1633
+ const safeQuery = query.replace(/[\\"]/g, "\\$&");
1634
+ const script = `
1635
+ tell application "System Events"
1636
+ tell process "${safeApp}"
1637
+ set topElems to UI elements
1638
+ repeat with elem in topElems
1639
+ try
1640
+ if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
1641
+ set pos to position of elem
1642
+ set sz to size of elem
1643
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
1644
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
1645
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
1646
+ end if
1647
+ end try
1648
+ end repeat
1649
+ repeat with parent in topElems
1650
+ try
1651
+ repeat with elem in UI elements of parent
1652
+ try
1653
+ if (name of elem contains "${safeQuery}") or (description of elem contains "${safeQuery}") then
1654
+ set pos to position of elem
1655
+ set sz to size of elem
1656
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
1657
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
1658
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
1659
+ end if
1660
+ end try
1661
+ end repeat
1662
+ end try
1663
+ end repeat
1664
+ end tell
1665
+ end tell
1666
+ return "NOT_FOUND"
1667
+ `;
1668
+ try {
1669
+ const { stdout } = await execa("osascript", ["-e", script], { timeout: 1e4 });
1670
+ const result = stdout.trim();
1671
+ if (result === "NOT_FOUND" || !result.includes(",")) return null;
1672
+ return result;
1673
+ } catch {
1674
+ return null;
1675
+ }
1676
+ }
1614
1677
  var DesktopTools = class {
1615
1678
  register(server) {
1616
1679
  server.tool(
1617
1680
  "desktop_see",
1618
1681
  [
1619
- "Capture UI element tree of an app. Returns snapshot ID + element IDs (B1 for buttons, T1 for text fields\u2026) with absolute screen coordinates.",
1620
- "ALWAYS call this before clicking or typing to get fresh element IDs. Snapshots are ephemeral \u2014 re-capture when stale.",
1621
- "If timeout on complex apps, use desktop_screenshot + desktop_click(coords) as fallback.",
1622
- "For CJK/emoji text input, use desktop_paste (not desktop_type)."
1682
+ "Capture native UI element tree. Returns snapshot ID + elements with id/role/label/description.",
1683
+ "Useful for simple/moderate apps. May timeout on complex apps (100+ elements) \u2014 use desktop_click(query, app) which auto-resolves coordinates without needing desktop_see.",
1684
+ "IMPORTANT: Only sees native macOS UI. Web page content inside browsers is invisible \u2014 use browser_* tools.",
1685
+ "If timeout, use desktop_screenshot for visual context + desktop_click(query, app) or desktop_click(coords) to interact."
1623
1686
  ].join("\n"),
1624
1687
  {
1625
1688
  app: z5.string().optional().describe("App name, 'frontmost', or 'menubar'. Omit for frontmost."),
@@ -1641,7 +1704,7 @@ var DesktopTools = class {
1641
1704
  id: e.id,
1642
1705
  role: e.role,
1643
1706
  label: e.label,
1644
- bounds: e.bounds
1707
+ description: e.description
1645
1708
  })) ?? [];
1646
1709
  return json({ snapshotId, elements });
1647
1710
  }
@@ -1649,9 +1712,9 @@ var DesktopTools = class {
1649
1712
  server.tool(
1650
1713
  "desktop_screenshot",
1651
1714
  [
1652
- "Take a screenshot. Returns base64 image.",
1653
- "Use when you need visual context or as fallback when desktop_see times out.",
1654
- "For automation, prefer desktop_see which returns actionable element IDs."
1715
+ "Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
1716
+ "Use for visual context or to verify UI state. Screenshot pixel coordinates map directly to desktop_click(coords).",
1717
+ "For clicking, prefer desktop_click(query, app) which auto-resolves coordinates. Use screenshot coords as fallback."
1655
1718
  ].join("\n"),
1656
1719
  {
1657
1720
  app: z5.string().optional().describe("Capture specific app window"),
@@ -1688,9 +1751,12 @@ var DesktopTools = class {
1688
1751
  server.tool(
1689
1752
  "desktop_click",
1690
1753
  [
1691
- "Click a UI element. Provide one of: query (text search), on (element ID from desktop_see), or coords ('x,y').",
1692
- "Prefer element IDs from desktop_see for reliability. Clicks the center of the element.",
1693
- "If click fails or element not found, re-capture with desktop_see and try again. Alternatively try desktop_menu or desktop_hotkey."
1754
+ "Click a UI element by text label (query), coordinates (coords), or element ID (on).",
1755
+ "BEST: Use query with app name \u2014 auto-resolves to exact screen coords via accessibility API. No desktop_see needed.",
1756
+ "GOOD: Use coords 'x,y' from desktop_screenshot for pixel-perfect accuracy.",
1757
+ "CAUTION: 'on' (element ID) has known offset bug \u2014 use coords or query instead when possible.",
1758
+ "If not found, try: desktop_screenshot to find coords, desktop_menu for menu items, or desktop_hotkey for shortcuts.",
1759
+ "NOTE: Web page elements (inside browser) are invisible to desktop tools. Use browser_* tools instead."
1694
1760
  ].join("\n"),
1695
1761
  {
1696
1762
  query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
@@ -1706,9 +1772,18 @@ var DesktopTools = class {
1706
1772
  checkBlacklist(app);
1707
1773
  if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
1708
1774
  const args = ["click"];
1709
- if (coords) args.push("--coords", coords);
1710
- else if (on) args.push("--on", on);
1711
- else if (query) args.push(query);
1775
+ if (coords) {
1776
+ args.push("--coords", coords);
1777
+ } else if (query) {
1778
+ const resolved = await resolveElementCoords(query, app);
1779
+ if (resolved) {
1780
+ args.push("--coords", resolved);
1781
+ } else {
1782
+ args.push(query);
1783
+ }
1784
+ } else if (on) {
1785
+ args.push("--on", on);
1786
+ }
1712
1787
  if (app) args.push("--app", app);
1713
1788
  if (snapshot) args.push("--snapshot", snapshot);
1714
1789
  if (doubleClick) args.push("--double");
@@ -1841,9 +1916,18 @@ var DesktopTools = class {
1841
1916
  checkBlacklist(app);
1842
1917
  if (!coords && !to && !id) throw new Error("Provide coords, to, or id.");
1843
1918
  const args = ["move"];
1844
- if (coords) args.push(coords);
1845
- else if (id) args.push("--id", id);
1846
- else if (to) args.push("--to", to);
1919
+ if (coords) {
1920
+ args.push(coords);
1921
+ } else if (to) {
1922
+ const resolved = await resolveElementCoords(to, app);
1923
+ if (resolved) {
1924
+ args.push(resolved);
1925
+ } else {
1926
+ args.push("--to", to);
1927
+ }
1928
+ } else if (id) {
1929
+ args.push("--id", id);
1930
+ }
1847
1931
  if (app) args.push("--app", app);
1848
1932
  if (snapshot) args.push("--snapshot", snapshot);
1849
1933
  if (smooth) args.push("--smooth");
@@ -1884,7 +1968,10 @@ var DesktopTools = class {
1884
1968
  );
1885
1969
  server.tool(
1886
1970
  "desktop_open_app",
1887
- "Launch or activate a macOS app. Already running apps are brought to front. After launch, call desktop_see to confirm UI is ready before automation. Terminal/iTerm/Finder blocked.",
1971
+ [
1972
+ "Launch or activate a macOS app. Already running apps are brought to front. Terminal/iTerm/Finder blocked.",
1973
+ "After launch, wait briefly then use desktop_click(query, app) to interact. desktop_see may timeout on complex apps \u2014 use desktop_screenshot as visual fallback."
1974
+ ].join("\n"),
1888
1975
  {
1889
1976
  app: z5.string().describe("App name (e.g. 'Safari', 'KakaoTalk', 'Slack')")
1890
1977
  },
@@ -42,13 +42,19 @@ var toolPermissions = {
42
42
  desktop_click: "confirm",
43
43
  desktop_type: "confirm",
44
44
  desktop_hotkey: "confirm",
45
+ desktop_press: "confirm",
45
46
  desktop_scroll: "confirm",
46
47
  desktop_move: "confirm",
48
+ desktop_drag: "confirm",
47
49
  desktop_menu: "confirm",
48
50
  desktop_paste: "confirm",
51
+ desktop_clipboard: "confirm",
52
+ desktop_dialog: "confirm",
49
53
  desktop_screenshot: "confirm",
50
54
  desktop_open_app: "auto",
51
55
  desktop_open_url: "auto",
56
+ desktop_app_quit: "confirm",
57
+ desktop_window: "confirm",
52
58
  cron_create: "confirm",
53
59
  cron_delete: "confirm",
54
60
  edit_block: "confirm",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "junis",
3
- "version": "0.3.16",
3
+ "version": "0.3.17",
4
4
  "description": "One-line device control for AI agents",
5
5
  "type": "module",
6
6
  "bin": {