junis 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -2137,6 +2137,28 @@ tell application "System Events"
2137
2137
  end repeat
2138
2138
  end try
2139
2139
  end repeat
2140
+ repeat with parent in topElems
2141
+ try
2142
+ repeat with child in UI elements of parent
2143
+ try
2144
+ set childRole to role of child
2145
+ if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
2146
+ repeat with gc in UI elements of child
2147
+ try
2148
+ if (name of gc contains "${safeQuery}") or (description of gc contains "${safeQuery}") then
2149
+ set pos to position of gc
2150
+ set sz to size of gc
2151
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
2152
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
2153
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
2154
+ end if
2155
+ end try
2156
+ end repeat
2157
+ end if
2158
+ end try
2159
+ end repeat
2160
+ end try
2161
+ end repeat
2140
2162
  end tell
2141
2163
  end tell
2142
2164
  return "NOT_FOUND"
@@ -2170,6 +2192,86 @@ async function resolveElementCoords(query, app) {
2170
2192
  }
2171
2193
  return null;
2172
2194
  }
2195
/**
 * Find the first accessibility element with a given AX role in an app's UI and
 * return the screen coordinates of its center.
 *
 * The generated AppleScript asks System Events for the UI elements of the
 * target process and checks three levels in order: the top-level elements,
 * their children, and their grandchildren. Grandchildren are not inspected
 * when the child is an AXTable, AXOutline or AXList (presumably to avoid
 * enumerating large row collections — confirm with the author).
 *
 * @param {string} role - AX role to look for (e.g. "AXTextArea"); compared
 *   with AppleScript's `is` operator.
 * @param {string} [app] - Process name to search. When falsy, the frontmost
 *   application process is looked up first.
 * @returns {Promise<string|null>} "x,y" center coordinates, or null when no
 *   element matched, the frontmost app could not be determined, or osascript
 *   failed or timed out.
 */
async function findFirstByRole(role, app) {
  // One budget for every osascript call so that neither the frontmost-app
  // lookup nor the tree search can hang the tool call indefinitely.
  const OSASCRIPT_TIMEOUT_MS = 1e4;

  // Backslash-escape the two characters that would terminate or corrupt an
  // AppleScript string literal.
  const escapeForAppleScript = (value) => value.replace(/[\\"]/g, "\\$&");

  let targetApp = app;
  if (!targetApp) {
    try {
      const { stdout } = await execa(
        "osascript",
        [
          "-e",
          'tell application "System Events" to get name of first application process whose frontmost is true'
        ],
        { timeout: OSASCRIPT_TIMEOUT_MS }
      );
      targetApp = stdout.trim();
    } catch {
      // Best effort: callers treat null as "not found".
      return null;
    }
    // Without a process name the search script could only fail; stop early.
    if (!targetApp) return null;
  }

  const safeApp = escapeForAppleScript(targetApp);
  const safeRole = escapeForAppleScript(role);

  // AppleScript snippet shared by all three search levels: when `ref` has the
  // wanted role, return the center of its bounding box as "x,y" integers.
  const centerIfRoleMatches = (ref) =>
    [
      `if role of ${ref} is "${safeRole}" then`,
      `set pos to position of ${ref}`,
      `set sz to size of ${ref}`,
      "set cx to (item 1 of pos) + (item 1 of sz) / 2",
      "set cy to (item 2 of pos) + (item 2 of sz) / 2",
      'return ((cx as integer) as text) & "," & ((cy as integer) as text)',
      "end if"
    ].join("\n");

  // Each level is wrapped in its own `try` so that an element which refuses
  // to report role/position/size does not abort the rest of the search.
  const script = `
tell application "System Events"
tell process "${safeApp}"
set topElems to UI elements
repeat with elem in topElems
try
${centerIfRoleMatches("elem")}
end try
end repeat
repeat with parent in topElems
try
repeat with elem in UI elements of parent
try
${centerIfRoleMatches("elem")}
end try
end repeat
end try
end repeat
repeat with parent in topElems
try
repeat with child in UI elements of parent
try
set childRole to role of child
if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
repeat with gc in UI elements of child
try
${centerIfRoleMatches("gc")}
end try
end repeat
end if
end try
end repeat
end try
end repeat
end tell
end tell
return "NOT_FOUND"
`;

  try {
    const { stdout } = await execa("osascript", ["-e", script], { timeout: OSASCRIPT_TIMEOUT_MS });
    const result = stdout.trim();
    if (result === "NOT_FOUND" || !result.includes(",")) return null;
    return result;
  } catch {
    // osascript failure or timeout is reported to the caller as "not found".
    return null;
  }
}
2173
2275
  var DesktopTools = class {
2174
2276
  register(server) {
2175
2277
  server.tool(
@@ -2248,16 +2350,17 @@ var DesktopTools = class {
2248
2350
  server.tool(
2249
2351
  "desktop_click",
2250
2352
  [
2251
- "Click a UI element by text label (query), coordinates (coords), or element ID (on).",
2252
- "BEST: Use query with app name \u2014 auto-resolves to exact screen coords via accessibility API. No desktop_see needed.",
2253
- "GOOD: Use coords 'x,y' \u2014 MUST be from osascript position+size center calculation, NEVER from visual screenshot estimation.",
2254
- "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots.",
2255
- "CAUTION: 'on' (element ID) has known offset bug \u2014 use coords or query instead when possible.",
2256
- "If query not found: use execute_command + osascript to get element position+size, then center = (x + w/2, y + h/2). Or try desktop_menu, desktop_hotkey.",
2257
- "NOTE: Web page elements (inside browser) are invisible to desktop tools. Use browser_* tools instead."
2353
+ "Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
2354
+ "BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
2355
+ "ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
2356
+ "GOOD: Use coords 'x,y' \u2014 MUST be from osascript center calculation (position + size/2). NEVER estimate from screenshots.",
2357
+ "CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
2358
+ "If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
2359
+ "NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
2258
2360
  ].join("\n"),
2259
2361
  {
2260
- query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
2362
+ query: z5.string().optional().describe("Text/label to click (case-insensitive). Works for deeply nested elements."),
2363
+ role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
2261
2364
  on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
2262
2365
  coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
2263
2366
  app: z5.string().optional().describe("App name"),
@@ -2266,12 +2369,19 @@ var DesktopTools = class {
2266
2369
  rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
2267
2370
  waitFor: z5.number().optional().describe("Max ms to wait for element to appear (default 5000)")
2268
2371
  },
2269
- async ({ query, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
2372
+ async ({ query, role, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
2270
2373
  checkBlacklist(app);
2271
- if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
2374
+ if (!query && !role && !on && !coords) throw new Error("Provide query, role, on, or coords.");
2272
2375
  const args = ["click"];
2273
2376
  if (coords) {
2274
2377
  args.push("--coords", coords);
2378
+ } else if (role) {
2379
+ const resolved = await findFirstByRole(role, app);
2380
+ if (resolved) {
2381
+ args.push("--coords", resolved);
2382
+ } else {
2383
+ throw new Error(`No element with role '${role}' found in ${app ?? "frontmost app"}.`);
2384
+ }
2275
2385
  } else if (query) {
2276
2386
  const resolved = await resolveElementCoords(query, app);
2277
2387
  if (resolved) {
@@ -2294,7 +2404,7 @@ var DesktopTools = class {
2294
2404
  "desktop_type",
2295
2405
  [
2296
2406
  "Type text via keyboard. Supports \\n (return), \\t (tab) escape sequences.",
2297
- "IMPORTANT: Focus the target field first (click it with desktop_click) before typing. Types at current keyboard focus.",
2407
+ "IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
2298
2408
  "For Korean/Japanese/Chinese/emoji, use desktop_paste instead (keyboard sim is ASCII only).",
2299
2409
  "Use clear=true to replace existing text (Cmd+A \u2192 Delete before typing)."
2300
2410
  ].join("\n"),
@@ -2319,7 +2429,7 @@ var DesktopTools = class {
2319
2429
  "desktop_paste",
2320
2430
  [
2321
2431
  "Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
2322
- "IMPORTANT: Focus the target field first (click it with desktop_click) before pasting.",
2432
+ "IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
2323
2433
  "Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
2324
2434
  "Can also paste file contents via filePath."
2325
2435
  ].join("\n"),
@@ -1653,6 +1653,28 @@ tell application "System Events"
1653
1653
  end repeat
1654
1654
  end try
1655
1655
  end repeat
1656
+ repeat with parent in topElems
1657
+ try
1658
+ repeat with child in UI elements of parent
1659
+ try
1660
+ set childRole to role of child
1661
+ if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
1662
+ repeat with gc in UI elements of child
1663
+ try
1664
+ if (name of gc contains "${safeQuery}") or (description of gc contains "${safeQuery}") then
1665
+ set pos to position of gc
1666
+ set sz to size of gc
1667
+ set cx to (item 1 of pos) + (item 1 of sz) / 2
1668
+ set cy to (item 2 of pos) + (item 2 of sz) / 2
1669
+ return ((cx as integer) as text) & "," & ((cy as integer) as text)
1670
+ end if
1671
+ end try
1672
+ end repeat
1673
+ end if
1674
+ end try
1675
+ end repeat
1676
+ end try
1677
+ end repeat
1656
1678
  end tell
1657
1679
  end tell
1658
1680
  return "NOT_FOUND"
@@ -1686,6 +1708,86 @@ async function resolveElementCoords(query, app) {
1686
1708
  }
1687
1709
  return null;
1688
1710
  }
1711
/**
 * Find the first accessibility element with a given AX role in an app's UI and
 * return the screen coordinates of its center.
 *
 * The generated AppleScript asks System Events for the UI elements of the
 * target process and checks three levels in order: the top-level elements,
 * their children, and their grandchildren. Grandchildren are not inspected
 * when the child is an AXTable, AXOutline or AXList (presumably to avoid
 * enumerating large row collections — confirm with the author).
 *
 * @param {string} role - AX role to look for (e.g. "AXTextArea"); compared
 *   with AppleScript's `is` operator.
 * @param {string} [app] - Process name to search. When falsy, the frontmost
 *   application process is looked up first.
 * @returns {Promise<string|null>} "x,y" center coordinates, or null when no
 *   element matched, the frontmost app could not be determined, or osascript
 *   failed or timed out.
 */
async function findFirstByRole(role, app) {
  // One budget for every osascript call so that neither the frontmost-app
  // lookup nor the tree search can hang the tool call indefinitely.
  const OSASCRIPT_TIMEOUT_MS = 1e4;

  // Backslash-escape the two characters that would terminate or corrupt an
  // AppleScript string literal.
  const escapeForAppleScript = (value) => value.replace(/[\\"]/g, "\\$&");

  let targetApp = app;
  if (!targetApp) {
    try {
      const { stdout } = await execa(
        "osascript",
        [
          "-e",
          'tell application "System Events" to get name of first application process whose frontmost is true'
        ],
        { timeout: OSASCRIPT_TIMEOUT_MS }
      );
      targetApp = stdout.trim();
    } catch {
      // Best effort: callers treat null as "not found".
      return null;
    }
    // Without a process name the search script could only fail; stop early.
    if (!targetApp) return null;
  }

  const safeApp = escapeForAppleScript(targetApp);
  const safeRole = escapeForAppleScript(role);

  // AppleScript snippet shared by all three search levels: when `ref` has the
  // wanted role, return the center of its bounding box as "x,y" integers.
  const centerIfRoleMatches = (ref) =>
    [
      `if role of ${ref} is "${safeRole}" then`,
      `set pos to position of ${ref}`,
      `set sz to size of ${ref}`,
      "set cx to (item 1 of pos) + (item 1 of sz) / 2",
      "set cy to (item 2 of pos) + (item 2 of sz) / 2",
      'return ((cx as integer) as text) & "," & ((cy as integer) as text)',
      "end if"
    ].join("\n");

  // Each level is wrapped in its own `try` so that an element which refuses
  // to report role/position/size does not abort the rest of the search.
  const script = `
tell application "System Events"
tell process "${safeApp}"
set topElems to UI elements
repeat with elem in topElems
try
${centerIfRoleMatches("elem")}
end try
end repeat
repeat with parent in topElems
try
repeat with elem in UI elements of parent
try
${centerIfRoleMatches("elem")}
end try
end repeat
end try
end repeat
repeat with parent in topElems
try
repeat with child in UI elements of parent
try
set childRole to role of child
if childRole is not "AXTable" and childRole is not "AXOutline" and childRole is not "AXList" then
repeat with gc in UI elements of child
try
${centerIfRoleMatches("gc")}
end try
end repeat
end if
end try
end repeat
end try
end repeat
end tell
end tell
return "NOT_FOUND"
`;

  try {
    const { stdout } = await execa("osascript", ["-e", script], { timeout: OSASCRIPT_TIMEOUT_MS });
    const result = stdout.trim();
    if (result === "NOT_FOUND" || !result.includes(",")) return null;
    return result;
  } catch {
    // osascript failure or timeout is reported to the caller as "not found".
    return null;
  }
}
1689
1791
  var DesktopTools = class {
1690
1792
  register(server) {
1691
1793
  server.tool(
@@ -1764,16 +1866,17 @@ var DesktopTools = class {
1764
1866
  server.tool(
1765
1867
  "desktop_click",
1766
1868
  [
1767
- "Click a UI element by text label (query), coordinates (coords), or element ID (on).",
1768
- "BEST: Use query with app name \u2014 auto-resolves to exact screen coords via accessibility API. No desktop_see needed.",
1769
- "GOOD: Use coords 'x,y' \u2014 MUST be from osascript position+size center calculation, NEVER from visual screenshot estimation.",
1770
- "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots.",
1771
- "CAUTION: 'on' (element ID) has known offset bug \u2014 use coords or query instead when possible.",
1772
- "If query not found: use execute_command + osascript to get element position+size, then center = (x + w/2, y + h/2). Or try desktop_menu, desktop_hotkey.",
1773
- "NOTE: Web page elements (inside browser) are invisible to desktop tools. Use browser_* tools instead."
1869
+ "Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
1870
+ "BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
1871
+ "ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
1872
+ "GOOD: Use coords 'x,y' \u2014 MUST be from osascript center calculation (position + size/2). NEVER estimate from screenshots.",
1873
+ "CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
1874
+ "If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
1875
+ "NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
1774
1876
  ].join("\n"),
1775
1877
  {
1776
- query: z5.string().optional().describe("Text/label to click (case-insensitive)"),
1878
+ query: z5.string().optional().describe("Text/label to click (case-insensitive). Works for deeply nested elements."),
1879
+ role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
1777
1880
  on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
1778
1881
  coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
1779
1882
  app: z5.string().optional().describe("App name"),
@@ -1782,12 +1885,19 @@ var DesktopTools = class {
1782
1885
  rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
1783
1886
  waitFor: z5.number().optional().describe("Max ms to wait for element to appear (default 5000)")
1784
1887
  },
1785
- async ({ query, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
1888
+ async ({ query, role, on, coords, app, snapshot, doubleClick, rightClick, waitFor }) => {
1786
1889
  checkBlacklist(app);
1787
- if (!query && !on && !coords) throw new Error("Provide query, on, or coords.");
1890
+ if (!query && !role && !on && !coords) throw new Error("Provide query, role, on, or coords.");
1788
1891
  const args = ["click"];
1789
1892
  if (coords) {
1790
1893
  args.push("--coords", coords);
1894
+ } else if (role) {
1895
+ const resolved = await findFirstByRole(role, app);
1896
+ if (resolved) {
1897
+ args.push("--coords", resolved);
1898
+ } else {
1899
+ throw new Error(`No element with role '${role}' found in ${app ?? "frontmost app"}.`);
1900
+ }
1791
1901
  } else if (query) {
1792
1902
  const resolved = await resolveElementCoords(query, app);
1793
1903
  if (resolved) {
@@ -1810,7 +1920,7 @@ var DesktopTools = class {
1810
1920
  "desktop_type",
1811
1921
  [
1812
1922
  "Type text via keyboard. Supports \\n (return), \\t (tab) escape sequences.",
1813
- "IMPORTANT: Focus the target field first (click it with desktop_click) before typing. Types at current keyboard focus.",
1923
+ "IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
1814
1924
  "For Korean/Japanese/Chinese/emoji, use desktop_paste instead (keyboard sim is ASCII only).",
1815
1925
  "Use clear=true to replace existing text (Cmd+A \u2192 Delete before typing)."
1816
1926
  ].join("\n"),
@@ -1835,7 +1945,7 @@ var DesktopTools = class {
1835
1945
  "desktop_paste",
1836
1946
  [
1837
1947
  "Paste via clipboard (Cmd+V). Atomic: saves clipboard \u2192 sets content \u2192 pastes \u2192 restores.",
1838
- "IMPORTANT: Focus the target field first (click it with desktop_click) before pasting.",
1948
+ "IMPORTANT: Focus the target field first \u2014 use desktop_click(query, app) for labeled fields, or desktop_click(role:'AXTextArea', app) for unlabeled input areas.",
1839
1949
  "Supports all Unicode (Korean, Japanese, Chinese, emoji). Use instead of desktop_type for non-ASCII.",
1840
1950
  "Can also paste file contents via filePath."
1841
1951
  ].join("\n"),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "junis",
3
- "version": "0.4.0",
3
+ "version": "0.4.1",
4
4
  "description": "One-line device control for AI agents",
5
5
  "type": "module",
6
6
  "bin": {