junis 0.3.10 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -456,7 +456,10 @@ var toolPermissions = {
456
456
  desktop_hotkey: "confirm",
457
457
  desktop_scroll: "confirm",
458
458
  desktop_menu: "confirm",
459
+ desktop_paste: "confirm",
459
460
  desktop_screenshot: "confirm",
461
+ desktop_open_app: "auto",
462
+ desktop_open_url: "auto",
460
463
  cron_create: "confirm",
461
464
  cron_delete: "confirm",
462
465
  edit_block: "confirm",
@@ -487,6 +490,8 @@ var FilesystemTools = class {
487
490
  "ROUTING:",
488
491
  "- Use for system commands, package managers (npm, pip, brew), git, build tools, and scripting.",
489
492
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
493
+ "- NOT for macOS app GUI interaction. When the user asks to interact with, control, or automate any application (clicking, typing, reading screen, navigating menus), use the desktop_* tools instead (desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_menu, desktop_screenshot).",
494
+ "- The ONLY exception: permission fix commands (swift -e for CGRequestScreenCaptureAccess/AXIsProcessTrustedWithOptions, peekaboo permissions, or open 'x-apple.systempreferences:...').",
490
495
  "",
491
496
  "BEHAVIOR:",
492
497
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -1626,7 +1631,12 @@ Cause: ${e.message}${hint}` }],
1626
1631
  "Start or stop screen recording. Captures the full screen as MP4 video.",
1627
1632
  "",
1628
1633
  "Use action='start' to begin, action='stop' to end and save. Only one recording can be active at a time.",
1629
- "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg)."
1634
+ "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg).",
1635
+ "",
1636
+ "PERMISSIONS (macOS): Screen Recording permission is needed (inherited from terminal app).",
1637
+ "If denied, run via execute_command: swift -e 'import CoreGraphics; CGRequestScreenCaptureAccess()'",
1638
+ "\u2192 macOS dialog appears. Ask user to click Allow, then retry.",
1639
+ "Fallback: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'"
1630
1640
  ].join("\n"),
1631
1641
  {
1632
1642
  action: z4.enum(["start", "stop"]).describe("'start': begin recording, 'stop': end recording and save the file"),
@@ -1716,10 +1726,28 @@ import { execFile as execFile2 } from "child_process";
1716
1726
  import { promisify as promisify4 } from "util";
1717
1727
  import { platform as platform2 } from "os";
1718
1728
  var execFileAsync2 = promisify4(execFile2);
1729
+ async function requestMacOSPermissions() {
1730
+ try {
1731
+ await execFileAsync2("swift", ["-e", `
1732
+ import CoreGraphics
1733
+ CGRequestScreenCaptureAccess()
1734
+ `], { timeout: 5e3 });
1735
+ } catch {
1736
+ }
1737
+ try {
1738
+ await execFileAsync2("swift", ["-e", `
1739
+ import ApplicationServices
1740
+ let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary
1741
+ AXIsProcessTrustedWithOptions(opts)
1742
+ `], { timeout: 5e3 });
1743
+ } catch {
1744
+ }
1745
+ }
1719
1746
  async function ensurePeekaboo() {
1720
1747
  if (platform2() !== "darwin") return false;
1721
1748
  try {
1722
1749
  await execFileAsync2("which", ["peekaboo"]);
1750
+ await requestMacOSPermissions();
1723
1751
  return true;
1724
1752
  } catch {
1725
1753
  console.log("\u23F3 peekaboo not found, installing via brew...");
@@ -1727,6 +1755,7 @@ async function ensurePeekaboo() {
1727
1755
  await execFileAsync2("brew", ["tap", "steipete/tap"], { timeout: 3e4 });
1728
1756
  await execFileAsync2("brew", ["install", "peekaboo"], { timeout: 12e4 });
1729
1757
  console.log("\u2705 peekaboo installed");
1758
+ await requestMacOSPermissions();
1730
1759
  return true;
1731
1760
  } catch (brewErr) {
1732
1761
  console.warn("\u26A0\uFE0F peekaboo install failed:", brewErr.message);
@@ -1751,11 +1780,18 @@ var consecutiveFailures = 0;
1751
1780
  var MAX_CONSECUTIVE_FAILURES = 2;
1752
1781
  var PERM_FIX_HINT = [
1753
1782
  "\n\n\u{1F527} PERMISSION FIX \u2014 run these via execute_command:",
1754
- "1. Check status: peekaboo permissions --json-output",
1755
- "2. Screen Recording: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1756
- "3. Accessibility: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'",
1757
- "Toggle ON for 'peekaboo' in the opened panel, then retry."
1783
+ "1. Check: peekaboo permissions --json-output",
1784
+ "2. Screen Recording: swift -e 'import CoreGraphics; CGRequestScreenCaptureAccess()'",
1785
+ "3. Accessibility: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'",
1786
+ "\u2192 macOS system dialogs appear. Ask user to click Allow, then retry.",
1787
+ "NOTE: peekaboo inherits permissions from the terminal app \u2014 do NOT look for 'peekaboo' in System Preferences.",
1788
+ "Fallback (if Swift fails): open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1789
+ " open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'"
1758
1790
  ].join("\n");
1791
+ function isPermissionError(msg) {
1792
+ const lower = msg.toLowerCase();
1793
+ return lower.includes("permission") || lower.includes("accessibility") || lower.includes("screen recording") || lower.includes("not trusted") || lower.includes("not allowed") || lower.includes("denied");
1794
+ }
1759
1795
  async function peekaboo(args) {
1760
1796
  try {
1761
1797
  const { stdout } = await execa("peekaboo", [...args, "--json-output"]);
@@ -1763,14 +1799,13 @@ async function peekaboo(args) {
1763
1799
  return JSON.parse(stdout);
1764
1800
  } catch (err) {
1765
1801
  consecutiveFailures++;
1766
- const msg = err.message?.toLowerCase() ?? "";
1767
- const isPermError = msg.includes("permission") || msg.includes("accessibility") || msg.includes("screen recording") || msg.includes("not trusted") || msg.includes("not allowed") || msg.includes("denied");
1768
- const hint = isPermError ? PERM_FIX_HINT : "";
1802
+ const msg = err.message ?? "";
1803
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1769
1804
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1770
1805
  consecutiveFailures = 0;
1771
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}${hint}`);
1806
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1772
1807
  }
1773
- throw new Error(`${err.message}${hint}`);
1808
+ throw new Error(`${msg}${hint}`);
1774
1809
  }
1775
1810
  }
1776
1811
  function checkBlacklist(app) {
@@ -1785,25 +1820,38 @@ var DesktopTools = class {
1785
1820
  [
1786
1821
  "Capture the macOS Accessibility Tree snapshot for a running application. Returns a structured element list with IDs, roles, labels, and positions.",
1787
1822
  "",
1788
- "WORKFLOW: List running apps \u2192 capture accessibility tree \u2192 find target element by role/label \u2192 interact using element ID or label (click, type, scroll).",
1789
- "Pass the returned snapshotId to subsequent interaction calls for 240x speed improvement (cached lookup vs. full re-scan).",
1823
+ "WHEN TO USE DESKTOP TOOLS:",
1824
+ "When the user asks to interact with, control, or automate ANY macOS application \u2014 use desktop_* tools, NOT execute_command.",
1825
+ "Workflow: desktop_open_app \u2192 desktop_see \u2192 desktop_click/type/paste \u2192 verify with desktop_see or desktop_screenshot.",
1826
+ "",
1827
+ "WORKFLOW TIPS:",
1828
+ "- If accessibility tree times out (complex UI apps like KakaoTalk): increase timeout parameter, or fall back to:",
1829
+ " desktop_screenshot \u2192 desktop_list_windows (get window bounds x,y,w,h) \u2192 calculate coordinates \u2192 desktop_click with coords parameter.",
1830
+ "- For Korean/Japanese/Chinese text input: always use desktop_paste (NOT desktop_type).",
1831
+ "- For multi-window apps: use desktop_list_windows to find specific windows.",
1832
+ "- Pass snapshotId to subsequent calls for 240x speed improvement.",
1833
+ "- Double-click to open items (e.g. chat windows in KakaoTalk): use desktop_click with doubleClick=true.",
1790
1834
  "",
1791
- "PERMISSIONS: Desktop tools require macOS Accessibility + Screen Recording permissions for 'peekaboo'.",
1792
- "If a tool fails with permission error, use execute_command to:",
1835
+ "PERMISSIONS: Requires Accessibility + Screen Recording.",
1836
+ "peekaboo inherits permissions from the parent terminal app \u2014 it does NOT need its own entry in System Preferences.",
1837
+ "If denied, fix via execute_command:",
1793
1838
  " 1. peekaboo permissions --json-output (check which are missing)",
1794
- " 2. open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'",
1795
- " 3. open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1796
- "Ask the user to toggle ON for 'peekaboo', then retry.",
1839
+ " 2. Screen Recording: swift -e 'import CoreGraphics; CGRequestScreenCaptureAccess()'",
1840
+ " 3. Accessibility: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'",
1841
+ " \u2192 macOS system dialogs appear. Ask user to click Allow, then retry.",
1842
+ " Fallback: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1797
1843
  "",
1798
- "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger an automatic safety stop."
1844
+ "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger automatic safety stop."
1799
1845
  ].join("\n"),
1800
1846
  {
1801
- app: z5.string().optional().describe("App name to target (e.g. 'Safari', 'Notes', 'Google Chrome'). Omit for the frontmost app.")
1847
+ app: z5.string().optional().describe("App name to target (e.g. 'Safari', 'Notes', 'Google Chrome'). Omit for the frontmost app."),
1848
+ timeout: z5.number().optional().describe("Timeout in seconds (default: 20). Increase for complex UI apps. If it still times out, fall back to desktop_screenshot + coordinate-based desktop_click.")
1802
1849
  },
1803
- async ({ app }) => {
1850
+ async ({ app, timeout }) => {
1804
1851
  checkBlacklist(app);
1805
1852
  const args = ["see"];
1806
1853
  if (app) args.push("--app", app);
1854
+ if (timeout) args.push("--timeout-seconds", String(timeout));
1807
1855
  const result = await peekaboo(args);
1808
1856
  const data = result.data;
1809
1857
  const snapshotId = data?.snapshot_id ?? result.snapshotId ?? result.snapshot_id;
@@ -1824,25 +1872,48 @@ var DesktopTools = class {
1824
1872
  server.tool(
1825
1873
  "desktop_click",
1826
1874
  [
1827
- "Click a macOS UI element by its accessibility label, ID, or x,y coordinates.",
1875
+ "Click a macOS UI element by text query, element ID, or x,y coordinates.",
1876
+ "",
1877
+ "PARAMETER GUIDE:",
1878
+ "- query: Text/label to search for (e.g. 'Save', 'Submit'). Searches visible UI elements.",
1879
+ "- on: Element ID from a previous desktop_see snapshot (e.g. 'B1', 'T2'). Fastest with snapshotId.",
1880
+ "- coords: Click at exact screen coordinates as 'x,y' (e.g. '1070,188'). Use when accessibility tree times out.",
1828
1881
  "",
1829
- "The 'on' parameter accepts: element label text (e.g. 'Save'), accessibility ID from a previous accessibility tree capture, or coordinates as 'x,y' string.",
1830
- "For faster interaction, pass the snapshotId from a recent accessibility tree capture.",
1882
+ "PROVEN WORKFLOW (from KakaoTalk automation):",
1883
+ "1. Try desktop_see first to get element IDs \u2192 click with 'on' parameter.",
1884
+ "2. If desktop_see times out: use desktop_screenshot \u2192 calculate coordinates \u2192 click with 'coords'.",
1885
+ "3. Use desktop_list_windows to get window bounds (x,y,w,h) for coordinate calculation.",
1886
+ "",
1887
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app).",
1831
1888
  "",
1832
1889
  "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger automatic safety stop."
1833
1890
  ].join("\n"),
1834
1891
  {
1835
- on: z5.string().describe("Element label, accessibility ID, or 'x,y' coordinates to click"),
1836
- app: z5.string().optional().describe("App name to target (e.g. 'Safari')"),
1837
- snapshot: z5.string().optional().describe("snapshotId from a previous accessibility tree capture for cached interaction (240x faster)"),
1838
- doubleClick: z5.boolean().optional().default(false).describe("Double-click instead of single click")
1892
+ query: z5.string().optional().describe("Text/label to search and click (e.g. 'Save', 'Submit Button')"),
1893
+ on: z5.string().optional().describe("Element ID from desktop_see snapshot (e.g. 'B1', 'T2')"),
1894
+ coords: z5.string().optional().describe("Screen coordinates as 'x,y' (e.g. '1070,188'). Use when accessibility tree is unavailable."),
1895
+ app: z5.string().optional().describe("App name to target (e.g. 'Safari', 'KakaoTalk')"),
1896
+ snapshot: z5.string().optional().describe("snapshotId from desktop_see for cached interaction (240x faster)"),
1897
+ doubleClick: z5.boolean().optional().default(false).describe("Double-click instead of single click (e.g. open files, open chat windows)"),
1898
+ rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)")
1839
1899
  },
1840
- async ({ on, app, snapshot, doubleClick }) => {
1900
+ async ({ query, on, coords, app, snapshot, doubleClick, rightClick }) => {
1841
1901
  checkBlacklist(app);
1842
- const args = ["click", "--on", on];
1902
+ if (!query && !on && !coords) {
1903
+ throw new Error("Provide at least one of: query (text search), on (element ID), or coords ('x,y').");
1904
+ }
1905
+ const args = ["click"];
1906
+ if (coords) {
1907
+ args.push("--coords", coords);
1908
+ } else if (on) {
1909
+ args.push("--on", on);
1910
+ } else if (query) {
1911
+ args.push(query);
1912
+ }
1843
1913
  if (app) args.push("--app", app);
1844
1914
  if (snapshot) args.push("--snapshot", snapshot);
1845
- if (doubleClick) args.push("--double-click");
1915
+ if (doubleClick) args.push("--double");
1916
+ if (rightClick) args.push("--right");
1846
1917
  const result = await peekaboo(args);
1847
1918
  return {
1848
1919
  content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
@@ -1852,20 +1923,27 @@ var DesktopTools = class {
1852
1923
  server.tool(
1853
1924
  "desktop_type",
1854
1925
  [
1855
- "Type text into the currently focused UI element on macOS. The text is sent as keyboard input character-by-character.",
1926
+ "Type text into the currently focused UI element on macOS via keyboard simulation.",
1927
+ "",
1928
+ "IMPORTANT: For Korean/Japanese/Chinese/emoji text, use desktop_paste instead \u2014 keyboard simulation does not support CJK.",
1929
+ "Always click the target input field first (via desktop_click) before typing.",
1856
1930
  "",
1857
- "IMPORTANT: Always capture the accessibility tree first to verify the correct element is focused before typing.",
1931
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app).",
1858
1932
  "",
1859
1933
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1860
1934
  ].join("\n"),
1861
1935
  {
1862
- text: z5.string().describe("Text to type into the focused element"),
1863
- app: z5.string().optional().describe("App name to focus before typing")
1936
+ text: z5.string().describe("Text to type (ASCII only \u2014 for CJK/emoji use desktop_paste)"),
1937
+ app: z5.string().optional().describe("App name to focus before typing"),
1938
+ pressReturn: z5.boolean().optional().default(false).describe("Press Return/Enter after typing (e.g. to send a message or submit a form)"),
1939
+ clear: z5.boolean().optional().default(false).describe("Clear the field before typing (Cmd+A, Delete)")
1864
1940
  },
1865
- async ({ text, app }) => {
1941
+ async ({ text, app, pressReturn, clear }) => {
1866
1942
  checkBlacklist(app);
1867
1943
  const args = ["type", text];
1868
1944
  if (app) args.push("--app", app);
1945
+ if (clear) args.push("--clear");
1946
+ if (pressReturn) args.push("--return");
1869
1947
  const result = await peekaboo(args);
1870
1948
  return {
1871
1949
  content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
@@ -1879,6 +1957,9 @@ var DesktopTools = class {
1879
1957
  "",
1880
1958
  "Common shortcuts: 'cmd,c' (copy), 'cmd,v' (paste), 'cmd,z' (undo), 'cmd,s' (save), 'cmd,w' (close tab), 'cmd,q' (quit), 'cmd,shift,t' (reopen tab), 'cmd,tab' (switch app).",
1881
1959
  "",
1960
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app, not peekaboo itself).",
1961
+ "Fix if denied via execute_command: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'",
1962
+ "",
1882
1963
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1883
1964
  ].join("\n"),
1884
1965
  {
@@ -1902,6 +1983,9 @@ var DesktopTools = class {
1902
1983
  "",
1903
1984
  "Use 'ticks' to control scroll distance (default: 3, higher = more scrolling). Can target a specific element by label or ID from a previous accessibility tree capture.",
1904
1985
  "",
1986
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app, not peekaboo itself).",
1987
+ "Fix if denied via execute_command: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'",
1988
+ "",
1905
1989
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1906
1990
  ].join("\n"),
1907
1991
  {
@@ -1939,11 +2023,13 @@ var DesktopTools = class {
1939
2023
  };
1940
2024
  } catch (err) {
1941
2025
  consecutiveFailures++;
2026
+ const msg = err.message ?? "";
2027
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1942
2028
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1943
2029
  consecutiveFailures = 0;
1944
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
2030
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1945
2031
  }
1946
- throw err;
2032
+ throw new Error(`${msg}${hint}`);
1947
2033
  }
1948
2034
  }
1949
2035
  );
@@ -1953,7 +2039,10 @@ var DesktopTools = class {
1953
2039
  "List all open windows on macOS, optionally filtered by app name. Returns window titles and metadata.",
1954
2040
  "",
1955
2041
  "If no app is specified, lists windows for the frontmost application.",
1956
- "Use this after identifying running apps to find specific windows before capturing the accessibility tree or taking a screenshot."
2042
+ "Use this after identifying running apps to find specific windows before capturing the accessibility tree or taking a screenshot.",
2043
+ "",
2044
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app, not peekaboo itself).",
2045
+ "Fix if denied via execute_command: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'"
1957
2046
  ].join("\n"),
1958
2047
  {
1959
2048
  app: z5.string().optional().describe("Filter by app name. Omit to query the frontmost app.")
@@ -1977,32 +2066,54 @@ var DesktopTools = class {
1977
2066
  };
1978
2067
  } catch (err) {
1979
2068
  consecutiveFailures++;
2069
+ const msg = err.message ?? "";
2070
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1980
2071
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1981
2072
  consecutiveFailures = 0;
1982
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
2073
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1983
2074
  }
1984
- throw err;
2075
+ throw new Error(`${msg}${hint}`);
1985
2076
  }
1986
2077
  }
1987
2078
  );
1988
2079
  server.tool(
1989
2080
  "desktop_screenshot",
1990
2081
  [
1991
- "Take a high-quality macOS screenshot (Retina display support). Returns base64 image data.",
2082
+ "Take a high-quality macOS screenshot. Returns base64 image data.",
2083
+ "",
2084
+ "MODES:",
2085
+ "- 'screen': full display capture (default). Use screenIndex for multi-monitor setups.",
2086
+ "- 'window': specific app window. Specify with app, windowTitle, or windowIndex.",
2087
+ "- 'frontmost': capture only the frontmost window.",
2088
+ "- 'auto': peekaboo chooses the best mode automatically.",
2089
+ "",
2090
+ "TARGETING SPECIFIC WINDOWS:",
2091
+ "- app: capture by app name (e.g. 'Safari', 'KakaoTalk')",
2092
+ "- windowTitle: capture a specific window by title (partial match supported)",
2093
+ "- windowIndex: capture by window z-order (0 = frontmost window of the app)",
2094
+ "- screenIndex: which display to capture in 'screen' mode (0-based, for multi-monitor)",
1992
2095
  "",
1993
- "MODES: 'screen' captures the full display, 'window' captures a specific app window.",
1994
2096
  "TIP: Prefer the accessibility tree for understanding UI structure \u2014 use screenshots only when visual appearance matters (layouts, images, colors).",
1995
2097
  "",
2098
+ "PERMISSIONS: Requires Screen Recording (inherited from terminal app, not peekaboo itself).",
2099
+ "Fix if denied via execute_command: swift -e 'import CoreGraphics; CGRequestScreenCaptureAccess()'",
2100
+ "",
1996
2101
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1997
2102
  ].join("\n"),
1998
2103
  {
1999
- app: z5.string().optional().describe("Capture a specific app's window (by name)"),
2000
- mode: z5.enum(["screen", "window"]).optional().default("screen").describe("'screen': full display capture, 'window': specific app window only")
2104
+ app: z5.string().optional().describe("Capture a specific app's window (by name, e.g. 'Safari', 'KakaoTalk')"),
2105
+ mode: z5.enum(["screen", "window", "frontmost", "auto"]).optional().default("screen").describe("'screen': full display, 'window': specific app window, 'frontmost': frontmost window, 'auto': peekaboo decides"),
2106
+ windowTitle: z5.string().optional().describe("Capture window by title (partial match). Use with mode='window'."),
2107
+ windowIndex: z5.number().optional().describe("Window z-order index (0 = frontmost window of the app). Use with mode='window'."),
2108
+ screenIndex: z5.number().optional().describe("Display index for multi-monitor (0-based). Use with mode='screen'.")
2001
2109
  },
2002
- async ({ app, mode }) => {
2110
+ async ({ app, mode, windowTitle, windowIndex, screenIndex }) => {
2003
2111
  checkBlacklist(app);
2004
2112
  const args = ["image", "--mode", mode];
2005
2113
  if (app) args.push("--app", app);
2114
+ if (windowTitle) args.push("--window-title", windowTitle);
2115
+ if (windowIndex !== void 0) args.push("--window-index", String(windowIndex));
2116
+ if (screenIndex !== void 0) args.push("--screen-index", String(screenIndex));
2006
2117
  const result = await peekaboo(args);
2007
2118
  const data = result.data;
2008
2119
  const files = data?.files;
@@ -2030,6 +2141,9 @@ var DesktopTools = class {
2030
2141
  "Examples: ['File', 'New Tab'], ['Edit', 'Find', 'Find...'], ['View', 'Enter Full Screen'].",
2031
2142
  "Omit the 'app' parameter to target the frontmost app. The target app must be running.",
2032
2143
  "",
2144
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app, not peekaboo itself).",
2145
+ "Fix if denied via execute_command: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'",
2146
+ "",
2033
2147
  "SAFETY: Terminal, iTerm, and Finder are blocked."
2034
2148
  ].join("\n"),
2035
2149
  {
@@ -2048,14 +2162,88 @@ var DesktopTools = class {
2048
2162
  };
2049
2163
  } catch (err) {
2050
2164
  consecutiveFailures++;
2165
+ const msg = err.message ?? "";
2166
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
2051
2167
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
2052
2168
  consecutiveFailures = 0;
2053
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
2169
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
2054
2170
  }
2055
- throw err;
2171
+ throw new Error(`${msg}${hint}`);
2056
2172
  }
2057
2173
  }
2058
2174
  );
2175
+ server.tool(
2176
+ "desktop_paste",
2177
+ [
2178
+ "Paste text via clipboard into the focused element. Automatically sets clipboard, pastes (Cmd+V), then restores previous clipboard.",
2179
+ "",
2180
+ "ALWAYS USE THIS instead of desktop_type for: Korean, Japanese, Chinese, emoji, or any non-ASCII text.",
2181
+ "Unlike desktop_type (keyboard simulation), this uses the system clipboard \u2014 works with ALL character sets.",
2182
+ "",
2183
+ `PROVEN: In KakaoTalk automation, 'peekaboo paste "\uC548\uB155?"' successfully sent Korean text while 'type' would have failed.`,
2184
+ "",
2185
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app).",
2186
+ "",
2187
+ "SAFETY: Terminal, iTerm, and Finder are blocked."
2188
+ ].join("\n"),
2189
+ {
2190
+ text: z5.string().describe("Text to paste (supports Korean, Japanese, Chinese, emoji, any Unicode)"),
2191
+ app: z5.string().optional().describe("App name to focus before pasting")
2192
+ },
2193
+ async ({ text, app }) => {
2194
+ checkBlacklist(app);
2195
+ const args = ["paste", text];
2196
+ if (app) args.push("--app", app);
2197
+ const result = await peekaboo(args);
2198
+ return {
2199
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
2200
+ };
2201
+ }
2202
+ );
2203
+ server.tool(
2204
+ "desktop_open_app",
2205
+ [
2206
+ "Launch or bring to front a macOS application. Use this as the FIRST STEP when automating any app.",
2207
+ "",
2208
+ "PROVEN WORKFLOW (from KakaoTalk automation):",
2209
+ "1. desktop_open_app \u2192 2. desktop_list_apps (verify) \u2192 3. desktop_see or desktop_screenshot \u2192 4. interact",
2210
+ "",
2211
+ "After launching, use desktop_list_apps to confirm the app is running, then desktop_see to capture UI.",
2212
+ "",
2213
+ "SAFETY: Terminal, iTerm, and Finder are blocked for automation safety."
2214
+ ].join("\n"),
2215
+ {
2216
+ app: z5.string().describe("Application name to launch (e.g. 'Safari', 'Notes', 'KakaoTalk', 'Google Chrome')")
2217
+ },
2218
+ async ({ app }) => {
2219
+ checkBlacklist(app);
2220
+ const args = ["app", "launch", app, "--wait-until-ready"];
2221
+ const result = await peekaboo(args);
2222
+ return {
2223
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
2224
+ };
2225
+ }
2226
+ );
2227
+ server.tool(
2228
+ "desktop_open_url",
2229
+ [
2230
+ "Open a URL or file with its default (or specified) application.",
2231
+ "",
2232
+ "Examples: 'https://google.com', '~/Documents/report.pdf', 'x-apple.systempreferences:...'"
2233
+ ].join("\n"),
2234
+ {
2235
+ url: z5.string().describe("URL or file path to open"),
2236
+ app: z5.string().optional().describe("Specific app to open with (e.g. 'Google Chrome', 'Preview')")
2237
+ },
2238
+ async ({ url, app }) => {
2239
+ const args = ["open", url];
2240
+ if (app) args.push("--app", app);
2241
+ const result = await peekaboo(args);
2242
+ return {
2243
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
2244
+ };
2245
+ }
2246
+ );
2059
2247
  }
2060
2248
  };
2061
2249
 
@@ -42,7 +42,10 @@ var toolPermissions = {
42
42
  desktop_hotkey: "confirm",
43
43
  desktop_scroll: "confirm",
44
44
  desktop_menu: "confirm",
45
+ desktop_paste: "confirm",
45
46
  desktop_screenshot: "confirm",
47
+ desktop_open_app: "auto",
48
+ desktop_open_url: "auto",
46
49
  cron_create: "confirm",
47
50
  cron_delete: "confirm",
48
51
  edit_block: "confirm",
@@ -73,6 +76,8 @@ var FilesystemTools = class {
73
76
  "ROUTING:",
74
77
  "- Use for system commands, package managers (npm, pip, brew), git, build tools, and scripting.",
75
78
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
79
+ "- NOT for macOS app GUI interaction. When the user asks to interact with, control, or automate any application (clicking, typing, reading screen, navigating menus), use the desktop_* tools instead (desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_menu, desktop_screenshot).",
80
+ "- The ONLY exception: permission fix commands (swift -e for CGRequestScreenCaptureAccess/AXIsProcessTrustedWithOptions, peekaboo permissions, or open 'x-apple.systempreferences:...').",
76
81
  "",
77
82
  "BEHAVIOR:",
78
83
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -1212,7 +1217,12 @@ Cause: ${e.message}${hint}` }],
1212
1217
  "Start or stop screen recording. Captures the full screen as MP4 video.",
1213
1218
  "",
1214
1219
  "Use action='start' to begin, action='stop' to end and save. Only one recording can be active at a time.",
1215
- "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg)."
1220
+ "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg).",
1221
+ "",
1222
+ "PERMISSIONS (macOS): Screen Recording permission is needed (inherited from terminal app).",
1223
+ "If denied, run via execute_command: swift -e 'import CoreGraphics; CGRequestScreenCaptureAccess()'",
1224
+ "\u2192 macOS dialog appears. Ask user to click Allow, then retry.",
1225
+ "Fallback: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'"
1216
1226
  ].join("\n"),
1217
1227
  {
1218
1228
  action: z4.enum(["start", "stop"]).describe("'start': begin recording, 'stop': end recording and save the file"),
@@ -1302,10 +1312,28 @@ import { execFile as execFile2 } from "child_process";
1302
1312
  import { promisify as promisify4 } from "util";
1303
1313
  import { platform as platform2 } from "os";
1304
1314
  var execFileAsync2 = promisify4(execFile2);
1315
+ async function requestMacOSPermissions() {
1316
+ try {
1317
+ await execFileAsync2("swift", ["-e", `
1318
+ import CoreGraphics
1319
+ CGRequestScreenCaptureAccess()
1320
+ `], { timeout: 5e3 });
1321
+ } catch {
1322
+ }
1323
+ try {
1324
+ await execFileAsync2("swift", ["-e", `
1325
+ import ApplicationServices
1326
+ let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary
1327
+ AXIsProcessTrustedWithOptions(opts)
1328
+ `], { timeout: 5e3 });
1329
+ } catch {
1330
+ }
1331
+ }
1305
1332
  async function ensurePeekaboo() {
1306
1333
  if (platform2() !== "darwin") return false;
1307
1334
  try {
1308
1335
  await execFileAsync2("which", ["peekaboo"]);
1336
+ await requestMacOSPermissions();
1309
1337
  return true;
1310
1338
  } catch {
1311
1339
  console.log("\u23F3 peekaboo not found, installing via brew...");
@@ -1313,6 +1341,7 @@ async function ensurePeekaboo() {
1313
1341
  await execFileAsync2("brew", ["tap", "steipete/tap"], { timeout: 3e4 });
1314
1342
  await execFileAsync2("brew", ["install", "peekaboo"], { timeout: 12e4 });
1315
1343
  console.log("\u2705 peekaboo installed");
1344
+ await requestMacOSPermissions();
1316
1345
  return true;
1317
1346
  } catch (brewErr) {
1318
1347
  console.warn("\u26A0\uFE0F peekaboo install failed:", brewErr.message);
@@ -1337,11 +1366,18 @@ var consecutiveFailures = 0;
1337
1366
  var MAX_CONSECUTIVE_FAILURES = 2;
1338
1367
  var PERM_FIX_HINT = [
1339
1368
  "\n\n\u{1F527} PERMISSION FIX \u2014 run these via execute_command:",
1340
- "1. Check status: peekaboo permissions --json-output",
1341
- "2. Screen Recording: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1342
- "3. Accessibility: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'",
1343
- "Toggle ON for 'peekaboo' in the opened panel, then retry."
1369
+ "1. Check: peekaboo permissions --json-output",
1370
+ "2. Screen Recording: swift -e 'import CoreGraphics; CGRequestScreenCaptureAccess()'",
1371
+ "3. Accessibility: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'",
1372
+ "\u2192 macOS system dialogs appear. Ask user to click Allow, then retry.",
1373
+ "NOTE: peekaboo inherits permissions from the terminal app \u2014 do NOT look for 'peekaboo' in System Preferences.",
1374
+ "Fallback (if Swift fails): open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1375
+ " open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'"
1344
1376
  ].join("\n");
1377
+ function isPermissionError(msg) {
1378
+ const lower = msg.toLowerCase();
1379
+ return lower.includes("permission") || lower.includes("accessibility") || lower.includes("screen recording") || lower.includes("not trusted") || lower.includes("not allowed") || lower.includes("denied");
1380
+ }
1345
1381
  async function peekaboo(args) {
1346
1382
  try {
1347
1383
  const { stdout } = await execa("peekaboo", [...args, "--json-output"]);
@@ -1349,14 +1385,13 @@ async function peekaboo(args) {
1349
1385
  return JSON.parse(stdout);
1350
1386
  } catch (err) {
1351
1387
  consecutiveFailures++;
1352
- const msg = err.message?.toLowerCase() ?? "";
1353
- const isPermError = msg.includes("permission") || msg.includes("accessibility") || msg.includes("screen recording") || msg.includes("not trusted") || msg.includes("not allowed") || msg.includes("denied");
1354
- const hint = isPermError ? PERM_FIX_HINT : "";
1388
+ const msg = err.message ?? "";
1389
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1355
1390
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1356
1391
  consecutiveFailures = 0;
1357
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}${hint}`);
1392
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1358
1393
  }
1359
- throw new Error(`${err.message}${hint}`);
1394
+ throw new Error(`${msg}${hint}`);
1360
1395
  }
1361
1396
  }
1362
1397
  function checkBlacklist(app) {
@@ -1371,25 +1406,38 @@ var DesktopTools = class {
1371
1406
  [
1372
1407
  "Capture the macOS Accessibility Tree snapshot for a running application. Returns a structured element list with IDs, roles, labels, and positions.",
1373
1408
  "",
1374
- "WORKFLOW: List running apps \u2192 capture accessibility tree \u2192 find target element by role/label \u2192 interact using element ID or label (click, type, scroll).",
1375
- "Pass the returned snapshotId to subsequent interaction calls for 240x speed improvement (cached lookup vs. full re-scan).",
1409
+ "WHEN TO USE DESKTOP TOOLS:",
1410
+ "When the user asks to interact with, control, or automate ANY macOS application \u2014 use desktop_* tools, NOT execute_command.",
1411
+ "Workflow: desktop_open_app \u2192 desktop_see \u2192 desktop_click/type/paste \u2192 verify with desktop_see or desktop_screenshot.",
1412
+ "",
1413
+ "WORKFLOW TIPS:",
1414
+ "- If accessibility tree times out (complex UI apps like KakaoTalk): increase timeout parameter, or fall back to:",
1415
+ " desktop_screenshot \u2192 desktop_list_windows (get window bounds x,y,w,h) \u2192 calculate coordinates \u2192 desktop_click with coords parameter.",
1416
+ "- For Korean/Japanese/Chinese text input: always use desktop_paste (NOT desktop_type).",
1417
+ "- For multi-window apps: use desktop_list_windows to find specific windows.",
1418
+ "- Pass snapshotId to subsequent calls for 240x speed improvement.",
1419
+ "- Double-click to open items (e.g. chat windows in KakaoTalk): use desktop_click with doubleClick=true.",
1376
1420
  "",
1377
- "PERMISSIONS: Desktop tools require macOS Accessibility + Screen Recording permissions for 'peekaboo'.",
1378
- "If a tool fails with permission error, use execute_command to:",
1421
+ "PERMISSIONS: Requires Accessibility + Screen Recording.",
1422
+ "peekaboo inherits permissions from the parent terminal app \u2014 it does NOT need its own entry in System Preferences.",
1423
+ "If denied, fix via execute_command:",
1379
1424
  " 1. peekaboo permissions --json-output (check which are missing)",
1380
- " 2. open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'",
1381
- " 3. open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1382
- "Ask the user to toggle ON for 'peekaboo', then retry.",
1425
+ " 2. Screen Recording: swift -e 'import CoreGraphics; CGRequestScreenCaptureAccess()'",
1426
+ " 3. Accessibility: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'",
1427
+ " \u2192 macOS system dialogs appear. Ask user to click Allow, then retry.",
1428
+ " Fallback: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1383
1429
  "",
1384
- "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger an automatic safety stop."
1430
+ "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger automatic safety stop."
1385
1431
  ].join("\n"),
1386
1432
  {
1387
- app: z5.string().optional().describe("App name to target (e.g. 'Safari', 'Notes', 'Google Chrome'). Omit for the frontmost app.")
1433
+ app: z5.string().optional().describe("App name to target (e.g. 'Safari', 'Notes', 'Google Chrome'). Omit for the frontmost app."),
1434
+ timeout: z5.number().optional().describe("Timeout in seconds (default: 20). Increase for complex UI apps. If it still times out, fall back to desktop_screenshot + coordinate-based desktop_click.")
1388
1435
  },
1389
- async ({ app }) => {
1436
+ async ({ app, timeout }) => {
1390
1437
  checkBlacklist(app);
1391
1438
  const args = ["see"];
1392
1439
  if (app) args.push("--app", app);
1440
+ if (timeout) args.push("--timeout-seconds", String(timeout));
1393
1441
  const result = await peekaboo(args);
1394
1442
  const data = result.data;
1395
1443
  const snapshotId = data?.snapshot_id ?? result.snapshotId ?? result.snapshot_id;
@@ -1410,25 +1458,48 @@ var DesktopTools = class {
1410
1458
  server.tool(
1411
1459
  "desktop_click",
1412
1460
  [
1413
- "Click a macOS UI element by its accessibility label, ID, or x,y coordinates.",
1461
+ "Click a macOS UI element by text query, element ID, or x,y coordinates.",
1462
+ "",
1463
+ "PARAMETER GUIDE:",
1464
+ "- query: Text/label to search for (e.g. 'Save', 'Submit'). Searches visible UI elements.",
1465
+ "- on: Element ID from a previous desktop_see snapshot (e.g. 'B1', 'T2'). Fastest with snapshotId.",
1466
+ "- coords: Click at exact screen coordinates as 'x,y' (e.g. '1070,188'). Use when accessibility tree times out.",
1414
1467
  "",
1415
- "The 'on' parameter accepts: element label text (e.g. 'Save'), accessibility ID from a previous accessibility tree capture, or coordinates as 'x,y' string.",
1416
- "For faster interaction, pass the snapshotId from a recent accessibility tree capture.",
1468
+ "PROVEN WORKFLOW (from KakaoTalk automation):",
1469
+ "1. Try desktop_see first to get element IDs \u2192 click with 'on' parameter.",
1470
+ "2. If desktop_see times out: use desktop_screenshot \u2192 calculate coordinates \u2192 click with 'coords'.",
1471
+ "3. Use desktop_list_windows to get window bounds (x,y,w,h) for coordinate calculation.",
1472
+ "",
1473
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app).",
1417
1474
  "",
1418
1475
  "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger automatic safety stop."
1419
1476
  ].join("\n"),
1420
1477
  {
1421
- on: z5.string().describe("Element label, accessibility ID, or 'x,y' coordinates to click"),
1422
- app: z5.string().optional().describe("App name to target (e.g. 'Safari')"),
1423
- snapshot: z5.string().optional().describe("snapshotId from a previous accessibility tree capture for cached interaction (240x faster)"),
1424
- doubleClick: z5.boolean().optional().default(false).describe("Double-click instead of single click")
1478
+ query: z5.string().optional().describe("Text/label to search and click (e.g. 'Save', 'Submit Button')"),
1479
+ on: z5.string().optional().describe("Element ID from desktop_see snapshot (e.g. 'B1', 'T2')"),
1480
+ coords: z5.string().optional().describe("Screen coordinates as 'x,y' (e.g. '1070,188'). Use when accessibility tree is unavailable."),
1481
+ app: z5.string().optional().describe("App name to target (e.g. 'Safari', 'KakaoTalk')"),
1482
+ snapshot: z5.string().optional().describe("snapshotId from desktop_see for cached interaction (240x faster)"),
1483
+ doubleClick: z5.boolean().optional().default(false).describe("Double-click instead of single click (e.g. open files, open chat windows)"),
1484
+ rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)")
1425
1485
  },
1426
- async ({ on, app, snapshot, doubleClick }) => {
1486
+ async ({ query, on, coords, app, snapshot, doubleClick, rightClick }) => {
1427
1487
  checkBlacklist(app);
1428
- const args = ["click", "--on", on];
1488
+ if (!query && !on && !coords) {
1489
+ throw new Error("Provide at least one of: query (text search), on (element ID), or coords ('x,y').");
1490
+ }
1491
+ const args = ["click"];
1492
+ if (coords) {
1493
+ args.push("--coords", coords);
1494
+ } else if (on) {
1495
+ args.push("--on", on);
1496
+ } else if (query) {
1497
+ args.push(query);
1498
+ }
1429
1499
  if (app) args.push("--app", app);
1430
1500
  if (snapshot) args.push("--snapshot", snapshot);
1431
- if (doubleClick) args.push("--double-click");
1501
+ if (doubleClick) args.push("--double");
1502
+ if (rightClick) args.push("--right");
1432
1503
  const result = await peekaboo(args);
1433
1504
  return {
1434
1505
  content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
@@ -1438,20 +1509,27 @@ var DesktopTools = class {
1438
1509
  server.tool(
1439
1510
  "desktop_type",
1440
1511
  [
1441
- "Type text into the currently focused UI element on macOS. The text is sent as keyboard input character-by-character.",
1512
+ "Type text into the currently focused UI element on macOS via keyboard simulation.",
1513
+ "",
1514
+ "IMPORTANT: For Korean/Japanese/Chinese/emoji text, use desktop_paste instead \u2014 keyboard simulation does not support CJK.",
1515
+ "Always click the target input field first (via desktop_click) before typing.",
1442
1516
  "",
1443
- "IMPORTANT: Always capture the accessibility tree first to verify the correct element is focused before typing.",
1517
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app).",
1444
1518
  "",
1445
1519
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1446
1520
  ].join("\n"),
1447
1521
  {
1448
- text: z5.string().describe("Text to type into the focused element"),
1449
- app: z5.string().optional().describe("App name to focus before typing")
1522
+ text: z5.string().describe("Text to type (ASCII only \u2014 for CJK/emoji use desktop_paste)"),
1523
+ app: z5.string().optional().describe("App name to focus before typing"),
1524
+ pressReturn: z5.boolean().optional().default(false).describe("Press Return/Enter after typing (e.g. to send a message or submit a form)"),
1525
+ clear: z5.boolean().optional().default(false).describe("Clear the field before typing (Cmd+A, Delete)")
1450
1526
  },
1451
- async ({ text, app }) => {
1527
+ async ({ text, app, pressReturn, clear }) => {
1452
1528
  checkBlacklist(app);
1453
1529
  const args = ["type", text];
1454
1530
  if (app) args.push("--app", app);
1531
+ if (clear) args.push("--clear");
1532
+ if (pressReturn) args.push("--return");
1455
1533
  const result = await peekaboo(args);
1456
1534
  return {
1457
1535
  content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
@@ -1465,6 +1543,9 @@ var DesktopTools = class {
1465
1543
  "",
1466
1544
  "Common shortcuts: 'cmd,c' (copy), 'cmd,v' (paste), 'cmd,z' (undo), 'cmd,s' (save), 'cmd,w' (close tab), 'cmd,q' (quit), 'cmd,shift,t' (reopen tab), 'cmd,tab' (switch app).",
1467
1545
  "",
1546
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app, not peekaboo itself).",
1547
+ "Fix if denied via execute_command: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'",
1548
+ "",
1468
1549
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1469
1550
  ].join("\n"),
1470
1551
  {
@@ -1488,6 +1569,9 @@ var DesktopTools = class {
1488
1569
  "",
1489
1570
  "Use 'ticks' to control scroll distance (default: 3, higher = more scrolling). Can target a specific element by label or ID from a previous accessibility tree capture.",
1490
1571
  "",
1572
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app, not peekaboo itself).",
1573
+ "Fix if denied via execute_command: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'",
1574
+ "",
1491
1575
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1492
1576
  ].join("\n"),
1493
1577
  {
@@ -1525,11 +1609,13 @@ var DesktopTools = class {
1525
1609
  };
1526
1610
  } catch (err) {
1527
1611
  consecutiveFailures++;
1612
+ const msg = err.message ?? "";
1613
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1528
1614
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1529
1615
  consecutiveFailures = 0;
1530
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
1616
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1531
1617
  }
1532
- throw err;
1618
+ throw new Error(`${msg}${hint}`);
1533
1619
  }
1534
1620
  }
1535
1621
  );
@@ -1539,7 +1625,10 @@ var DesktopTools = class {
1539
1625
  "List all open windows on macOS, optionally filtered by app name. Returns window titles and metadata.",
1540
1626
  "",
1541
1627
  "If no app is specified, lists windows for the frontmost application.",
1542
- "Use this after identifying running apps to find specific windows before capturing the accessibility tree or taking a screenshot."
1628
+ "Use this after identifying running apps to find specific windows before capturing the accessibility tree or taking a screenshot.",
1629
+ "",
1630
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app, not peekaboo itself).",
1631
+ "Fix if denied via execute_command: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'"
1543
1632
  ].join("\n"),
1544
1633
  {
1545
1634
  app: z5.string().optional().describe("Filter by app name. Omit to query the frontmost app.")
@@ -1563,32 +1652,54 @@ var DesktopTools = class {
1563
1652
  };
1564
1653
  } catch (err) {
1565
1654
  consecutiveFailures++;
1655
+ const msg = err.message ?? "";
1656
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1566
1657
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1567
1658
  consecutiveFailures = 0;
1568
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
1659
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1569
1660
  }
1570
- throw err;
1661
+ throw new Error(`${msg}${hint}`);
1571
1662
  }
1572
1663
  }
1573
1664
  );
1574
1665
  server.tool(
1575
1666
  "desktop_screenshot",
1576
1667
  [
1577
- "Take a high-quality macOS screenshot (Retina display support). Returns base64 image data.",
1668
+ "Take a high-quality macOS screenshot. Returns base64 image data.",
1669
+ "",
1670
+ "MODES:",
1671
+ "- 'screen': full display capture (default). Use screenIndex for multi-monitor setups.",
1672
+ "- 'window': specific app window. Specify with app, windowTitle, or windowIndex.",
1673
+ "- 'frontmost': capture only the frontmost window.",
1674
+ "- 'auto': peekaboo chooses the best mode automatically.",
1675
+ "",
1676
+ "TARGETING SPECIFIC WINDOWS:",
1677
+ "- app: capture by app name (e.g. 'Safari', 'KakaoTalk')",
1678
+ "- windowTitle: capture a specific window by title (partial match supported)",
1679
+ "- windowIndex: capture by window z-order (0 = frontmost window of the app)",
1680
+ "- screenIndex: which display to capture in 'screen' mode (0-based, for multi-monitor)",
1578
1681
  "",
1579
- "MODES: 'screen' captures the full display, 'window' captures a specific app window.",
1580
1682
  "TIP: Prefer the accessibility tree for understanding UI structure \u2014 use screenshots only when visual appearance matters (layouts, images, colors).",
1581
1683
  "",
1684
+ "PERMISSIONS: Requires Screen Recording (inherited from terminal app, not peekaboo itself).",
1685
+ "Fix if denied via execute_command: swift -e 'import CoreGraphics; CGRequestScreenCaptureAccess()'",
1686
+ "",
1582
1687
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1583
1688
  ].join("\n"),
1584
1689
  {
1585
- app: z5.string().optional().describe("Capture a specific app's window (by name)"),
1586
- mode: z5.enum(["screen", "window"]).optional().default("screen").describe("'screen': full display capture, 'window': specific app window only")
1690
+ app: z5.string().optional().describe("Capture a specific app's window (by name, e.g. 'Safari', 'KakaoTalk')"),
1691
+ mode: z5.enum(["screen", "window", "frontmost", "auto"]).optional().default("screen").describe("'screen': full display, 'window': specific app window, 'frontmost': frontmost window, 'auto': peekaboo decides"),
1692
+ windowTitle: z5.string().optional().describe("Capture window by title (partial match). Use with mode='window'."),
1693
+ windowIndex: z5.number().optional().describe("Window z-order index (0 = frontmost window of the app). Use with mode='window'."),
1694
+ screenIndex: z5.number().optional().describe("Display index for multi-monitor (0-based). Use with mode='screen'.")
1587
1695
  },
1588
- async ({ app, mode }) => {
1696
+ async ({ app, mode, windowTitle, windowIndex, screenIndex }) => {
1589
1697
  checkBlacklist(app);
1590
1698
  const args = ["image", "--mode", mode];
1591
1699
  if (app) args.push("--app", app);
1700
+ if (windowTitle) args.push("--window-title", windowTitle);
1701
+ if (windowIndex !== void 0) args.push("--window-index", String(windowIndex));
1702
+ if (screenIndex !== void 0) args.push("--screen-index", String(screenIndex));
1592
1703
  const result = await peekaboo(args);
1593
1704
  const data = result.data;
1594
1705
  const files = data?.files;
@@ -1616,6 +1727,9 @@ var DesktopTools = class {
1616
1727
  "Examples: ['File', 'New Tab'], ['Edit', 'Find', 'Find...'], ['View', 'Enter Full Screen'].",
1617
1728
  "Omit the 'app' parameter to target the frontmost app. The target app must be running.",
1618
1729
  "",
1730
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app, not peekaboo itself).",
1731
+ "Fix if denied via execute_command: swift -e 'import ApplicationServices; let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary; AXIsProcessTrustedWithOptions(opts)'",
1732
+ "",
1619
1733
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1620
1734
  ].join("\n"),
1621
1735
  {
@@ -1634,14 +1748,88 @@ var DesktopTools = class {
1634
1748
  };
1635
1749
  } catch (err) {
1636
1750
  consecutiveFailures++;
1751
+ const msg = err.message ?? "";
1752
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1637
1753
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1638
1754
  consecutiveFailures = 0;
1639
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
1755
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1640
1756
  }
1641
- throw err;
1757
+ throw new Error(`${msg}${hint}`);
1642
1758
  }
1643
1759
  }
1644
1760
  );
1761
+ server.tool(
1762
+ "desktop_paste",
1763
+ [
1764
+ "Paste text via clipboard into the focused element. Automatically sets clipboard, pastes (Cmd+V), then restores previous clipboard.",
1765
+ "",
1766
+ "ALWAYS USE THIS instead of desktop_type for: Korean, Japanese, Chinese, emoji, or any non-ASCII text.",
1767
+ "Unlike desktop_type (keyboard simulation), this uses the system clipboard \u2014 works with ALL character sets.",
1768
+ "",
1769
+ `PROVEN: In KakaoTalk automation, 'peekaboo paste "\uC548\uB155?"' successfully sent Korean text while 'type' would have failed.`,
1770
+ "",
1771
+ "PERMISSIONS: Requires Accessibility (inherited from terminal app).",
1772
+ "",
1773
+ "SAFETY: Terminal, iTerm, and Finder are blocked."
1774
+ ].join("\n"),
1775
+ {
1776
+ text: z5.string().describe("Text to paste (supports Korean, Japanese, Chinese, emoji, any Unicode)"),
1777
+ app: z5.string().optional().describe("App name to focus before pasting")
1778
+ },
1779
+ async ({ text, app }) => {
1780
+ checkBlacklist(app);
1781
+ const args = ["paste", text];
1782
+ if (app) args.push("--app", app);
1783
+ const result = await peekaboo(args);
1784
+ return {
1785
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
1786
+ };
1787
+ }
1788
+ );
1789
+ server.tool(
1790
+ "desktop_open_app",
1791
+ [
1792
+ "Launch or bring to front a macOS application. Use this as the FIRST STEP when automating any app.",
1793
+ "",
1794
+ "PROVEN WORKFLOW (from KakaoTalk automation):",
1795
+ "1. desktop_open_app \u2192 2. desktop_list_apps (verify) \u2192 3. desktop_see or desktop_screenshot \u2192 4. interact",
1796
+ "",
1797
+ "After launching, use desktop_list_apps to confirm the app is running, then desktop_see to capture UI.",
1798
+ "",
1799
+ "SAFETY: Terminal, iTerm, and Finder are blocked for automation safety."
1800
+ ].join("\n"),
1801
+ {
1802
+ app: z5.string().describe("Application name to launch (e.g. 'Safari', 'Notes', 'KakaoTalk', 'Google Chrome')")
1803
+ },
1804
+ async ({ app }) => {
1805
+ checkBlacklist(app);
1806
+ const args = ["app", "launch", app, "--wait-until-ready"];
1807
+ const result = await peekaboo(args);
1808
+ return {
1809
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
1810
+ };
1811
+ }
1812
+ );
1813
+ server.tool(
1814
+ "desktop_open_url",
1815
+ [
1816
+ "Open a URL or file with its default (or specified) application.",
1817
+ "",
1818
+ "Examples: 'https://google.com', '~/Documents/report.pdf', 'x-apple.systempreferences:...'"
1819
+ ].join("\n"),
1820
+ {
1821
+ url: z5.string().describe("URL or file path to open"),
1822
+ app: z5.string().optional().describe("Specific app to open with (e.g. 'Google Chrome', 'Preview')")
1823
+ },
1824
+ async ({ url, app }) => {
1825
+ const args = ["open", url];
1826
+ if (app) args.push("--app", app);
1827
+ const result = await peekaboo(args);
1828
+ return {
1829
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
1830
+ };
1831
+ }
1832
+ );
1645
1833
  }
1646
1834
  };
1647
1835
 
@@ -43,7 +43,10 @@ var toolPermissions = {
43
43
  desktop_hotkey: "confirm",
44
44
  desktop_scroll: "confirm",
45
45
  desktop_menu: "confirm",
46
+ desktop_paste: "confirm",
46
47
  desktop_screenshot: "confirm",
48
+ desktop_open_app: "auto",
49
+ desktop_open_url: "auto",
47
50
  cron_create: "confirm",
48
51
  cron_delete: "confirm",
49
52
  edit_block: "confirm",
@@ -74,6 +77,8 @@ var FilesystemTools = class {
74
77
  "ROUTING:",
75
78
  "- Use for system commands, package managers (npm, pip, brew), git, build tools, and scripting.",
76
79
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
80
+ "- NOT for macOS app GUI interaction. When the user asks to interact with, control, or automate any application (clicking, typing, reading screen, navigating menus), use the desktop_* tools instead (desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_menu, desktop_screenshot).",
81
+ "- The ONLY exception: permission fix commands (swift -e for CGRequestScreenCaptureAccess/AXIsProcessTrustedWithOptions, peekaboo permissions, or open 'x-apple.systempreferences:...').",
77
82
  "",
78
83
  "BEHAVIOR:",
79
84
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -1213,7 +1218,12 @@ Cause: ${e.message}${hint}` }],
1213
1218
  "Start or stop screen recording. Captures the full screen as MP4 video.",
1214
1219
  "",
1215
1220
  "Use action='start' to begin, action='stop' to end and save. Only one recording can be active at a time.",
1216
- "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg)."
1221
+ "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg).",
1222
+ "",
1223
+ "PERMISSIONS (macOS): Screen Recording permission is needed (inherited from terminal app).",
1224
+ "If denied, run via execute_command: swift -e 'import CoreGraphics; CGRequestScreenCaptureAccess()'",
1225
+ "\u2192 macOS dialog appears. Ask user to click Allow, then retry.",
1226
+ "Fallback: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'"
1217
1227
  ].join("\n"),
1218
1228
  {
1219
1229
  action: z4.enum(["start", "stop"]).describe("'start': begin recording, 'stop': end recording and save the file"),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "junis",
3
- "version": "0.3.10",
3
+ "version": "0.3.12",
4
4
  "description": "One-line device control for AI agents",
5
5
  "type": "module",
6
6
  "bin": {