junis 0.3.9 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -165,6 +165,40 @@ function sleep(ms) {
165
165
 
166
166
  // src/relay/client.ts
167
167
  import WebSocket from "ws";
168
+
169
+ // src/relay/upload.ts
170
/** Decoded payload size, in bytes, above which content is uploaded out-of-band (5 MiB). */
var LARGE_FILE_THRESHOLD = 5 * 1024 * 1024;

/**
 * Upload a base64-encoded payload through a presigned URL obtained from the relay.
 *
 * @param {{ requestUploadUrl: (filename: string, contentType: string, size: number) => Promise<object> }} relay
 *   Object used to obtain the upload URLs; its response must carry `put_url` and `access_url`.
 * @param {string} base64Data - Payload encoded as base64.
 * @param {string} filename - File name forwarded to the relay.
 * @param {string} contentType - MIME type forwarded to the relay and sent as the PUT `Content-Type`.
 * @returns {Promise<string>} The `access_url` under which the uploaded payload can be read.
 * @throws {Error} If the payload decodes to zero bytes, the relay response lacks either URL,
 *   or the PUT request answers with a non-2xx status.
 */
async function uploadLargeFile(relay, base64Data, filename, contentType) {
  const buffer = Buffer.from(base64Data, "base64");
  // Buffer.from silently drops characters it cannot decode, so malformed input
  // can collapse to nothing; fail loudly instead of uploading an empty object.
  if (buffer.length === 0) {
    throw new Error("Nothing to upload: base64 payload decoded to 0 bytes");
  }

  const response = await relay.requestUploadUrl(filename, contentType, buffer.length);
  const { put_url, access_url } = response ?? {};
  const isUsableUrl = (value) => typeof value === "string" && value !== "";
  if (!isUsableUrl(put_url) || !isUsableUrl(access_url)) {
    // Without this check a malformed response surfaces as an opaque fetch() failure.
    throw new Error("Upload URL response is missing put_url or access_url");
  }

  // NOTE(review): the PUT has no timeout; a stalled upload blocks the caller indefinitely.
  const res = await fetch(put_url, {
    method: "PUT",
    headers: { "Content-Type": contentType },
    body: buffer
  });
  if (!res.ok) {
    throw new Error(`Upload failed: ${res.status} ${res.statusText}`);
  }
  return access_url;
}
188
/**
 * Report whether a base64 string is estimated to decode to more than
 * LARGE_FILE_THRESHOLD bytes.
 *
 * The estimate is `length * 0.75` (four base64 characters carry three bytes).
 * Padding and embedded line breaks are not subtracted, so the estimate can
 * slightly exceed the true decoded size.
 *
 * @param {string} base64 - Base64 text to size up.
 * @returns {boolean} True when the estimated decoded size exceeds the threshold.
 */
function isLargeBase64(base64) {
  const estimatedBytes = base64.length * 0.75;
  return estimatedBytes > LARGE_FILE_THRESHOLD;
}
191
/**
 * Guess a MIME type from the magic bytes at the start of a base64 payload.
 *
 * Recognizes JPEG, PNG, GIF, WebP and PDF; everything else (including
 * non-string input) is reported as "application/octet-stream".
 *
 * @param {string} base64 - Base64 text; leading whitespace is ignored.
 * @returns {string} The detected MIME type.
 */
function detectContentType(base64) {
  const FALLBACK = "application/octet-stream";
  if (typeof base64 !== "string") return FALLBACK;

  // 16 base64 characters decode to exactly 12 bytes, enough for every signature below.
  const header = base64.trimStart().slice(0, 16);
  if (header.startsWith("/9j/")) return "image/jpeg";
  if (header.startsWith("iVBOR")) return "image/png";
  if (header.startsWith("R0lGO")) return "image/gif";
  if (header.startsWith("UklGR")) {
    // "RIFF" is a generic container shared with WAV and AVI. Only call it WebP
    // when the form type at bytes 8-11 says so.
    const formType = Buffer.from(header, "base64").subarray(8, 12).toString("latin1");
    return formType === "WEBP" ? "image/webp" : FALLBACK;
  }
  if (header.startsWith("JVBER")) return "application/pdf";
  return FALLBACK;
}
200
+
201
+ // src/relay/client.ts
168
202
  var JUNIS_WS = (() => {
169
203
  if (process.env.JUNIS_WS_URL) return process.env.JUNIS_WS_URL;
170
204
  const apiUrl = process.env.JUNIS_API_URL ?? "https://junis.ai";
@@ -186,6 +220,8 @@ var RelayClient = class {
186
220
  heartbeatTimer = null;
187
221
  destroyed = false;
188
222
  lastPongTime = 0;
223
+ // Pending requests awaiting an upload_url_response, keyed by request_id
224
+ pendingUploadRequests = /* @__PURE__ */ new Map();
189
225
  async connect() {
190
226
  if (this.destroyed) return;
191
227
  const url = `${JUNIS_WS}/ws/devices/${this.config.device_key}`;
@@ -209,9 +245,22 @@ var RelayClient = class {
209
245
  this.lastPongTime = Date.now();
210
246
  return;
211
247
  }
248
+ if (msg.type === "upload_url_response") {
249
+ const pending = this.pendingUploadRequests.get(msg.request_id);
250
+ if (pending) {
251
+ this.pendingUploadRequests.delete(msg.request_id);
252
+ if (msg.error) {
253
+ pending.reject(new Error(msg.error));
254
+ } else {
255
+ pending.resolve(msg);
256
+ }
257
+ }
258
+ return;
259
+ }
212
260
  if (msg.type === "mcp_request") {
213
261
  try {
214
- const result = await this.onMCPRequest(msg.id, msg.payload);
262
+ let result = await this.onMCPRequest(msg.id, msg.payload);
263
+ result = await this.processLargeFiles(result);
215
264
  this.send({ type: "mcp_response", id: msg.id, payload: result });
216
265
  } catch (err) {
217
266
  this.send({
@@ -266,6 +315,76 @@ var RelayClient = class {
266
315
  this.ws.send(JSON.stringify(data));
267
316
  }
268
317
  }
318
+ /**
319
+ * 서버에 presigned PUT URL 요청.
320
+ * WebSocket으로 upload_url_request 전송 → upload_url_response 대기.
321
+ */
322
+ requestUploadUrl(filename, contentType, size) {
323
+ return new Promise((resolve, reject) => {
324
+ const requestId = crypto.randomUUID();
325
+ const timeout = setTimeout(() => {
326
+ this.pendingUploadRequests.delete(requestId);
327
+ reject(new Error("Upload URL request timeout (30s)"));
328
+ }, 3e4);
329
+ this.pendingUploadRequests.set(requestId, {
330
+ resolve: (data) => {
331
+ clearTimeout(timeout);
332
+ resolve(data);
333
+ },
334
+ reject: (err) => {
335
+ clearTimeout(timeout);
336
+ reject(err);
337
+ }
338
+ });
339
+ this.send({
340
+ type: "upload_url_request",
341
+ request_id: requestId,
342
+ filename,
343
+ content_type: contentType,
344
+ size
345
+ });
346
+ });
347
+ }
348
+ /**
349
+ * MCP 응답 내 대용량 base64 데이터를 감지하여 presigned URL 업로드 후 URL로 교체.
350
+ *
351
+ * 대상:
352
+ * 1. ImageContent: { type: "image", data: "<base64>", mimeType: "image/png" }
353
+ * → { type: "text", text: "![uploaded](https://...access_url)" }
354
+ * 2. TextContent with large base64: { type: "text", text: "<huge base64>" }
355
+ * → { type: "text", text: "https://...access_url" }
356
+ */
357
+ async processLargeFiles(result) {
358
+ if (!result || typeof result !== "object") return result;
359
+ const obj = result;
360
+ const inner = obj.result ?? obj;
361
+ const content = inner.content;
362
+ if (!Array.isArray(content)) return result;
363
+ for (let i = 0; i < content.length; i++) {
364
+ const item = content[i];
365
+ if (!item || typeof item !== "object") continue;
366
+ if (item.type === "image" && typeof item.data === "string" && isLargeBase64(item.data)) {
367
+ try {
368
+ const mimeType = item.mimeType || "image/png";
369
+ const ext = mimeType.split("/")[1] || "bin";
370
+ const url = await uploadLargeFile(this, item.data, `screenshot.${ext}`, mimeType);
371
+ content[i] = { type: "text", text: `![uploaded](${url})` };
372
+ } catch (err) {
373
+ console.error("Failed to upload large image:", err);
374
+ }
375
+ } else if (item.type === "text" && typeof item.text === "string" && isLargeBase64(item.text) && /^[A-Za-z0-9+/\n\r]+=*$/.test(item.text.trim())) {
376
+ try {
377
+ const contentType = detectContentType(item.text);
378
+ const ext = contentType.split("/")[1] || "bin";
379
+ const url = await uploadLargeFile(this, item.text, `file.${ext}`, contentType);
380
+ content[i] = { type: "text", text: url };
381
+ } catch (err) {
382
+ console.error("Failed to upload large text base64:", err);
383
+ }
384
+ }
385
+ }
386
+ return result;
387
+ }
269
388
  startHeartbeat() {
270
389
  this.heartbeatTimer = setInterval(() => {
271
390
  if (Date.now() - this.lastPongTime > 9e4) {
@@ -286,6 +405,10 @@ var RelayClient = class {
286
405
  this.destroyed = true;
287
406
  this.stopHeartbeat();
288
407
  this.ws?.close();
408
+ for (const [, pending] of this.pendingUploadRequests) {
409
+ pending.reject(new Error("Client destroyed"));
410
+ }
411
+ this.pendingUploadRequests.clear();
289
412
  }
290
413
  };
291
414
 
@@ -333,14 +456,17 @@ var toolPermissions = {
333
456
  desktop_hotkey: "confirm",
334
457
  desktop_scroll: "confirm",
335
458
  desktop_menu: "confirm",
459
+ desktop_paste: "confirm",
336
460
  desktop_screenshot: "confirm",
461
+ desktop_open_app: "auto",
462
+ desktop_open_url: "auto",
337
463
  cron_create: "confirm",
338
464
  cron_delete: "confirm",
339
465
  edit_block: "confirm",
340
466
  kill_process: "confirm",
341
- // 시스템 변경 — 기본 차단 (PDF 7.3절)
342
- execute_command: "deny",
343
- write_file: "deny"
467
+ // System changes — approved through conversation (confirm)
468
+ execute_command: "confirm",
469
+ write_file: "confirm"
344
470
  };
345
471
  function checkPermission(toolName) {
346
472
  const level = toolPermissions[toolName];
@@ -364,19 +490,19 @@ var FilesystemTools = class {
364
490
  "ROUTING:",
365
491
  "- Use for system commands, package managers (npm, pip, brew), git, build tools, and scripting.",
366
492
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
493
+ "- NOT for macOS app GUI interaction. When the user asks to interact with, control, or automate any application (clicking, typing, reading screen, navigating menus), use the desktop_* tools instead (desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_menu, desktop_screenshot).",
494
+ "- The ONLY exception: opening System Preferences URLs for permissions (e.g. open 'x-apple.systempreferences:...').",
367
495
  "",
368
496
  "BEHAVIOR:",
369
- "- Safe, routine commands (ls, pwd, git status, echo): execute immediately without explanation.",
370
- "- Destructive or irreversible commands (rm -rf, sudo, shutdown, mkfs): explain what will happen and get user confirmation first.",
497
+ "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
371
498
  "- If a command fails, analyze the error and suggest an alternative. Do not retry the identical command more than twice.",
372
499
  "",
373
500
  "SAFETY:",
374
- "- Commands run with the user's full permissions. Never execute commands that could damage the system, expose credentials, or modify security settings without explicit user request.",
375
- "- Avoid piping untrusted input into shells. Use absolute paths when possible. Quote paths containing spaces."
501
+ "- Commands run with the user's full permissions. Use absolute paths when possible. Quote paths containing spaces."
376
502
  ].join("\n"),
377
503
  {
378
504
  command: z.string().describe("The shell command to execute. Use absolute paths when possible. Quote paths containing spaces."),
379
- timeout_ms: z.number().optional().default(3e4).describe("Maximum execution time in milliseconds (default: 30000). Increase for long-running builds or downloads."),
505
+ timeout_ms: z.number().optional().default(12e4).describe("Maximum execution time in milliseconds (default: 120000). Increase for very long-running builds or downloads."),
380
506
  background: z.boolean().optional().default(false).describe("Run in background without waiting for completion. Use for servers or long-running processes.")
381
507
  },
382
508
  async ({ command, timeout_ms, background }) => {
@@ -863,11 +989,11 @@ var BrowserTools = class {
863
989
  headless: z2.boolean().optional().default(false).describe("Run without visible window (managed mode only). Use for background tasks."),
864
990
  cdpUrl: z2.string().optional().describe("Chrome DevTools Protocol URL for remote-cdp mode (e.g. http://localhost:9222)"),
865
991
  profile: z2.string().optional().describe("Browser profile name for persistent sessions \u2014 preserves cookies, logins, and history across restarts (managed mode only)"),
866
- allowInternal: z2.boolean().optional().default(false).describe("Allow navigation to localhost and internal network URLs")
992
+ allowInternal: z2.boolean().optional().default(true).describe("Allow navigation to localhost and internal network URLs (default: true for local agent)")
867
993
  },
868
994
  ({ mode, headless, cdpUrl, profile, allowInternal }) => this.withLock(async () => {
869
995
  if (this.browser) {
870
- return { content: [{ type: "text", text: "Browser is already running. Call browser_stop first." }] };
996
+ await this.cleanup();
871
997
  }
872
998
  if (mode === "remote-cdp") {
873
999
  if (!cdpUrl) throw new Error("cdpUrl is required for remote-cdp mode");
@@ -1404,7 +1530,11 @@ var DeviceTools = class {
1404
1530
  "Capture a photo from the device's camera and return it as base64 image data.",
1405
1531
  "",
1406
1532
  "Platform-specific: macOS (imagesnap), Windows (ffmpeg/dshow), Linux (fswebcam).",
1407
- "Requires a connected camera with OS permissions granted. If output_path is provided, the file is also saved to disk."
1533
+ "If output_path is provided, the file is also saved to disk.",
1534
+ "",
1535
+ "PERMISSIONS (macOS): Camera permission is needed. If it fails, macOS may show a native Allow/Deny dialog \u2014 ask the user to click Allow.",
1536
+ "If still denied, use execute_command to open Camera settings:",
1537
+ " open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Camera'"
1408
1538
  ].join("\n"),
1409
1539
  {
1410
1540
  output_path: z4.string().optional().describe("File path to save the captured photo. If omitted, returns image data only (temp file auto-cleaned).")
@@ -1422,11 +1552,10 @@ var DeviceTools = class {
1422
1552
  await execAsync3(cmd);
1423
1553
  } catch (err) {
1424
1554
  const e = err;
1555
+ const hint = p === "mac" ? "\n\n\u{1F527} FIX: Camera permission may be needed. Try:\n1. Retry \u2014 macOS may show a native Allow/Deny dialog.\n2. If denied, run via execute_command: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Camera'\nAsk the user to toggle ON for 'imagesnap' (or their terminal app), then retry." : "";
1425
1556
  return {
1426
- content: [{ type: "text", text: `\u274C Camera not found or inaccessible.
1427
- Cause: ${e.message}
1428
-
1429
- Please check if a camera is connected.` }],
1557
+ content: [{ type: "text", text: `\u274C Camera capture failed.
1558
+ Cause: ${e.message}${hint}` }],
1430
1559
  isError: true
1431
1560
  };
1432
1561
  }
@@ -1502,7 +1631,11 @@ Please check if a camera is connected.` }],
1502
1631
  "Start or stop screen recording. Captures the full screen as MP4 video.",
1503
1632
  "",
1504
1633
  "Use action='start' to begin, action='stop' to end and save. Only one recording can be active at a time.",
1505
- "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg)."
1634
+ "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg).",
1635
+ "",
1636
+ "PERMISSIONS (macOS): Screen Recording permission is needed. If denied, run via execute_command:",
1637
+ " open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1638
+ "Toggle ON for 'screencapture' (or your terminal app), then retry."
1506
1639
  ].join("\n"),
1507
1640
  {
1508
1641
  action: z4.enum(["start", "stop"]).describe("'start': begin recording, 'stop': end recording and save the file"),
@@ -1592,10 +1725,28 @@ import { execFile as execFile2 } from "child_process";
1592
1725
  import { promisify as promisify4 } from "util";
1593
1726
  import { platform as platform2 } from "os";
1594
1727
  var execFileAsync2 = promisify4(execFile2);
1728
/**
 * Run two short Swift snippets through `swift -e`, one after the other, each
 * with a 5 second timeout: the first calls CGRequestScreenCaptureAccess(),
 * the second calls AXIsProcessTrustedWithOptions with the prompt option set.
 *
 * This is best-effort: a failing snippet (missing toolchain, timeout, non-zero
 * exit) is ignored and never reaches the caller.
 *
 * @returns {Promise<void>}
 */
async function requestMacOSPermissions() {
  const snippets = [
    ["", "import CoreGraphics", "CGRequestScreenCaptureAccess()", ""],
    [
      "",
      "import ApplicationServices",
      "let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary",
      "AXIsProcessTrustedWithOptions(opts)",
      ""
    ]
  ];
  for (const lines of snippets) {
    try {
      await execFileAsync2("swift", ["-e", lines.join("\n")], { timeout: 5e3 });
    } catch {
      // Deliberately ignored: the prompt is a convenience, not a requirement.
    }
  }
}
1595
1745
  async function ensurePeekaboo() {
1596
1746
  if (platform2() !== "darwin") return false;
1597
1747
  try {
1598
1748
  await execFileAsync2("which", ["peekaboo"]);
1749
+ await requestMacOSPermissions();
1599
1750
  return true;
1600
1751
  } catch {
1601
1752
  console.log("\u23F3 peekaboo not found, installing via brew...");
@@ -1603,6 +1754,7 @@ async function ensurePeekaboo() {
1603
1754
  await execFileAsync2("brew", ["tap", "steipete/tap"], { timeout: 3e4 });
1604
1755
  await execFileAsync2("brew", ["install", "peekaboo"], { timeout: 12e4 });
1605
1756
  console.log("\u2705 peekaboo installed");
1757
+ await requestMacOSPermissions();
1606
1758
  return true;
1607
1759
  } catch (brewErr) {
1608
1760
  console.warn("\u26A0\uFE0F peekaboo install failed:", brewErr.message);
@@ -1625,6 +1777,17 @@ var APP_BLACKLIST = /* @__PURE__ */ new Set([
1625
1777
  ]);
1626
1778
  var consecutiveFailures = 0;
1627
1779
  var MAX_CONSECUTIVE_FAILURES = 2;
1780
/** Text appended to error messages that look permission-related; lists the commands to run. */
var PERM_FIX_HINT = [
  "\n\n\u{1F527} PERMISSION FIX \u2014 run these via execute_command:",
  "1. Check status: peekaboo permissions --json-output",
  "2. Screen Recording: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
  "3. Accessibility: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'",
  "Toggle ON for 'peekaboo' in the opened panel, then retry."
].join("\n");

/**
 * Heuristically decide whether an error message describes a permission problem.
 *
 * The check is a case-insensitive substring match against a fixed list of
 * phrases, so unrelated messages containing e.g. "denied" also match.
 *
 * @param {unknown} msg - Error text. Error-like objects are read through their
 *   `message`; `null`/`undefined` count as an empty message instead of throwing.
 * @returns {boolean} True when any known permission phrase occurs in the text.
 */
function isPermissionError(msg) {
  const markers = ["permission", "accessibility", "screen recording", "not trusted", "not allowed", "denied"];
  const text = typeof msg === "string" ? msg : String(msg?.message ?? msg ?? "");
  const lower = text.toLowerCase();
  return markers.some((marker) => lower.includes(marker));
}
1628
1791
  async function peekaboo(args) {
1629
1792
  try {
1630
1793
  const { stdout } = await execa("peekaboo", [...args, "--json-output"]);
@@ -1632,11 +1795,13 @@ async function peekaboo(args) {
1632
1795
  return JSON.parse(stdout);
1633
1796
  } catch (err) {
1634
1797
  consecutiveFailures++;
1798
+ const msg = err.message ?? "";
1799
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1635
1800
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1636
1801
  consecutiveFailures = 0;
1637
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
1802
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1638
1803
  }
1639
- throw err;
1804
+ throw new Error(`${msg}${hint}`);
1640
1805
  }
1641
1806
  }
1642
1807
  function checkBlacklist(app) {
@@ -1651,10 +1816,24 @@ var DesktopTools = class {
1651
1816
  [
1652
1817
  "Capture the macOS Accessibility Tree snapshot for a running application. Returns a structured element list with IDs, roles, labels, and positions.",
1653
1818
  "",
1654
- "WORKFLOW: List running apps \u2192 capture accessibility tree \u2192 find target element by role/label \u2192 interact using element ID or label (click, type, scroll).",
1655
- "Pass the returned snapshotId to subsequent interaction calls for 240x speed improvement (cached lookup vs. full re-scan).",
1819
+ "WHEN TO USE DESKTOP TOOLS:",
1820
+ "When the user asks to interact with, control, or automate ANY macOS application \u2014 use desktop_* tools, NOT execute_command.",
1821
+ "Workflow: desktop_open_app \u2192 desktop_see \u2192 desktop_click/type/paste \u2192 verify with desktop_see or desktop_screenshot.",
1822
+ "",
1823
+ "WORKFLOW TIPS:",
1824
+ "- If accessibility tree times out (complex UI apps like KakaoTalk): use desktop_screenshot + coordinate-based desktop_click instead.",
1825
+ "- For Korean/Japanese/Chinese text input: always use desktop_paste (NOT desktop_type).",
1826
+ "- For multi-window apps: use desktop_list_windows to find specific windows.",
1827
+ "- Pass snapshotId to subsequent calls for 240x speed improvement.",
1828
+ "",
1829
+ "PERMISSIONS: Requires Accessibility + Screen Recording for 'peekaboo'.",
1830
+ "If denied, run via execute_command:",
1831
+ " 1. peekaboo permissions --json-output",
1832
+ " 2. open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'",
1833
+ " 3. open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1834
+ "Toggle ON for 'peekaboo', then retry.",
1656
1835
  "",
1657
- "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger an automatic safety stop."
1836
+ "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger automatic safety stop."
1658
1837
  ].join("\n"),
1659
1838
  {
1660
1839
  app: z5.string().optional().describe("App name to target (e.g. 'Safari', 'Notes', 'Google Chrome'). Omit for the frontmost app.")
@@ -1688,6 +1867,8 @@ var DesktopTools = class {
1688
1867
  "The 'on' parameter accepts: element label text (e.g. 'Save'), accessibility ID from a previous accessibility tree capture, or coordinates as 'x,y' string.",
1689
1868
  "For faster interaction, pass the snapshotId from a recent accessibility tree capture.",
1690
1869
  "",
1870
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
1871
+ "",
1691
1872
  "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger automatic safety stop."
1692
1873
  ].join("\n"),
1693
1874
  {
@@ -1714,6 +1895,9 @@ var DesktopTools = class {
1714
1895
  "Type text into the currently focused UI element on macOS. The text is sent as keyboard input character-by-character.",
1715
1896
  "",
1716
1897
  "IMPORTANT: Always capture the accessibility tree first to verify the correct element is focused before typing.",
1898
+ "For Korean/Japanese/Chinese text or emoji, use desktop_paste instead \u2014 keyboard input does not support CJK characters.",
1899
+ "",
1900
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
1717
1901
  "",
1718
1902
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1719
1903
  ].join("\n"),
@@ -1738,6 +1922,8 @@ var DesktopTools = class {
1738
1922
  "",
1739
1923
  "Common shortcuts: 'cmd,c' (copy), 'cmd,v' (paste), 'cmd,z' (undo), 'cmd,s' (save), 'cmd,w' (close tab), 'cmd,q' (quit), 'cmd,shift,t' (reopen tab), 'cmd,tab' (switch app).",
1740
1924
  "",
1925
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
1926
+ "",
1741
1927
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1742
1928
  ].join("\n"),
1743
1929
  {
@@ -1761,6 +1947,8 @@ var DesktopTools = class {
1761
1947
  "",
1762
1948
  "Use 'ticks' to control scroll distance (default: 3, higher = more scrolling). Can target a specific element by label or ID from a previous accessibility tree capture.",
1763
1949
  "",
1950
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
1951
+ "",
1764
1952
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1765
1953
  ].join("\n"),
1766
1954
  {
@@ -1798,11 +1986,13 @@ var DesktopTools = class {
1798
1986
  };
1799
1987
  } catch (err) {
1800
1988
  consecutiveFailures++;
1989
+ const msg = err.message ?? "";
1990
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1801
1991
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1802
1992
  consecutiveFailures = 0;
1803
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
1993
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1804
1994
  }
1805
- throw err;
1995
+ throw new Error(`${msg}${hint}`);
1806
1996
  }
1807
1997
  }
1808
1998
  );
@@ -1812,7 +2002,9 @@ var DesktopTools = class {
1812
2002
  "List all open windows on macOS, optionally filtered by app name. Returns window titles and metadata.",
1813
2003
  "",
1814
2004
  "If no app is specified, lists windows for the frontmost application.",
1815
- "Use this after identifying running apps to find specific windows before capturing the accessibility tree or taking a screenshot."
2005
+ "Use this after identifying running apps to find specific windows before capturing the accessibility tree or taking a screenshot.",
2006
+ "",
2007
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'."
1816
2008
  ].join("\n"),
1817
2009
  {
1818
2010
  app: z5.string().optional().describe("Filter by app name. Omit to query the frontmost app.")
@@ -1836,11 +2028,13 @@ var DesktopTools = class {
1836
2028
  };
1837
2029
  } catch (err) {
1838
2030
  consecutiveFailures++;
2031
+ const msg = err.message ?? "";
2032
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1839
2033
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1840
2034
  consecutiveFailures = 0;
1841
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
2035
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1842
2036
  }
1843
- throw err;
2037
+ throw new Error(`${msg}${hint}`);
1844
2038
  }
1845
2039
  }
1846
2040
  );
@@ -1852,6 +2046,8 @@ var DesktopTools = class {
1852
2046
  "MODES: 'screen' captures the full display, 'window' captures a specific app window.",
1853
2047
  "TIP: Prefer the accessibility tree for understanding UI structure \u2014 use screenshots only when visual appearance matters (layouts, images, colors).",
1854
2048
  "",
2049
+ "PERMISSIONS: Requires macOS Screen Recording permission for 'peekaboo'.",
2050
+ "",
1855
2051
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1856
2052
  ].join("\n"),
1857
2053
  {
@@ -1889,6 +2085,8 @@ var DesktopTools = class {
1889
2085
  "Examples: ['File', 'New Tab'], ['Edit', 'Find', 'Find...'], ['View', 'Enter Full Screen'].",
1890
2086
  "Omit the 'app' parameter to target the frontmost app. The target app must be running.",
1891
2087
  "",
2088
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
2089
+ "",
1892
2090
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1893
2091
  ].join("\n"),
1894
2092
  {
@@ -1907,14 +2105,82 @@ var DesktopTools = class {
1907
2105
  };
1908
2106
  } catch (err) {
1909
2107
  consecutiveFailures++;
2108
+ const msg = err.message ?? "";
2109
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1910
2110
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1911
2111
  consecutiveFailures = 0;
1912
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
2112
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1913
2113
  }
1914
- throw err;
2114
+ throw new Error(`${msg}${hint}`);
1915
2115
  }
1916
2116
  }
1917
2117
  );
2118
// Registers the desktop_paste tool: checks the target app against the
// blacklist, then runs `peekaboo type --paste <text>` (adding `--app <app>`
// when an app is given) and returns the JSON result as pretty-printed text.
server.tool(
  "desktop_paste",
  [
    "Paste text via clipboard into the focused element. Use this for Korean, Japanese, Chinese, emoji, or any non-ASCII text.",
    "",
    "Unlike desktop_type (which sends keyboard input character-by-character), this uses the system clipboard to paste text, supporting all character sets including CJK and emoji.",
    "",
    "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
    "",
    "SAFETY: Terminal, iTerm, and Finder are blocked."
  ].join("\n"),
  {
    text: z5.string().describe("Text to paste into the focused element (supports Korean, Japanese, Chinese, emoji)"),
    app: z5.string().optional().describe("App name to focus before pasting")
  },
  async ({ text, app }) => {
    checkBlacklist(app);
    const appArgs = app ? ["--app", app] : [];
    const outcome = await peekaboo(["type", "--paste", text, ...appArgs]);
    const rendered = JSON.stringify(outcome, null, 2);
    return {
      content: [{ type: "text", text: rendered }]
    };
  }
);
2143
// Registers the desktop_open_app tool: checks the app against the blacklist,
// runs `open -a <app>`, waits 1.5 s, then reports the launch as text.
server.tool(
  "desktop_open_app",
  [
    "Launch or bring to front a macOS application. Use this as the FIRST STEP when automating any app.",
    "",
    "This uses macOS native 'open -a' command. The app will be launched if not running, or brought to front if already running.",
    "After launching, wait briefly then use desktop_see to capture the accessibility tree.",
    "",
    "SAFETY: Terminal, iTerm, and Finder are blocked for automation safety."
  ].join("\n"),
  {
    app: z5.string().describe("Application name to launch (e.g. 'Safari', 'Notes', 'KakaoTalk', 'Google Chrome')")
  },
  async ({ app }) => {
    checkBlacklist(app);
    await execa("open", ["-a", app]);

    // Fixed pause after `open` returns, before reporting success.
    const settleDelayMs = 1500;
    await new Promise((resolve) => {
      setTimeout(resolve, settleDelayMs);
    });

    const message = `Launched ${app}`;
    return {
      content: [{ type: "text", text: message }]
    };
  }
);
2165
// Registers the desktop_open_url tool: runs `open <url>` or `open -a <app> <url>`
// and reports the opened URL as text.
server.tool(
  "desktop_open_url",
  [
    "Open a URL in the default browser or a specified app. Also works for file paths and custom URL schemes.",
    "",
    "Examples: 'https://google.com', 'file:///path/to/file.html', 'x-apple.systempreferences:...'"
  ].join("\n"),
  {
    url: z5.string().describe("URL to open (https://, file://, or custom scheme)"),
    app: z5.string().optional().describe("Specific app to open the URL with (e.g. 'Google Chrome', 'Firefox')")
  },
  async ({ url, app }) => {
    // Apply the same app blacklist as the other desktop tools, so this tool
    // cannot be used to open a blocked app through `-a`.
    checkBlacklist(app);
    // `open` parses arguments starting with "-" as options; a URL or path never
    // needs to start that way, so reject it rather than let it change the command.
    if (url.startsWith("-")) {
      throw new Error(`Refusing to open "${url}": a URL must not start with "-"`);
    }
    // NOTE(review): a url that is itself a path to an application bundle still
    // launches that app without a blacklist check; confirm whether that needs filtering.
    const args = app ? ["-a", app, url] : [url];
    await execa("open", args);
    return {
      content: [{ type: "text", text: `Opened: ${url}` }]
    };
  }
);
1918
2184
  }
1919
2185
  };
1920
2186
 
@@ -42,14 +42,17 @@ var toolPermissions = {
42
42
  desktop_hotkey: "confirm",
43
43
  desktop_scroll: "confirm",
44
44
  desktop_menu: "confirm",
45
+ desktop_paste: "confirm",
45
46
  desktop_screenshot: "confirm",
47
+ desktop_open_app: "auto",
48
+ desktop_open_url: "auto",
46
49
  cron_create: "confirm",
47
50
  cron_delete: "confirm",
48
51
  edit_block: "confirm",
49
52
  kill_process: "confirm",
50
- // 시스템 변경 — 기본 차단 (PDF 7.3절)
51
- execute_command: "deny",
52
- write_file: "deny"
53
+ // System changes — approved through conversation (confirm)
54
+ execute_command: "confirm",
55
+ write_file: "confirm"
53
56
  };
54
57
  function checkPermission(toolName) {
55
58
  const level = toolPermissions[toolName];
@@ -73,19 +76,19 @@ var FilesystemTools = class {
73
76
  "ROUTING:",
74
77
  "- Use for system commands, package managers (npm, pip, brew), git, build tools, and scripting.",
75
78
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
79
+ "- NOT for macOS app GUI interaction. When the user asks to interact with, control, or automate any application (clicking, typing, reading screen, navigating menus), use the desktop_* tools instead (desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_menu, desktop_screenshot).",
80
+ "- The ONLY exception: opening System Preferences URLs for permissions (e.g. open 'x-apple.systempreferences:...').",
76
81
  "",
77
82
  "BEHAVIOR:",
78
- "- Safe, routine commands (ls, pwd, git status, echo): execute immediately without explanation.",
79
- "- Destructive or irreversible commands (rm -rf, sudo, shutdown, mkfs): explain what will happen and get user confirmation first.",
83
+ "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
80
84
  "- If a command fails, analyze the error and suggest an alternative. Do not retry the identical command more than twice.",
81
85
  "",
82
86
  "SAFETY:",
83
- "- Commands run with the user's full permissions. Never execute commands that could damage the system, expose credentials, or modify security settings without explicit user request.",
84
- "- Avoid piping untrusted input into shells. Use absolute paths when possible. Quote paths containing spaces."
87
+ "- Commands run with the user's full permissions. Use absolute paths when possible. Quote paths containing spaces."
85
88
  ].join("\n"),
86
89
  {
87
90
  command: z.string().describe("The shell command to execute. Use absolute paths when possible. Quote paths containing spaces."),
88
- timeout_ms: z.number().optional().default(3e4).describe("Maximum execution time in milliseconds (default: 30000). Increase for long-running builds or downloads."),
91
+ timeout_ms: z.number().optional().default(12e4).describe("Maximum execution time in milliseconds (default: 120000). Increase for very long-running builds or downloads."),
89
92
  background: z.boolean().optional().default(false).describe("Run in background without waiting for completion. Use for servers or long-running processes.")
90
93
  },
91
94
  async ({ command, timeout_ms, background }) => {
@@ -572,11 +575,11 @@ var BrowserTools = class {
572
575
  headless: z2.boolean().optional().default(false).describe("Run without visible window (managed mode only). Use for background tasks."),
573
576
  cdpUrl: z2.string().optional().describe("Chrome DevTools Protocol URL for remote-cdp mode (e.g. http://localhost:9222)"),
574
577
  profile: z2.string().optional().describe("Browser profile name for persistent sessions \u2014 preserves cookies, logins, and history across restarts (managed mode only)"),
575
- allowInternal: z2.boolean().optional().default(false).describe("Allow navigation to localhost and internal network URLs")
578
+ allowInternal: z2.boolean().optional().default(true).describe("Allow navigation to localhost and internal network URLs (default: true for local agent)")
576
579
  },
577
580
  ({ mode, headless, cdpUrl, profile, allowInternal }) => this.withLock(async () => {
578
581
  if (this.browser) {
579
- return { content: [{ type: "text", text: "Browser is already running. Call browser_stop first." }] };
582
+ await this.cleanup();
580
583
  }
581
584
  if (mode === "remote-cdp") {
582
585
  if (!cdpUrl) throw new Error("cdpUrl is required for remote-cdp mode");
@@ -1113,7 +1116,11 @@ var DeviceTools = class {
1113
1116
  "Capture a photo from the device's camera and return it as base64 image data.",
1114
1117
  "",
1115
1118
  "Platform-specific: macOS (imagesnap), Windows (ffmpeg/dshow), Linux (fswebcam).",
1116
- "Requires a connected camera with OS permissions granted. If output_path is provided, the file is also saved to disk."
1119
+ "If output_path is provided, the file is also saved to disk.",
1120
+ "",
1121
+ "PERMISSIONS (macOS): Camera permission is needed. If it fails, macOS may show a native Allow/Deny dialog \u2014 ask the user to click Allow.",
1122
+ "If still denied, use execute_command to open Camera settings:",
1123
+ " open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Camera'"
1117
1124
  ].join("\n"),
1118
1125
  {
1119
1126
  output_path: z4.string().optional().describe("File path to save the captured photo. If omitted, returns image data only (temp file auto-cleaned).")
@@ -1131,11 +1138,10 @@ var DeviceTools = class {
1131
1138
  await execAsync3(cmd);
1132
1139
  } catch (err) {
1133
1140
  const e = err;
1141
+ const hint = p === "mac" ? "\n\n\u{1F527} FIX: Camera permission may be needed. Try:\n1. Retry \u2014 macOS may show a native Allow/Deny dialog.\n2. If denied, run via execute_command: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Camera'\nAsk the user to toggle ON for 'imagesnap' (or their terminal app), then retry." : "";
1134
1142
  return {
1135
- content: [{ type: "text", text: `\u274C Camera not found or inaccessible.
1136
- Cause: ${e.message}
1137
-
1138
- Please check if a camera is connected.` }],
1143
+ content: [{ type: "text", text: `\u274C Camera capture failed.
1144
+ Cause: ${e.message}${hint}` }],
1139
1145
  isError: true
1140
1146
  };
1141
1147
  }
@@ -1211,7 +1217,11 @@ Please check if a camera is connected.` }],
1211
1217
  "Start or stop screen recording. Captures the full screen as MP4 video.",
1212
1218
  "",
1213
1219
  "Use action='start' to begin, action='stop' to end and save. Only one recording can be active at a time.",
1214
- "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg)."
1220
+ "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg).",
1221
+ "",
1222
+ "PERMISSIONS (macOS): Screen Recording permission is needed. If denied, run via execute_command:",
1223
+ " open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1224
+ "Toggle ON for 'screencapture' (or your terminal app), then retry."
1215
1225
  ].join("\n"),
1216
1226
  {
1217
1227
  action: z4.enum(["start", "stop"]).describe("'start': begin recording, 'stop': end recording and save the file"),
@@ -1301,10 +1311,28 @@ import { execFile as execFile2 } from "child_process";
1301
1311
  import { promisify as promisify4 } from "util";
1302
1312
  import { platform as platform2 } from "os";
1303
1313
  var execFileAsync2 = promisify4(execFile2);
1314
+ async function requestMacOSPermissions() {
1315
+ try {
1316
+ await execFileAsync2("swift", ["-e", `
1317
+ import CoreGraphics
1318
+ CGRequestScreenCaptureAccess()
1319
+ `], { timeout: 5e3 });
1320
+ } catch {
1321
+ }
1322
+ try {
1323
+ await execFileAsync2("swift", ["-e", `
1324
+ import ApplicationServices
1325
+ let opts = [kAXTrustedCheckOptionPrompt.takeUnretainedValue(): true] as CFDictionary
1326
+ AXIsProcessTrustedWithOptions(opts)
1327
+ `], { timeout: 5e3 });
1328
+ } catch {
1329
+ }
1330
+ }
1304
1331
  async function ensurePeekaboo() {
1305
1332
  if (platform2() !== "darwin") return false;
1306
1333
  try {
1307
1334
  await execFileAsync2("which", ["peekaboo"]);
1335
+ await requestMacOSPermissions();
1308
1336
  return true;
1309
1337
  } catch {
1310
1338
  console.log("\u23F3 peekaboo not found, installing via brew...");
@@ -1312,6 +1340,7 @@ async function ensurePeekaboo() {
1312
1340
  await execFileAsync2("brew", ["tap", "steipete/tap"], { timeout: 3e4 });
1313
1341
  await execFileAsync2("brew", ["install", "peekaboo"], { timeout: 12e4 });
1314
1342
  console.log("\u2705 peekaboo installed");
1343
+ await requestMacOSPermissions();
1315
1344
  return true;
1316
1345
  } catch (brewErr) {
1317
1346
  console.warn("\u26A0\uFE0F peekaboo install failed:", brewErr.message);
@@ -1334,6 +1363,17 @@ var APP_BLACKLIST = /* @__PURE__ */ new Set([
1334
1363
  ]);
1335
1364
  var consecutiveFailures = 0;
1336
1365
  var MAX_CONSECUTIVE_FAILURES = 2;
1366
+ var PERM_FIX_HINT = [
1367
+ "\n\n\u{1F527} PERMISSION FIX \u2014 run these via execute_command:",
1368
+ "1. Check status: peekaboo permissions --json-output",
1369
+ "2. Screen Recording: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1370
+ "3. Accessibility: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'",
1371
+ "Toggle ON for 'peekaboo' in the opened panel, then retry."
1372
+ ].join("\n");
1373
+ function isPermissionError(msg) {
1374
+ const lower = msg.toLowerCase();
1375
+ return lower.includes("permission") || lower.includes("accessibility") || lower.includes("screen recording") || lower.includes("not trusted") || lower.includes("not allowed") || lower.includes("denied");
1376
+ }
1337
1377
  async function peekaboo(args) {
1338
1378
  try {
1339
1379
  const { stdout } = await execa("peekaboo", [...args, "--json-output"]);
@@ -1341,11 +1381,13 @@ async function peekaboo(args) {
1341
1381
  return JSON.parse(stdout);
1342
1382
  } catch (err) {
1343
1383
  consecutiveFailures++;
1384
+ const msg = err.message ?? "";
1385
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1344
1386
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1345
1387
  consecutiveFailures = 0;
1346
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
1388
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1347
1389
  }
1348
- throw err;
1390
+ throw new Error(`${msg}${hint}`);
1349
1391
  }
1350
1392
  }
1351
1393
  function checkBlacklist(app) {
@@ -1360,10 +1402,24 @@ var DesktopTools = class {
1360
1402
  [
1361
1403
  "Capture the macOS Accessibility Tree snapshot for a running application. Returns a structured element list with IDs, roles, labels, and positions.",
1362
1404
  "",
1363
- "WORKFLOW: List running apps \u2192 capture accessibility tree \u2192 find target element by role/label \u2192 interact using element ID or label (click, type, scroll).",
1364
- "Pass the returned snapshotId to subsequent interaction calls for 240x speed improvement (cached lookup vs. full re-scan).",
1405
+ "WHEN TO USE DESKTOP TOOLS:",
1406
+ "When the user asks to interact with, control, or automate ANY macOS application \u2014 use desktop_* tools, NOT execute_command.",
1407
+ "Workflow: desktop_open_app \u2192 desktop_see \u2192 desktop_click/type/paste \u2192 verify with desktop_see or desktop_screenshot.",
1408
+ "",
1409
+ "WORKFLOW TIPS:",
1410
+ "- If accessibility tree times out (complex UI apps like KakaoTalk): use desktop_screenshot + coordinate-based desktop_click instead.",
1411
+ "- For Korean/Japanese/Chinese text input: always use desktop_paste (NOT desktop_type).",
1412
+ "- For multi-window apps: use desktop_list_windows to find specific windows.",
1413
+ "- Pass snapshotId to subsequent calls for 240x speed improvement.",
1365
1414
  "",
1366
- "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger an automatic safety stop."
1415
+ "PERMISSIONS: Requires Accessibility + Screen Recording for 'peekaboo'.",
1416
+ "If denied, run via execute_command:",
1417
+ " 1. peekaboo permissions --json-output",
1418
+ " 2. open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Accessibility'",
1419
+ " 3. open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1420
+ "Toggle ON for 'peekaboo', then retry.",
1421
+ "",
1422
+ "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger automatic safety stop."
1367
1423
  ].join("\n"),
1368
1424
  {
1369
1425
  app: z5.string().optional().describe("App name to target (e.g. 'Safari', 'Notes', 'Google Chrome'). Omit for the frontmost app.")
@@ -1397,6 +1453,8 @@ var DesktopTools = class {
1397
1453
  "The 'on' parameter accepts: element label text (e.g. 'Save'), accessibility ID from a previous accessibility tree capture, or coordinates as 'x,y' string.",
1398
1454
  "For faster interaction, pass the snapshotId from a recent accessibility tree capture.",
1399
1455
  "",
1456
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
1457
+ "",
1400
1458
  "SAFETY: Terminal, iTerm, and Finder are blocked. Two consecutive failures trigger automatic safety stop."
1401
1459
  ].join("\n"),
1402
1460
  {
@@ -1423,6 +1481,9 @@ var DesktopTools = class {
1423
1481
  "Type text into the currently focused UI element on macOS. The text is sent as keyboard input character-by-character.",
1424
1482
  "",
1425
1483
  "IMPORTANT: Always capture the accessibility tree first to verify the correct element is focused before typing.",
1484
+ "For Korean/Japanese/Chinese text or emoji, use desktop_paste instead \u2014 keyboard input does not support CJK characters.",
1485
+ "",
1486
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
1426
1487
  "",
1427
1488
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1428
1489
  ].join("\n"),
@@ -1447,6 +1508,8 @@ var DesktopTools = class {
1447
1508
  "",
1448
1509
  "Common shortcuts: 'cmd,c' (copy), 'cmd,v' (paste), 'cmd,z' (undo), 'cmd,s' (save), 'cmd,w' (close tab), 'cmd,q' (quit), 'cmd,shift,t' (reopen tab), 'cmd,tab' (switch app).",
1449
1510
  "",
1511
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
1512
+ "",
1450
1513
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1451
1514
  ].join("\n"),
1452
1515
  {
@@ -1470,6 +1533,8 @@ var DesktopTools = class {
1470
1533
  "",
1471
1534
  "Use 'ticks' to control scroll distance (default: 3, higher = more scrolling). Can target a specific element by label or ID from a previous accessibility tree capture.",
1472
1535
  "",
1536
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
1537
+ "",
1473
1538
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1474
1539
  ].join("\n"),
1475
1540
  {
@@ -1507,11 +1572,13 @@ var DesktopTools = class {
1507
1572
  };
1508
1573
  } catch (err) {
1509
1574
  consecutiveFailures++;
1575
+ const msg = err.message ?? "";
1576
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1510
1577
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1511
1578
  consecutiveFailures = 0;
1512
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
1579
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1513
1580
  }
1514
- throw err;
1581
+ throw new Error(`${msg}${hint}`);
1515
1582
  }
1516
1583
  }
1517
1584
  );
@@ -1521,7 +1588,9 @@ var DesktopTools = class {
1521
1588
  "List all open windows on macOS, optionally filtered by app name. Returns window titles and metadata.",
1522
1589
  "",
1523
1590
  "If no app is specified, lists windows for the frontmost application.",
1524
- "Use this after identifying running apps to find specific windows before capturing the accessibility tree or taking a screenshot."
1591
+ "Use this after identifying running apps to find specific windows before capturing the accessibility tree or taking a screenshot.",
1592
+ "",
1593
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'."
1525
1594
  ].join("\n"),
1526
1595
  {
1527
1596
  app: z5.string().optional().describe("Filter by app name. Omit to query the frontmost app.")
@@ -1545,11 +1614,13 @@ var DesktopTools = class {
1545
1614
  };
1546
1615
  } catch (err) {
1547
1616
  consecutiveFailures++;
1617
+ const msg = err.message ?? "";
1618
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1548
1619
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1549
1620
  consecutiveFailures = 0;
1550
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
1621
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1551
1622
  }
1552
- throw err;
1623
+ throw new Error(`${msg}${hint}`);
1553
1624
  }
1554
1625
  }
1555
1626
  );
@@ -1561,6 +1632,8 @@ var DesktopTools = class {
1561
1632
  "MODES: 'screen' captures the full display, 'window' captures a specific app window.",
1562
1633
  "TIP: Prefer the accessibility tree for understanding UI structure \u2014 use screenshots only when visual appearance matters (layouts, images, colors).",
1563
1634
  "",
1635
+ "PERMISSIONS: Requires macOS Screen Recording permission for 'peekaboo'.",
1636
+ "",
1564
1637
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1565
1638
  ].join("\n"),
1566
1639
  {
@@ -1598,6 +1671,8 @@ var DesktopTools = class {
1598
1671
  "Examples: ['File', 'New Tab'], ['Edit', 'Find', 'Find...'], ['View', 'Enter Full Screen'].",
1599
1672
  "Omit the 'app' parameter to target the frontmost app. The target app must be running.",
1600
1673
  "",
1674
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
1675
+ "",
1601
1676
  "SAFETY: Terminal, iTerm, and Finder are blocked."
1602
1677
  ].join("\n"),
1603
1678
  {
@@ -1616,14 +1691,82 @@ var DesktopTools = class {
1616
1691
  };
1617
1692
  } catch (err) {
1618
1693
  consecutiveFailures++;
1694
+ const msg = err.message ?? "";
1695
+ const hint = isPermissionError(msg) ? PERM_FIX_HINT : "";
1619
1696
  if (consecutiveFailures >= MAX_CONSECUTIVE_FAILURES) {
1620
1697
  consecutiveFailures = 0;
1621
- throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${err.message}`);
1698
+ throw new Error(`peekaboo failed ${MAX_CONSECUTIVE_FAILURES} times in a row. Auto-stopped for safety. Last error: ${msg}${hint}`);
1622
1699
  }
1623
- throw err;
1700
+ throw new Error(`${msg}${hint}`);
1624
1701
  }
1625
1702
  }
1626
1703
  );
1704
+ server.tool(
1705
+ "desktop_paste",
1706
+ [
1707
+ "Paste text via clipboard into the focused element. Use this for Korean, Japanese, Chinese, emoji, or any non-ASCII text.",
1708
+ "",
1709
+ "Unlike desktop_type (which sends keyboard input character-by-character), this uses the system clipboard to paste text, supporting all character sets including CJK and emoji.",
1710
+ "",
1711
+ "PERMISSIONS: Requires macOS Accessibility permission for 'peekaboo'.",
1712
+ "",
1713
+ "SAFETY: Terminal, iTerm, and Finder are blocked."
1714
+ ].join("\n"),
1715
+ {
1716
+ text: z5.string().describe("Text to paste into the focused element (supports Korean, Japanese, Chinese, emoji)"),
1717
+ app: z5.string().optional().describe("App name to focus before pasting")
1718
+ },
1719
+ async ({ text, app }) => {
1720
+ checkBlacklist(app);
1721
+ const args = ["type", "--paste", text];
1722
+ if (app) args.push("--app", app);
1723
+ const result = await peekaboo(args);
1724
+ return {
1725
+ content: [{ type: "text", text: JSON.stringify(result, null, 2) }]
1726
+ };
1727
+ }
1728
+ );
1729
+ server.tool(
1730
+ "desktop_open_app",
1731
+ [
1732
+ "Launch or bring to front a macOS application. Use this as the FIRST STEP when automating any app.",
1733
+ "",
1734
+ "This uses macOS native 'open -a' command. The app will be launched if not running, or brought to front if already running.",
1735
+ "After launching, wait briefly then use desktop_see to capture the accessibility tree.",
1736
+ "",
1737
+ "SAFETY: Terminal, iTerm, and Finder are blocked for automation safety."
1738
+ ].join("\n"),
1739
+ {
1740
+ app: z5.string().describe("Application name to launch (e.g. 'Safari', 'Notes', 'KakaoTalk', 'Google Chrome')")
1741
+ },
1742
+ async ({ app }) => {
1743
+ checkBlacklist(app);
1744
+ await execa("open", ["-a", app]);
1745
+ await new Promise((r) => setTimeout(r, 1500));
1746
+ return {
1747
+ content: [{ type: "text", text: `Launched ${app}` }]
1748
+ };
1749
+ }
1750
+ );
1751
+ server.tool(
1752
+ "desktop_open_url",
1753
+ [
1754
+ "Open a URL in the default browser or a specified app. Also works for file paths and custom URL schemes.",
1755
+ "",
1756
+ "Examples: 'https://google.com', 'file:///path/to/file.html', 'x-apple.systempreferences:...'"
1757
+ ].join("\n"),
1758
+ {
1759
+ url: z5.string().describe("URL to open (https://, file://, or custom scheme)"),
1760
+ app: z5.string().optional().describe("Specific app to open the URL with (e.g. 'Google Chrome', 'Firefox')")
1761
+ },
1762
+ async ({ url, app }) => {
1763
+ const args = app ? ["-a", app, url] : [url];
1764
+ await execa("open", args);
1765
+ return {
1766
+ content: [{ type: "text", text: `Opened: ${url}` }]
1767
+ };
1768
+ }
1769
+ );
1627
1770
  }
1628
1771
  };
1629
1772
 
@@ -43,14 +43,17 @@ var toolPermissions = {
43
43
  desktop_hotkey: "confirm",
44
44
  desktop_scroll: "confirm",
45
45
  desktop_menu: "confirm",
46
+ desktop_paste: "confirm",
46
47
  desktop_screenshot: "confirm",
48
+ desktop_open_app: "auto",
49
+ desktop_open_url: "auto",
47
50
  cron_create: "confirm",
48
51
  cron_delete: "confirm",
49
52
  edit_block: "confirm",
50
53
  kill_process: "confirm",
51
- // 시스템 변경 — 기본 차단 (PDF 7.3절)
52
- execute_command: "deny",
53
- write_file: "deny"
54
+ // 시스템 변경 — 대화 기반 승인 (confirm)
55
+ execute_command: "confirm",
56
+ write_file: "confirm"
54
57
  };
55
58
  function checkPermission(toolName) {
56
59
  const level = toolPermissions[toolName];
@@ -74,19 +77,19 @@ var FilesystemTools = class {
74
77
  "ROUTING:",
75
78
  "- Use for system commands, package managers (npm, pip, brew), git, build tools, and scripting.",
76
79
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
80
+ "- NOT for macOS app GUI interaction. When the user asks to interact with, control, or automate any application (clicking, typing, reading screen, navigating menus), use the desktop_* tools instead (desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_menu, desktop_screenshot).",
81
+ "- The ONLY exception: opening System Preferences URLs for permissions (e.g. open 'x-apple.systempreferences:...').",
77
82
  "",
78
83
  "BEHAVIOR:",
79
- "- Safe, routine commands (ls, pwd, git status, echo): execute immediately without explanation.",
80
- "- Destructive or irreversible commands (rm -rf, sudo, shutdown, mkfs): explain what will happen and get user confirmation first.",
84
+ "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
81
85
  "- If a command fails, analyze the error and suggest an alternative. Do not retry the identical command more than twice.",
82
86
  "",
83
87
  "SAFETY:",
84
- "- Commands run with the user's full permissions. Never execute commands that could damage the system, expose credentials, or modify security settings without explicit user request.",
85
- "- Avoid piping untrusted input into shells. Use absolute paths when possible. Quote paths containing spaces."
88
+ "- Commands run with the user's full permissions. Use absolute paths when possible. Quote paths containing spaces."
86
89
  ].join("\n"),
87
90
  {
88
91
  command: z.string().describe("The shell command to execute. Use absolute paths when possible. Quote paths containing spaces."),
89
- timeout_ms: z.number().optional().default(3e4).describe("Maximum execution time in milliseconds (default: 30000). Increase for long-running builds or downloads."),
92
+ timeout_ms: z.number().optional().default(12e4).describe("Maximum execution time in milliseconds (default: 120000). Increase for very long-running builds or downloads."),
90
93
  background: z.boolean().optional().default(false).describe("Run in background without waiting for completion. Use for servers or long-running processes.")
91
94
  },
92
95
  async ({ command, timeout_ms, background }) => {
@@ -573,11 +576,11 @@ var BrowserTools = class {
573
576
  headless: z2.boolean().optional().default(false).describe("Run without visible window (managed mode only). Use for background tasks."),
574
577
  cdpUrl: z2.string().optional().describe("Chrome DevTools Protocol URL for remote-cdp mode (e.g. http://localhost:9222)"),
575
578
  profile: z2.string().optional().describe("Browser profile name for persistent sessions \u2014 preserves cookies, logins, and history across restarts (managed mode only)"),
576
- allowInternal: z2.boolean().optional().default(false).describe("Allow navigation to localhost and internal network URLs")
579
+ allowInternal: z2.boolean().optional().default(true).describe("Allow navigation to localhost and internal network URLs (default: true for local agent)")
577
580
  },
578
581
  ({ mode, headless, cdpUrl, profile, allowInternal }) => this.withLock(async () => {
579
582
  if (this.browser) {
580
- return { content: [{ type: "text", text: "Browser is already running. Call browser_stop first." }] };
583
+ await this.cleanup();
581
584
  }
582
585
  if (mode === "remote-cdp") {
583
586
  if (!cdpUrl) throw new Error("cdpUrl is required for remote-cdp mode");
@@ -1114,7 +1117,11 @@ var DeviceTools = class {
1114
1117
  "Capture a photo from the device's camera and return it as base64 image data.",
1115
1118
  "",
1116
1119
  "Platform-specific: macOS (imagesnap), Windows (ffmpeg/dshow), Linux (fswebcam).",
1117
- "Requires a connected camera with OS permissions granted. If output_path is provided, the file is also saved to disk."
1120
+ "If output_path is provided, the file is also saved to disk.",
1121
+ "",
1122
+ "PERMISSIONS (macOS): Camera permission is needed. If it fails, macOS may show a native Allow/Deny dialog \u2014 ask the user to click Allow.",
1123
+ "If still denied, use execute_command to open Camera settings:",
1124
+ " open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Camera'"
1118
1125
  ].join("\n"),
1119
1126
  {
1120
1127
  output_path: z4.string().optional().describe("File path to save the captured photo. If omitted, returns image data only (temp file auto-cleaned).")
@@ -1132,11 +1139,10 @@ var DeviceTools = class {
1132
1139
  await execAsync3(cmd);
1133
1140
  } catch (err) {
1134
1141
  const e = err;
1142
+ const hint = p === "mac" ? "\n\n\u{1F527} FIX: Camera permission may be needed. Try:\n1. Retry \u2014 macOS may show a native Allow/Deny dialog.\n2. If denied, run via execute_command: open 'x-apple.systempreferences:com.apple.preference.security?Privacy_Camera'\nAsk the user to toggle ON for 'imagesnap' (or their terminal app), then retry." : "";
1135
1143
  return {
1136
- content: [{ type: "text", text: `\u274C Camera not found or inaccessible.
1137
- Cause: ${e.message}
1138
-
1139
- Please check if a camera is connected.` }],
1144
+ content: [{ type: "text", text: `\u274C Camera capture failed.
1145
+ Cause: ${e.message}${hint}` }],
1140
1146
  isError: true
1141
1147
  };
1142
1148
  }
@@ -1212,7 +1218,11 @@ Please check if a camera is connected.` }],
1212
1218
  "Start or stop screen recording. Captures the full screen as MP4 video.",
1213
1219
  "",
1214
1220
  "Use action='start' to begin, action='stop' to end and save. Only one recording can be active at a time.",
1215
- "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg)."
1221
+ "Platform-specific: macOS (screencapture -v), Windows/Linux (ffmpeg).",
1222
+ "",
1223
+ "PERMISSIONS (macOS): Screen Recording permission is needed. If denied, run via execute_command:",
1224
+ " open 'x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture'",
1225
+ "Toggle ON for 'screencapture' (or your terminal app), then retry."
1216
1226
  ].join("\n"),
1217
1227
  {
1218
1228
  action: z4.enum(["start", "stop"]).describe("'start': begin recording, 'stop': end recording and save the file"),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "junis",
3
- "version": "0.3.9",
3
+ "version": "0.3.11",
4
4
  "description": "One-line device control for AI agents",
5
5
  "type": "module",
6
6
  "bin": {