junis 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -190,13 +190,16 @@ async function uploadLargeFile(relay, base64Data, filename, contentType) {
190
190
  function isLargeBase64(base64) {
191
191
  return base64.length * 0.75 > LARGE_FILE_THRESHOLD;
192
192
  }
193
- function detectContentType(base64) {
194
- const header = base64.slice(0, 16);
195
- if (header.startsWith("/9j/")) return "image/jpeg";
196
- if (header.startsWith("iVBOR")) return "image/png";
197
- if (header.startsWith("R0lGO")) return "image/gif";
198
- if (header.startsWith("UklGR")) return "image/webp";
199
- if (header.startsWith("JVBER")) return "application/pdf";
193
+ function detectContentType(buffer) {
194
+ if (buffer[0] === 137 && buffer[1] === 80 && buffer[2] === 78 && buffer[3] === 71) return "image/png";
195
+ if (buffer[0] === 255 && buffer[1] === 216 && buffer[2] === 255) return "image/jpeg";
196
+ if (buffer[0] === 71 && buffer[1] === 73 && buffer[2] === 70 && buffer[3] === 56) return "image/gif";
197
+ if (buffer[0] === 37 && buffer[1] === 80 && buffer[2] === 68 && buffer[3] === 70) return "application/pdf";
198
+ if (buffer[0] === 82 && buffer[1] === 73 && buffer[2] === 70 && buffer[3] === 70 && buffer[8] === 87 && buffer[9] === 69 && buffer[10] === 66 && buffer[11] === 80) return "image/webp";
199
+ if (buffer[4] === 102 && buffer[5] === 116 && buffer[6] === 121 && buffer[7] === 112) return "video/mp4";
200
+ if (buffer[0] === 82 && buffer[1] === 73 && buffer[2] === 70 && buffer[3] === 70 && buffer[8] === 87 && buffer[9] === 65 && buffer[10] === 86 && buffer[11] === 69) return "audio/wav";
201
+ if (buffer[0] === 73 && buffer[1] === 68 && buffer[2] === 51) return "audio/mpeg";
202
+ if (buffer[0] === 255 && (buffer[1] === 251 || buffer[1] === 243 || buffer[1] === 242)) return "audio/mpeg";
200
203
  return "application/octet-stream";
201
204
  }
202
205
 
@@ -222,6 +225,14 @@ var RelayClient = class {
222
225
  heartbeatTimer = null;
223
226
  destroyed = false;
224
227
  lastPongTime = 0;
228
+ _currentRequestId = null;
229
+ _currentSessionId = null;
230
+ get currentRequestId() {
231
+ return this._currentRequestId;
232
+ }
233
+ get currentSessionId() {
234
+ return this._currentSessionId;
235
+ }
225
236
  // upload_url_response 대기용 pending 맵
226
237
  pendingUploadRequests = /* @__PURE__ */ new Map();
227
238
  // signed_url_response 대기용 pending 맵
@@ -274,11 +285,17 @@ var RelayClient = class {
274
285
  return;
275
286
  }
276
287
  if (msg.type === "mcp_request") {
288
+ this._currentRequestId = msg.id;
289
+ this._currentSessionId = msg.session_id || null;
277
290
  try {
278
291
  let result = await this.onMCPRequest(msg.id, msg.payload);
292
+ this._currentRequestId = null;
293
+ this._currentSessionId = null;
279
294
  result = await this.processLargeFiles(result);
280
295
  this.send({ type: "mcp_response", id: msg.id, payload: result });
281
296
  } catch (err) {
297
+ this._currentRequestId = null;
298
+ this._currentSessionId = null;
282
299
  this.send({
283
300
  type: "mcp_response",
284
301
  id: msg.id,
@@ -331,6 +348,14 @@ var RelayClient = class {
331
348
  this.ws.send(JSON.stringify(data));
332
349
  }
333
350
  }
351
+ sendProgress(requestId, line) {
352
+ this.send({
353
+ type: "mcp_stdout",
354
+ request_id: requestId,
355
+ session_id: this._currentSessionId || "",
356
+ line
357
+ });
358
+ }
334
359
  /**
335
360
  * 서버에 presigned PUT URL 요청.
336
361
  * WebSocket으로 upload_url_request 전송 → upload_url_response 대기.
@@ -433,7 +458,8 @@ var RelayClient = class {
433
458
  }
434
459
  } else if (item.type === "text" && typeof item.text === "string" && isLargeBase64(item.text) && /^[A-Za-z0-9+/\n\r]+=*$/.test(item.text.trim())) {
435
460
  try {
436
- const contentType = detectContentType(item.text);
461
+ const buffer = Buffer.from(item.text, "base64");
462
+ const contentType = detectContentType(buffer);
437
463
  if (contentType === "application/octet-stream") {
438
464
  content[i] = { type: "text", text: "[Binary file detected. Use the share_file tool to provide a download link to the user. Do not display this message.]" };
439
465
  continue;
@@ -443,7 +469,8 @@ var RelayClient = class {
443
469
  content[i] = { type: "text", text: url };
444
470
  } catch (err) {
445
471
  console.error("Failed to upload large text base64:", err);
446
- const contentType = detectContentType(item.text);
472
+ const buffer = Buffer.from(item.text, "base64");
473
+ const contentType = detectContentType(buffer);
447
474
  const ext = contentType.split("/")[1] || "bin";
448
475
  item.text = `[\uD30C\uC77C \uC5C5\uB85C\uB4DC \uC2E4\uD328: ${String(err)}. \uD30C\uC77C\uBA85: file.${ext}]`;
449
476
  }
@@ -488,7 +515,7 @@ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/
488
515
  import { createServer } from "http";
489
516
 
490
517
  // src/tools/filesystem.ts
491
- import { exec, execFile } from "child_process";
518
+ import { exec, execFile, spawn } from "child_process";
492
519
  import { promisify } from "util";
493
520
  import fs2 from "fs/promises";
494
521
  import path2 from "path";
@@ -559,7 +586,9 @@ function checkPermission(toolName) {
559
586
  var execAsync = promisify(exec);
560
587
  var execFileAsync = promisify(execFile);
561
588
  var FilesystemTools = class {
562
- register(server) {
589
+ relayClient;
590
+ register(server, relayClient) {
591
+ this.relayClient = relayClient;
563
592
  server.tool(
564
593
  "execute_command",
565
594
  [
@@ -570,9 +599,9 @@ var FilesystemTools = class {
570
599
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
571
600
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
572
601
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
573
- "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size for click/move/drag.",
602
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
574
603
  " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
575
- " This is the most reliable way to get pixel-accurate coordinates on macOS.",
604
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
576
605
  "",
577
606
  "BEHAVIOR:",
578
607
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -595,26 +624,84 @@ var FilesystemTools = class {
595
624
  exec(command);
596
625
  return { content: [{ type: "text", text: "Background execution started" }] };
597
626
  }
598
- try {
599
- const { stdout, stderr } = await execAsync(command, {
627
+ const requestId = this.relayClient?.currentRequestId ?? null;
628
+ if (requestId) {
629
+ this.relayClient?.sendProgress(requestId, `$ ${command}`);
630
+ }
631
+ return new Promise((resolve) => {
632
+ const child = spawn("sh", ["-c", command], {
600
633
  timeout: timeout_ms
601
634
  });
602
- return {
603
- content: [{ type: "text", text: stdout || stderr || "(no output)" }]
604
- };
605
- } catch (err) {
606
- const error = err;
607
- return {
608
- content: [
609
- {
610
- type: "text",
611
- text: `Error (exit ${error.code ?? "?"}): ${error.message}
612
- ${error.stderr ?? ""}`
635
+ let stdoutBuf = "";
636
+ let stderrBuf = "";
637
+ let stdoutLineBuffer = "";
638
+ let stderrLineBuffer = "";
639
+ const flushLines = (buffer, newChunk) => {
640
+ const combined = buffer + newChunk;
641
+ const lines = combined.split("\n");
642
+ const incomplete = lines.pop() ?? "";
643
+ for (const line of lines) {
644
+ if (requestId) {
645
+ this.relayClient?.sendProgress(requestId, line);
613
646
  }
614
- ],
615
- isError: true
647
+ }
648
+ return incomplete;
616
649
  };
617
- }
650
+ child.stdout.on("data", (chunk) => {
651
+ const text = chunk.toString();
652
+ stdoutBuf += text;
653
+ stdoutLineBuffer = flushLines(stdoutLineBuffer, text);
654
+ });
655
+ child.stderr.on("data", (chunk) => {
656
+ const text = chunk.toString();
657
+ stderrBuf += text;
658
+ stderrLineBuffer = flushLines(stderrLineBuffer, text);
659
+ });
660
+ child.on("close", (code, signal) => {
661
+ if (stdoutLineBuffer && requestId) {
662
+ this.relayClient?.sendProgress(requestId, stdoutLineBuffer);
663
+ }
664
+ if (stderrLineBuffer && requestId) {
665
+ this.relayClient?.sendProgress(requestId, stderrLineBuffer);
666
+ }
667
+ if (signal) {
668
+ resolve({
669
+ content: [
670
+ {
671
+ type: "text",
672
+ text: `Killed by signal ${signal} (timeout: ${timeout_ms}ms)`
673
+ }
674
+ ],
675
+ isError: true
676
+ });
677
+ } else if (code !== 0 && code !== null) {
678
+ resolve({
679
+ content: [
680
+ {
681
+ type: "text",
682
+ text: `Error (exit ${code}): ${stderrBuf || stdoutBuf || "(no output)"}`
683
+ }
684
+ ],
685
+ isError: true
686
+ });
687
+ } else {
688
+ resolve({
689
+ content: [{ type: "text", text: stdoutBuf || stderrBuf || "(no output)" }]
690
+ });
691
+ }
692
+ });
693
+ child.on("error", (err) => {
694
+ resolve({
695
+ content: [
696
+ {
697
+ type: "text",
698
+ text: `Error: ${err.message}`
699
+ }
700
+ ],
701
+ isError: true
702
+ });
703
+ });
704
+ });
618
705
  }
619
706
  );
620
707
  server.tool(
@@ -1782,14 +1869,14 @@ Cause: ${e.message}${hint}` }],
1782
1869
  },
1783
1870
  async ({ text }) => {
1784
1871
  const p = platform();
1785
- const { spawn: spawn2 } = await import("child_process");
1872
+ const { spawn: spawn3 } = await import("child_process");
1786
1873
  const cmd = {
1787
1874
  mac: { bin: "pbcopy", args: [] },
1788
1875
  win: { bin: "powershell", args: ["-Command", "$input | Set-Clipboard"] },
1789
1876
  linux: { bin: "xclip", args: ["-selection", "clipboard"] }
1790
1877
  }[p];
1791
1878
  await new Promise((resolve, reject) => {
1792
- const proc = spawn2(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1879
+ const proc = spawn3(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1793
1880
  proc.on("error", reject);
1794
1881
  proc.on("close", (code) => code === 0 ? resolve() : reject(new Error(`${cmd.bin} exited ${code}`)));
1795
1882
  proc.stdin.end(text);
@@ -1821,9 +1908,9 @@ Cause: ${e.message}${hint}` }],
1821
1908
  return { content: [{ type: "text", text: "Already recording." }] };
1822
1909
  }
1823
1910
  const tmpPath = output_path ?? `/tmp/junis_record_${Date.now()}.mp4`;
1824
- const { spawn: spawn2 } = await import("child_process");
1911
+ const { spawn: spawn3 } = await import("child_process");
1825
1912
  const cmd = p === "mac" ? ["screencapture", ["-v", tmpPath]] : ["ffmpeg", ["-f", p === "win" ? "gdigrab" : "x11grab", "-i", p === "win" ? "desktop" : ":0.0", tmpPath]];
1826
- const child = spawn2(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1913
+ const child = spawn3(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1827
1914
  child.unref();
1828
1915
  screenRecordPid = child.pid ?? null;
1829
1916
  return { content: [{ type: "text", text: `Recording started. Output path: ${tmpPath} (PID: ${screenRecordPid})` }] };
@@ -2310,9 +2397,10 @@ var DesktopTools = class {
2310
2397
  server.tool(
2311
2398
  "desktop_screenshot",
2312
2399
  [
2313
- "Take a screenshot. Returns base64 image at logical resolution (1:1 with click coordinate system).",
2314
- "Use for visual context, verify UI state, or locate elements when query-based methods fail.",
2315
- "For clicking, prefer desktop_click(query, app) which auto-resolves coords. If that fails, use osascript or visual estimation from this image as fallback."
2400
+ "Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
2401
+ "Use for visual context or to verify UI state ONLY.",
2402
+ "WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
2403
+ "Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
2316
2404
  ].join("\n"),
2317
2405
  {
2318
2406
  app: z5.string().optional().describe("Capture specific app window"),
@@ -2352,7 +2440,7 @@ var DesktopTools = class {
2352
2440
  "Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
2353
2441
  "BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
2354
2442
  "ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
2355
- "GOOD: Use coords 'x,y' \u2014 best from osascript center calculation (position + size/2), or from screenshot if osascript is unavailable.",
2443
+ "GOOD: Use coords 'x,y' \u2014 MUST be from osascript center calculation (position + size/2). NEVER estimate from screenshots.",
2356
2444
  "CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
2357
2445
  "If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
2358
2446
  "NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
@@ -2362,7 +2450,7 @@ var DesktopTools = class {
2362
2450
  role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
2363
2451
  on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
2364
2452
  coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
2365
- app: z5.string().optional().describe("App name (always specify for faster resolution)"),
2453
+ app: z5.string().optional().describe("App name"),
2366
2454
  snapshot: z5.string().optional().describe("Snapshot ID from desktop_see"),
2367
2455
  doubleClick: z5.boolean().optional().default(false).describe("Double-click"),
2368
2456
  rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
@@ -2513,7 +2601,7 @@ var DesktopTools = class {
2513
2601
  "desktop_move",
2514
2602
  [
2515
2603
  "Move mouse cursor without clicking. Use before scroll or to hover.",
2516
- "For coords, prefer osascript (position + size/2) or use 'to' with text label for auto-resolution."
2604
+ "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2517
2605
  ].join("\n"),
2518
2606
  {
2519
2607
  coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
@@ -2550,7 +2638,7 @@ var DesktopTools = class {
2550
2638
  [
2551
2639
  "Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
2552
2640
  "Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
2553
- "For coords, prefer osascript (position + size/2). If unavailable, screenshot-based estimation is acceptable."
2641
+ "Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2554
2642
  ].join("\n"),
2555
2643
  {
2556
2644
  from: z5.string().optional().describe("Source element ID from desktop_see"),
@@ -2760,13 +2848,17 @@ var DesktopTools = class {
2760
2848
  var mcpPort = 3e3;
2761
2849
  var globalBrowserTools = null;
2762
2850
  var desktopToolsEnabled = false;
2851
+ var globalRelayClient = null;
2852
+ function setRelayClient(client) {
2853
+ globalRelayClient = client;
2854
+ }
2763
2855
  function createMcpServer() {
2764
2856
  const server = new McpServer({
2765
2857
  name: "junis",
2766
2858
  version: "0.1.0"
2767
2859
  });
2768
2860
  const fsTools = new FilesystemTools();
2769
- fsTools.register(server);
2861
+ fsTools.register(server, globalRelayClient ?? void 0);
2770
2862
  if (globalBrowserTools) {
2771
2863
  globalBrowserTools.register(server);
2772
2864
  }
@@ -3089,7 +3181,7 @@ import { createRequire } from "module";
3089
3181
  import fs6 from "fs";
3090
3182
  import path3 from "path";
3091
3183
  import os2 from "os";
3092
- import { execSync, spawn } from "child_process";
3184
+ import { execSync, spawn as spawn2 } from "child_process";
3093
3185
  var CONFIG_DIR2 = path3.join(os2.homedir(), ".junis");
3094
3186
  var PID_FILE = path3.join(CONFIG_DIR2, "junis.pid");
3095
3187
  var LOG_DIR = path3.join(CONFIG_DIR2, "logs");
@@ -3127,7 +3219,7 @@ function startDaemon(port) {
3127
3219
  const scriptPath = process.argv[1];
3128
3220
  const out = fs6.openSync(LOG_FILE, "a");
3129
3221
  const err = fs6.openSync(LOG_FILE, "a");
3130
- const child = spawn(nodePath, [scriptPath, "start", "--daemon", "--port", String(port)], {
3222
+ const child = spawn2(nodePath, [scriptPath, "start", "--daemon", "--port", String(port)], {
3131
3223
  detached: true,
3132
3224
  stdio: ["ignore", out, err],
3133
3225
  env: { ...process.env }
@@ -3366,6 +3458,7 @@ async function runForeground(config, port) {
3366
3458
  process.exit(1);
3367
3459
  }
3368
3460
  });
3461
+ setRelayClient(relay);
3369
3462
  await relay.connect();
3370
3463
  const webUrl = process.env.JUNIS_WEB_URL ?? "https://junis.ai";
3371
3464
  console.log(" \u25C9 Relay connected");
@@ -3584,6 +3677,7 @@ program.command("start", { isDefault: true }).description("Start Junis agent con
3584
3677
  process.exit(1);
3585
3678
  }
3586
3679
  });
3680
+ setRelayClient(relay);
3587
3681
  await relay.connect();
3588
3682
  console.log("[junis daemon] relay connected");
3589
3683
  return;
@@ -4,7 +4,7 @@ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/
4
4
  import { createServer } from "http";
5
5
 
6
6
  // src/tools/filesystem.ts
7
- import { exec, execFile } from "child_process";
7
+ import { exec, execFile, spawn } from "child_process";
8
8
  import { promisify } from "util";
9
9
  import fs from "fs/promises";
10
10
  import path from "path";
@@ -75,7 +75,9 @@ function checkPermission(toolName) {
75
75
  var execAsync = promisify(exec);
76
76
  var execFileAsync = promisify(execFile);
77
77
  var FilesystemTools = class {
78
- register(server) {
78
+ relayClient;
79
+ register(server, relayClient) {
80
+ this.relayClient = relayClient;
79
81
  server.tool(
80
82
  "execute_command",
81
83
  [
@@ -86,9 +88,9 @@ var FilesystemTools = class {
86
88
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
87
89
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
88
90
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
89
- "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size for click/move/drag.",
91
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
90
92
  " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
91
- " This is the most reliable way to get pixel-accurate coordinates on macOS.",
93
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
92
94
  "",
93
95
  "BEHAVIOR:",
94
96
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -111,26 +113,84 @@ var FilesystemTools = class {
111
113
  exec(command);
112
114
  return { content: [{ type: "text", text: "Background execution started" }] };
113
115
  }
114
- try {
115
- const { stdout, stderr } = await execAsync(command, {
116
+ const requestId = this.relayClient?.currentRequestId ?? null;
117
+ if (requestId) {
118
+ this.relayClient?.sendProgress(requestId, `$ ${command}`);
119
+ }
120
+ return new Promise((resolve) => {
121
+ const child = spawn("sh", ["-c", command], {
116
122
  timeout: timeout_ms
117
123
  });
118
- return {
119
- content: [{ type: "text", text: stdout || stderr || "(no output)" }]
120
- };
121
- } catch (err) {
122
- const error = err;
123
- return {
124
- content: [
125
- {
126
- type: "text",
127
- text: `Error (exit ${error.code ?? "?"}): ${error.message}
128
- ${error.stderr ?? ""}`
124
+ let stdoutBuf = "";
125
+ let stderrBuf = "";
126
+ let stdoutLineBuffer = "";
127
+ let stderrLineBuffer = "";
128
+ const flushLines = (buffer, newChunk) => {
129
+ const combined = buffer + newChunk;
130
+ const lines = combined.split("\n");
131
+ const incomplete = lines.pop() ?? "";
132
+ for (const line of lines) {
133
+ if (requestId) {
134
+ this.relayClient?.sendProgress(requestId, line);
129
135
  }
130
- ],
131
- isError: true
136
+ }
137
+ return incomplete;
132
138
  };
133
- }
139
+ child.stdout.on("data", (chunk) => {
140
+ const text = chunk.toString();
141
+ stdoutBuf += text;
142
+ stdoutLineBuffer = flushLines(stdoutLineBuffer, text);
143
+ });
144
+ child.stderr.on("data", (chunk) => {
145
+ const text = chunk.toString();
146
+ stderrBuf += text;
147
+ stderrLineBuffer = flushLines(stderrLineBuffer, text);
148
+ });
149
+ child.on("close", (code, signal) => {
150
+ if (stdoutLineBuffer && requestId) {
151
+ this.relayClient?.sendProgress(requestId, stdoutLineBuffer);
152
+ }
153
+ if (stderrLineBuffer && requestId) {
154
+ this.relayClient?.sendProgress(requestId, stderrLineBuffer);
155
+ }
156
+ if (signal) {
157
+ resolve({
158
+ content: [
159
+ {
160
+ type: "text",
161
+ text: `Killed by signal ${signal} (timeout: ${timeout_ms}ms)`
162
+ }
163
+ ],
164
+ isError: true
165
+ });
166
+ } else if (code !== 0 && code !== null) {
167
+ resolve({
168
+ content: [
169
+ {
170
+ type: "text",
171
+ text: `Error (exit ${code}): ${stderrBuf || stdoutBuf || "(no output)"}`
172
+ }
173
+ ],
174
+ isError: true
175
+ });
176
+ } else {
177
+ resolve({
178
+ content: [{ type: "text", text: stdoutBuf || stderrBuf || "(no output)" }]
179
+ });
180
+ }
181
+ });
182
+ child.on("error", (err) => {
183
+ resolve({
184
+ content: [
185
+ {
186
+ type: "text",
187
+ text: `Error: ${err.message}`
188
+ }
189
+ ],
190
+ isError: true
191
+ });
192
+ });
193
+ });
134
194
  }
135
195
  );
136
196
  server.tool(
@@ -1298,14 +1358,14 @@ Cause: ${e.message}${hint}` }],
1298
1358
  },
1299
1359
  async ({ text }) => {
1300
1360
  const p = platform();
1301
- const { spawn } = await import("child_process");
1361
+ const { spawn: spawn2 } = await import("child_process");
1302
1362
  const cmd = {
1303
1363
  mac: { bin: "pbcopy", args: [] },
1304
1364
  win: { bin: "powershell", args: ["-Command", "$input | Set-Clipboard"] },
1305
1365
  linux: { bin: "xclip", args: ["-selection", "clipboard"] }
1306
1366
  }[p];
1307
1367
  await new Promise((resolve, reject) => {
1308
- const proc = spawn(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1368
+ const proc = spawn2(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1309
1369
  proc.on("error", reject);
1310
1370
  proc.on("close", (code) => code === 0 ? resolve() : reject(new Error(`${cmd.bin} exited ${code}`)));
1311
1371
  proc.stdin.end(text);
@@ -1337,9 +1397,9 @@ Cause: ${e.message}${hint}` }],
1337
1397
  return { content: [{ type: "text", text: "Already recording." }] };
1338
1398
  }
1339
1399
  const tmpPath = output_path ?? `/tmp/junis_record_${Date.now()}.mp4`;
1340
- const { spawn } = await import("child_process");
1400
+ const { spawn: spawn2 } = await import("child_process");
1341
1401
  const cmd = p === "mac" ? ["screencapture", ["-v", tmpPath]] : ["ffmpeg", ["-f", p === "win" ? "gdigrab" : "x11grab", "-i", p === "win" ? "desktop" : ":0.0", tmpPath]];
1342
- const child = spawn(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1402
+ const child = spawn2(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1343
1403
  child.unref();
1344
1404
  screenRecordPid = child.pid ?? null;
1345
1405
  return { content: [{ type: "text", text: `Recording started. Output path: ${tmpPath} (PID: ${screenRecordPid})` }] };
@@ -1826,9 +1886,10 @@ var DesktopTools = class {
1826
1886
  server.tool(
1827
1887
  "desktop_screenshot",
1828
1888
  [
1829
- "Take a screenshot. Returns base64 image at logical resolution (1:1 with click coordinate system).",
1830
- "Use for visual context, verify UI state, or locate elements when query-based methods fail.",
1831
- "For clicking, prefer desktop_click(query, app) which auto-resolves coords. If that fails, use osascript or visual estimation from this image as fallback."
1889
+ "Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
1890
+ "Use for visual context or to verify UI state ONLY.",
1891
+ "WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
1892
+ "Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
1832
1893
  ].join("\n"),
1833
1894
  {
1834
1895
  app: z5.string().optional().describe("Capture specific app window"),
@@ -1868,7 +1929,7 @@ var DesktopTools = class {
1868
1929
  "Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
1869
1930
  "BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
1870
1931
  "ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
1871
- "GOOD: Use coords 'x,y' \u2014 best from osascript center calculation (position + size/2), or from screenshot if osascript is unavailable.",
1932
+ "GOOD: Use coords 'x,y' \u2014 MUST be from osascript center calculation (position + size/2). NEVER estimate from screenshots.",
1872
1933
  "CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
1873
1934
  "If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
1874
1935
  "NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
@@ -1878,7 +1939,7 @@ var DesktopTools = class {
1878
1939
  role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
1879
1940
  on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
1880
1941
  coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
1881
- app: z5.string().optional().describe("App name (always specify for faster resolution)"),
1942
+ app: z5.string().optional().describe("App name"),
1882
1943
  snapshot: z5.string().optional().describe("Snapshot ID from desktop_see"),
1883
1944
  doubleClick: z5.boolean().optional().default(false).describe("Double-click"),
1884
1945
  rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
@@ -2029,7 +2090,7 @@ var DesktopTools = class {
2029
2090
  "desktop_move",
2030
2091
  [
2031
2092
  "Move mouse cursor without clicking. Use before scroll or to hover.",
2032
- "For coords, prefer osascript (position + size/2) or use 'to' with text label for auto-resolution."
2093
+ "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2033
2094
  ].join("\n"),
2034
2095
  {
2035
2096
  coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
@@ -2066,7 +2127,7 @@ var DesktopTools = class {
2066
2127
  [
2067
2128
  "Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
2068
2129
  "Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
2069
- "For coords, prefer osascript (position + size/2). If unavailable, screenshot-based estimation is acceptable."
2130
+ "Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2070
2131
  ].join("\n"),
2071
2132
  {
2072
2133
  from: z5.string().optional().describe("Source element ID from desktop_see"),
@@ -2276,13 +2337,17 @@ var DesktopTools = class {
2276
2337
  var mcpPort = 3e3;
2277
2338
  var globalBrowserTools = null;
2278
2339
  var desktopToolsEnabled = false;
2340
+ var globalRelayClient = null;
2341
+ function setRelayClient(client) {
2342
+ globalRelayClient = client;
2343
+ }
2279
2344
  function createMcpServer() {
2280
2345
  const server = new McpServer({
2281
2346
  name: "junis",
2282
2347
  version: "0.1.0"
2283
2348
  });
2284
2349
  const fsTools = new FilesystemTools();
2285
- fsTools.register(server);
2350
+ fsTools.register(server, globalRelayClient ?? void 0);
2286
2351
  if (globalBrowserTools) {
2287
2352
  globalBrowserTools.register(server);
2288
2353
  }
@@ -2573,6 +2638,7 @@ async function handleMCPRequest(id, payload) {
2573
2638
  export {
2574
2639
  checkPermission,
2575
2640
  handleMCPRequest,
2641
+ setRelayClient,
2576
2642
  startMCPServer,
2577
2643
  toolPermissions
2578
2644
  };
@@ -5,7 +5,7 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
5
5
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
6
6
 
7
7
  // src/tools/filesystem.ts
8
- import { exec, execFile } from "child_process";
8
+ import { exec, execFile, spawn } from "child_process";
9
9
  import { promisify } from "util";
10
10
  import fs from "fs/promises";
11
11
  import path from "path";
@@ -76,7 +76,9 @@ function checkPermission(toolName) {
76
76
  var execAsync = promisify(exec);
77
77
  var execFileAsync = promisify(execFile);
78
78
  var FilesystemTools = class {
79
- register(server) {
79
+ relayClient;
80
+ register(server, relayClient) {
81
+ this.relayClient = relayClient;
80
82
  server.tool(
81
83
  "execute_command",
82
84
  [
@@ -87,9 +89,9 @@ var FilesystemTools = class {
87
89
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
88
90
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
89
91
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
90
- "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size for click/move/drag.",
92
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
91
93
  " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
92
- " This is the most reliable way to get pixel-accurate coordinates on macOS.",
94
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
93
95
  "",
94
96
  "BEHAVIOR:",
95
97
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -112,26 +114,84 @@ var FilesystemTools = class {
112
114
  exec(command);
113
115
  return { content: [{ type: "text", text: "Background execution started" }] };
114
116
  }
115
- try {
116
- const { stdout, stderr } = await execAsync(command, {
117
+ const requestId = this.relayClient?.currentRequestId ?? null;
118
+ if (requestId) {
119
+ this.relayClient?.sendProgress(requestId, `$ ${command}`);
120
+ }
121
+ return new Promise((resolve) => {
122
+ const child = spawn("sh", ["-c", command], {
117
123
  timeout: timeout_ms
118
124
  });
119
- return {
120
- content: [{ type: "text", text: stdout || stderr || "(no output)" }]
121
- };
122
- } catch (err) {
123
- const error = err;
124
- return {
125
- content: [
126
- {
127
- type: "text",
128
- text: `Error (exit ${error.code ?? "?"}): ${error.message}
129
- ${error.stderr ?? ""}`
125
+ let stdoutBuf = "";
126
+ let stderrBuf = "";
127
+ let stdoutLineBuffer = "";
128
+ let stderrLineBuffer = "";
129
+ const flushLines = (buffer, newChunk) => {
130
+ const combined = buffer + newChunk;
131
+ const lines = combined.split("\n");
132
+ const incomplete = lines.pop() ?? "";
133
+ for (const line of lines) {
134
+ if (requestId) {
135
+ this.relayClient?.sendProgress(requestId, line);
130
136
  }
131
- ],
132
- isError: true
137
+ }
138
+ return incomplete;
133
139
  };
134
- }
140
+ child.stdout.on("data", (chunk) => {
141
+ const text = chunk.toString();
142
+ stdoutBuf += text;
143
+ stdoutLineBuffer = flushLines(stdoutLineBuffer, text);
144
+ });
145
+ child.stderr.on("data", (chunk) => {
146
+ const text = chunk.toString();
147
+ stderrBuf += text;
148
+ stderrLineBuffer = flushLines(stderrLineBuffer, text);
149
+ });
150
+ child.on("close", (code, signal) => {
151
+ if (stdoutLineBuffer && requestId) {
152
+ this.relayClient?.sendProgress(requestId, stdoutLineBuffer);
153
+ }
154
+ if (stderrLineBuffer && requestId) {
155
+ this.relayClient?.sendProgress(requestId, stderrLineBuffer);
156
+ }
157
+ if (signal) {
158
+ resolve({
159
+ content: [
160
+ {
161
+ type: "text",
162
+ text: `Killed by signal ${signal} (timeout: ${timeout_ms}ms)`
163
+ }
164
+ ],
165
+ isError: true
166
+ });
167
+ } else if (code !== 0 && code !== null) {
168
+ resolve({
169
+ content: [
170
+ {
171
+ type: "text",
172
+ text: `Error (exit ${code}): ${stderrBuf || stdoutBuf || "(no output)"}`
173
+ }
174
+ ],
175
+ isError: true
176
+ });
177
+ } else {
178
+ resolve({
179
+ content: [{ type: "text", text: stdoutBuf || stderrBuf || "(no output)" }]
180
+ });
181
+ }
182
+ });
183
+ child.on("error", (err) => {
184
+ resolve({
185
+ content: [
186
+ {
187
+ type: "text",
188
+ text: `Error: ${err.message}`
189
+ }
190
+ ],
191
+ isError: true
192
+ });
193
+ });
194
+ });
135
195
  }
136
196
  );
137
197
  server.tool(
@@ -1299,14 +1359,14 @@ Cause: ${e.message}${hint}` }],
1299
1359
  },
1300
1360
  async ({ text }) => {
1301
1361
  const p = platform();
1302
- const { spawn } = await import("child_process");
1362
+ const { spawn: spawn2 } = await import("child_process");
1303
1363
  const cmd = {
1304
1364
  mac: { bin: "pbcopy", args: [] },
1305
1365
  win: { bin: "powershell", args: ["-Command", "$input | Set-Clipboard"] },
1306
1366
  linux: { bin: "xclip", args: ["-selection", "clipboard"] }
1307
1367
  }[p];
1308
1368
  await new Promise((resolve, reject) => {
1309
- const proc = spawn(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1369
+ const proc = spawn2(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1310
1370
  proc.on("error", reject);
1311
1371
  proc.on("close", (code) => code === 0 ? resolve() : reject(new Error(`${cmd.bin} exited ${code}`)));
1312
1372
  proc.stdin.end(text);
@@ -1338,9 +1398,9 @@ Cause: ${e.message}${hint}` }],
1338
1398
  return { content: [{ type: "text", text: "Already recording." }] };
1339
1399
  }
1340
1400
  const tmpPath = output_path ?? `/tmp/junis_record_${Date.now()}.mp4`;
1341
- const { spawn } = await import("child_process");
1401
+ const { spawn: spawn2 } = await import("child_process");
1342
1402
  const cmd = p === "mac" ? ["screencapture", ["-v", tmpPath]] : ["ffmpeg", ["-f", p === "win" ? "gdigrab" : "x11grab", "-i", p === "win" ? "desktop" : ":0.0", tmpPath]];
1343
- const child = spawn(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1403
+ const child = spawn2(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1344
1404
  child.unref();
1345
1405
  screenRecordPid = child.pid ?? null;
1346
1406
  return { content: [{ type: "text", text: `Recording started. Output path: ${tmpPath} (PID: ${screenRecordPid})` }] };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "junis",
3
- "version": "0.4.2",
3
+ "version": "0.4.4",
4
4
  "description": "One-line device control for AI agents",
5
5
  "type": "module",
6
6
  "bin": {