junis 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -222,6 +222,14 @@ var RelayClient = class {
222
222
  heartbeatTimer = null;
223
223
  destroyed = false;
224
224
  lastPongTime = 0;
225
+ _currentRequestId = null;
226
+ _currentSessionId = null;
227
+ get currentRequestId() {
228
+ return this._currentRequestId;
229
+ }
230
+ get currentSessionId() {
231
+ return this._currentSessionId;
232
+ }
225
233
  // upload_url_response 대기용 pending 맵
226
234
  pendingUploadRequests = /* @__PURE__ */ new Map();
227
235
  // signed_url_response 대기용 pending 맵
@@ -274,11 +282,17 @@ var RelayClient = class {
274
282
  return;
275
283
  }
276
284
  if (msg.type === "mcp_request") {
285
+ this._currentRequestId = msg.id;
286
+ this._currentSessionId = msg.session_id || null;
277
287
  try {
278
288
  let result = await this.onMCPRequest(msg.id, msg.payload);
289
+ this._currentRequestId = null;
290
+ this._currentSessionId = null;
279
291
  result = await this.processLargeFiles(result);
280
292
  this.send({ type: "mcp_response", id: msg.id, payload: result });
281
293
  } catch (err) {
294
+ this._currentRequestId = null;
295
+ this._currentSessionId = null;
282
296
  this.send({
283
297
  type: "mcp_response",
284
298
  id: msg.id,
@@ -331,6 +345,14 @@ var RelayClient = class {
331
345
  this.ws.send(JSON.stringify(data));
332
346
  }
333
347
  }
348
+ sendProgress(requestId, line) {
349
+ this.send({
350
+ type: "mcp_stdout",
351
+ request_id: requestId,
352
+ session_id: this._currentSessionId || "",
353
+ line
354
+ });
355
+ }
334
356
  /**
335
357
  * 서버에 presigned PUT URL 요청.
336
358
  * WebSocket으로 upload_url_request 전송 → upload_url_response 대기.
@@ -488,7 +510,7 @@ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/
488
510
  import { createServer } from "http";
489
511
 
490
512
  // src/tools/filesystem.ts
491
- import { exec, execFile } from "child_process";
513
+ import { exec, execFile, spawn } from "child_process";
492
514
  import { promisify } from "util";
493
515
  import fs2 from "fs/promises";
494
516
  import path2 from "path";
@@ -559,7 +581,9 @@ function checkPermission(toolName) {
559
581
  var execAsync = promisify(exec);
560
582
  var execFileAsync = promisify(execFile);
561
583
  var FilesystemTools = class {
562
- register(server) {
584
+ relayClient;
585
+ register(server, relayClient) {
586
+ this.relayClient = relayClient;
563
587
  server.tool(
564
588
  "execute_command",
565
589
  [
@@ -570,9 +594,9 @@ var FilesystemTools = class {
570
594
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
571
595
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
572
596
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
573
- "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size for click/move/drag.",
597
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
574
598
  " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
575
- " This is the most reliable way to get pixel-accurate coordinates on macOS.",
599
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
576
600
  "",
577
601
  "BEHAVIOR:",
578
602
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -595,26 +619,84 @@ var FilesystemTools = class {
595
619
  exec(command);
596
620
  return { content: [{ type: "text", text: "Background execution started" }] };
597
621
  }
598
- try {
599
- const { stdout, stderr } = await execAsync(command, {
622
+ const requestId = this.relayClient?.currentRequestId ?? null;
623
+ if (requestId) {
624
+ this.relayClient?.sendProgress(requestId, `$ ${command}`);
625
+ }
626
+ return new Promise((resolve) => {
627
+ const child = spawn("sh", ["-c", command], {
600
628
  timeout: timeout_ms
601
629
  });
602
- return {
603
- content: [{ type: "text", text: stdout || stderr || "(no output)" }]
604
- };
605
- } catch (err) {
606
- const error = err;
607
- return {
608
- content: [
609
- {
610
- type: "text",
611
- text: `Error (exit ${error.code ?? "?"}): ${error.message}
612
- ${error.stderr ?? ""}`
630
+ let stdoutBuf = "";
631
+ let stderrBuf = "";
632
+ let stdoutLineBuffer = "";
633
+ let stderrLineBuffer = "";
634
+ const flushLines = (buffer, newChunk) => {
635
+ const combined = buffer + newChunk;
636
+ const lines = combined.split("\n");
637
+ const incomplete = lines.pop() ?? "";
638
+ for (const line of lines) {
639
+ if (requestId) {
640
+ this.relayClient?.sendProgress(requestId, line);
613
641
  }
614
- ],
615
- isError: true
642
+ }
643
+ return incomplete;
616
644
  };
617
- }
645
+ child.stdout.on("data", (chunk) => {
646
+ const text = chunk.toString();
647
+ stdoutBuf += text;
648
+ stdoutLineBuffer = flushLines(stdoutLineBuffer, text);
649
+ });
650
+ child.stderr.on("data", (chunk) => {
651
+ const text = chunk.toString();
652
+ stderrBuf += text;
653
+ stderrLineBuffer = flushLines(stderrLineBuffer, text);
654
+ });
655
+ child.on("close", (code, signal) => {
656
+ if (stdoutLineBuffer && requestId) {
657
+ this.relayClient?.sendProgress(requestId, stdoutLineBuffer);
658
+ }
659
+ if (stderrLineBuffer && requestId) {
660
+ this.relayClient?.sendProgress(requestId, stderrLineBuffer);
661
+ }
662
+ if (signal) {
663
+ resolve({
664
+ content: [
665
+ {
666
+ type: "text",
667
+ text: `Killed by signal ${signal} (timeout: ${timeout_ms}ms)`
668
+ }
669
+ ],
670
+ isError: true
671
+ });
672
+ } else if (code !== 0 && code !== null) {
673
+ resolve({
674
+ content: [
675
+ {
676
+ type: "text",
677
+ text: `Error (exit ${code}): ${stderrBuf || stdoutBuf || "(no output)"}`
678
+ }
679
+ ],
680
+ isError: true
681
+ });
682
+ } else {
683
+ resolve({
684
+ content: [{ type: "text", text: stdoutBuf || stderrBuf || "(no output)" }]
685
+ });
686
+ }
687
+ });
688
+ child.on("error", (err) => {
689
+ resolve({
690
+ content: [
691
+ {
692
+ type: "text",
693
+ text: `Error: ${err.message}`
694
+ }
695
+ ],
696
+ isError: true
697
+ });
698
+ });
699
+ });
618
700
  }
619
701
  );
620
702
  server.tool(
@@ -1782,14 +1864,14 @@ Cause: ${e.message}${hint}` }],
1782
1864
  },
1783
1865
  async ({ text }) => {
1784
1866
  const p = platform();
1785
- const { spawn: spawn2 } = await import("child_process");
1867
+ const { spawn: spawn3 } = await import("child_process");
1786
1868
  const cmd = {
1787
1869
  mac: { bin: "pbcopy", args: [] },
1788
1870
  win: { bin: "powershell", args: ["-Command", "$input | Set-Clipboard"] },
1789
1871
  linux: { bin: "xclip", args: ["-selection", "clipboard"] }
1790
1872
  }[p];
1791
1873
  await new Promise((resolve, reject) => {
1792
- const proc = spawn2(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1874
+ const proc = spawn3(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1793
1875
  proc.on("error", reject);
1794
1876
  proc.on("close", (code) => code === 0 ? resolve() : reject(new Error(`${cmd.bin} exited ${code}`)));
1795
1877
  proc.stdin.end(text);
@@ -1821,9 +1903,9 @@ Cause: ${e.message}${hint}` }],
1821
1903
  return { content: [{ type: "text", text: "Already recording." }] };
1822
1904
  }
1823
1905
  const tmpPath = output_path ?? `/tmp/junis_record_${Date.now()}.mp4`;
1824
- const { spawn: spawn2 } = await import("child_process");
1906
+ const { spawn: spawn3 } = await import("child_process");
1825
1907
  const cmd = p === "mac" ? ["screencapture", ["-v", tmpPath]] : ["ffmpeg", ["-f", p === "win" ? "gdigrab" : "x11grab", "-i", p === "win" ? "desktop" : ":0.0", tmpPath]];
1826
- const child = spawn2(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1908
+ const child = spawn3(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1827
1909
  child.unref();
1828
1910
  screenRecordPid = child.pid ?? null;
1829
1911
  return { content: [{ type: "text", text: `Recording started. Output path: ${tmpPath} (PID: ${screenRecordPid})` }] };
@@ -2310,9 +2392,10 @@ var DesktopTools = class {
2310
2392
  server.tool(
2311
2393
  "desktop_screenshot",
2312
2394
  [
2313
- "Take a screenshot. Returns base64 image at logical resolution (1:1 with click coordinate system).",
2314
- "Use for visual context, verify UI state, or locate elements when query-based methods fail.",
2315
- "For clicking, prefer desktop_click(query, app) which auto-resolves coords. If that fails, use osascript or visual estimation from this image as fallback."
2395
+ "Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
2396
+ "Use for visual context or to verify UI state ONLY.",
2397
+ "WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
2398
+ "Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
2316
2399
  ].join("\n"),
2317
2400
  {
2318
2401
  app: z5.string().optional().describe("Capture specific app window"),
@@ -2352,7 +2435,7 @@ var DesktopTools = class {
2352
2435
  "Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
2353
2436
  "BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
2354
2437
  "ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
2355
- "GOOD: Use coords 'x,y' \u2014 best from osascript center calculation (position + size/2), or from screenshot if osascript is unavailable.",
2438
+ "GOOD: Use coords 'x,y' \u2014 MUST be from osascript center calculation (position + size/2). NEVER estimate from screenshots.",
2356
2439
  "CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
2357
2440
  "If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
2358
2441
  "NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
@@ -2362,7 +2445,7 @@ var DesktopTools = class {
2362
2445
  role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
2363
2446
  on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
2364
2447
  coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
2365
- app: z5.string().optional().describe("App name (always specify for faster resolution)"),
2448
+ app: z5.string().optional().describe("App name"),
2366
2449
  snapshot: z5.string().optional().describe("Snapshot ID from desktop_see"),
2367
2450
  doubleClick: z5.boolean().optional().default(false).describe("Double-click"),
2368
2451
  rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
@@ -2513,7 +2596,7 @@ var DesktopTools = class {
2513
2596
  "desktop_move",
2514
2597
  [
2515
2598
  "Move mouse cursor without clicking. Use before scroll or to hover.",
2516
- "For coords, prefer osascript (position + size/2) or use 'to' with text label for auto-resolution."
2599
+ "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2517
2600
  ].join("\n"),
2518
2601
  {
2519
2602
  coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
@@ -2550,7 +2633,7 @@ var DesktopTools = class {
2550
2633
  [
2551
2634
  "Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
2552
2635
  "Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
2553
- "For coords, prefer osascript (position + size/2). If unavailable, screenshot-based estimation is acceptable."
2636
+ "Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2554
2637
  ].join("\n"),
2555
2638
  {
2556
2639
  from: z5.string().optional().describe("Source element ID from desktop_see"),
@@ -2760,13 +2843,17 @@ var DesktopTools = class {
2760
2843
  var mcpPort = 3e3;
2761
2844
  var globalBrowserTools = null;
2762
2845
  var desktopToolsEnabled = false;
2846
+ var globalRelayClient = null;
2847
+ function setRelayClient(client) {
2848
+ globalRelayClient = client;
2849
+ }
2763
2850
  function createMcpServer() {
2764
2851
  const server = new McpServer({
2765
2852
  name: "junis",
2766
2853
  version: "0.1.0"
2767
2854
  });
2768
2855
  const fsTools = new FilesystemTools();
2769
- fsTools.register(server);
2856
+ fsTools.register(server, globalRelayClient ?? void 0);
2770
2857
  if (globalBrowserTools) {
2771
2858
  globalBrowserTools.register(server);
2772
2859
  }
@@ -3089,7 +3176,7 @@ import { createRequire } from "module";
3089
3176
  import fs6 from "fs";
3090
3177
  import path3 from "path";
3091
3178
  import os2 from "os";
3092
- import { execSync, spawn } from "child_process";
3179
+ import { execSync, spawn as spawn2 } from "child_process";
3093
3180
  var CONFIG_DIR2 = path3.join(os2.homedir(), ".junis");
3094
3181
  var PID_FILE = path3.join(CONFIG_DIR2, "junis.pid");
3095
3182
  var LOG_DIR = path3.join(CONFIG_DIR2, "logs");
@@ -3127,7 +3214,7 @@ function startDaemon(port) {
3127
3214
  const scriptPath = process.argv[1];
3128
3215
  const out = fs6.openSync(LOG_FILE, "a");
3129
3216
  const err = fs6.openSync(LOG_FILE, "a");
3130
- const child = spawn(nodePath, [scriptPath, "start", "--daemon", "--port", String(port)], {
3217
+ const child = spawn2(nodePath, [scriptPath, "start", "--daemon", "--port", String(port)], {
3131
3218
  detached: true,
3132
3219
  stdio: ["ignore", out, err],
3133
3220
  env: { ...process.env }
@@ -3366,6 +3453,7 @@ async function runForeground(config, port) {
3366
3453
  process.exit(1);
3367
3454
  }
3368
3455
  });
3456
+ setRelayClient(relay);
3369
3457
  await relay.connect();
3370
3458
  const webUrl = process.env.JUNIS_WEB_URL ?? "https://junis.ai";
3371
3459
  console.log(" \u25C9 Relay connected");
@@ -3584,6 +3672,7 @@ program.command("start", { isDefault: true }).description("Start Junis agent con
3584
3672
  process.exit(1);
3585
3673
  }
3586
3674
  });
3675
+ setRelayClient(relay);
3587
3676
  await relay.connect();
3588
3677
  console.log("[junis daemon] relay connected");
3589
3678
  return;
@@ -4,7 +4,7 @@ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/
4
4
  import { createServer } from "http";
5
5
 
6
6
  // src/tools/filesystem.ts
7
- import { exec, execFile } from "child_process";
7
+ import { exec, execFile, spawn } from "child_process";
8
8
  import { promisify } from "util";
9
9
  import fs from "fs/promises";
10
10
  import path from "path";
@@ -75,7 +75,9 @@ function checkPermission(toolName) {
75
75
  var execAsync = promisify(exec);
76
76
  var execFileAsync = promisify(execFile);
77
77
  var FilesystemTools = class {
78
- register(server) {
78
+ relayClient;
79
+ register(server, relayClient) {
80
+ this.relayClient = relayClient;
79
81
  server.tool(
80
82
  "execute_command",
81
83
  [
@@ -86,9 +88,9 @@ var FilesystemTools = class {
86
88
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
87
89
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
88
90
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
89
- "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size for click/move/drag.",
91
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
90
92
  " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
91
- " This is the most reliable way to get pixel-accurate coordinates on macOS.",
93
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
92
94
  "",
93
95
  "BEHAVIOR:",
94
96
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -111,26 +113,84 @@ var FilesystemTools = class {
111
113
  exec(command);
112
114
  return { content: [{ type: "text", text: "Background execution started" }] };
113
115
  }
114
- try {
115
- const { stdout, stderr } = await execAsync(command, {
116
+ const requestId = this.relayClient?.currentRequestId ?? null;
117
+ if (requestId) {
118
+ this.relayClient?.sendProgress(requestId, `$ ${command}`);
119
+ }
120
+ return new Promise((resolve) => {
121
+ const child = spawn("sh", ["-c", command], {
116
122
  timeout: timeout_ms
117
123
  });
118
- return {
119
- content: [{ type: "text", text: stdout || stderr || "(no output)" }]
120
- };
121
- } catch (err) {
122
- const error = err;
123
- return {
124
- content: [
125
- {
126
- type: "text",
127
- text: `Error (exit ${error.code ?? "?"}): ${error.message}
128
- ${error.stderr ?? ""}`
124
+ let stdoutBuf = "";
125
+ let stderrBuf = "";
126
+ let stdoutLineBuffer = "";
127
+ let stderrLineBuffer = "";
128
+ const flushLines = (buffer, newChunk) => {
129
+ const combined = buffer + newChunk;
130
+ const lines = combined.split("\n");
131
+ const incomplete = lines.pop() ?? "";
132
+ for (const line of lines) {
133
+ if (requestId) {
134
+ this.relayClient?.sendProgress(requestId, line);
129
135
  }
130
- ],
131
- isError: true
136
+ }
137
+ return incomplete;
132
138
  };
133
- }
139
+ child.stdout.on("data", (chunk) => {
140
+ const text = chunk.toString();
141
+ stdoutBuf += text;
142
+ stdoutLineBuffer = flushLines(stdoutLineBuffer, text);
143
+ });
144
+ child.stderr.on("data", (chunk) => {
145
+ const text = chunk.toString();
146
+ stderrBuf += text;
147
+ stderrLineBuffer = flushLines(stderrLineBuffer, text);
148
+ });
149
+ child.on("close", (code, signal) => {
150
+ if (stdoutLineBuffer && requestId) {
151
+ this.relayClient?.sendProgress(requestId, stdoutLineBuffer);
152
+ }
153
+ if (stderrLineBuffer && requestId) {
154
+ this.relayClient?.sendProgress(requestId, stderrLineBuffer);
155
+ }
156
+ if (signal) {
157
+ resolve({
158
+ content: [
159
+ {
160
+ type: "text",
161
+ text: `Killed by signal ${signal} (timeout: ${timeout_ms}ms)`
162
+ }
163
+ ],
164
+ isError: true
165
+ });
166
+ } else if (code !== 0 && code !== null) {
167
+ resolve({
168
+ content: [
169
+ {
170
+ type: "text",
171
+ text: `Error (exit ${code}): ${stderrBuf || stdoutBuf || "(no output)"}`
172
+ }
173
+ ],
174
+ isError: true
175
+ });
176
+ } else {
177
+ resolve({
178
+ content: [{ type: "text", text: stdoutBuf || stderrBuf || "(no output)" }]
179
+ });
180
+ }
181
+ });
182
+ child.on("error", (err) => {
183
+ resolve({
184
+ content: [
185
+ {
186
+ type: "text",
187
+ text: `Error: ${err.message}`
188
+ }
189
+ ],
190
+ isError: true
191
+ });
192
+ });
193
+ });
134
194
  }
135
195
  );
136
196
  server.tool(
@@ -1298,14 +1358,14 @@ Cause: ${e.message}${hint}` }],
1298
1358
  },
1299
1359
  async ({ text }) => {
1300
1360
  const p = platform();
1301
- const { spawn } = await import("child_process");
1361
+ const { spawn: spawn2 } = await import("child_process");
1302
1362
  const cmd = {
1303
1363
  mac: { bin: "pbcopy", args: [] },
1304
1364
  win: { bin: "powershell", args: ["-Command", "$input | Set-Clipboard"] },
1305
1365
  linux: { bin: "xclip", args: ["-selection", "clipboard"] }
1306
1366
  }[p];
1307
1367
  await new Promise((resolve, reject) => {
1308
- const proc = spawn(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1368
+ const proc = spawn2(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1309
1369
  proc.on("error", reject);
1310
1370
  proc.on("close", (code) => code === 0 ? resolve() : reject(new Error(`${cmd.bin} exited ${code}`)));
1311
1371
  proc.stdin.end(text);
@@ -1337,9 +1397,9 @@ Cause: ${e.message}${hint}` }],
1337
1397
  return { content: [{ type: "text", text: "Already recording." }] };
1338
1398
  }
1339
1399
  const tmpPath = output_path ?? `/tmp/junis_record_${Date.now()}.mp4`;
1340
- const { spawn } = await import("child_process");
1400
+ const { spawn: spawn2 } = await import("child_process");
1341
1401
  const cmd = p === "mac" ? ["screencapture", ["-v", tmpPath]] : ["ffmpeg", ["-f", p === "win" ? "gdigrab" : "x11grab", "-i", p === "win" ? "desktop" : ":0.0", tmpPath]];
1342
- const child = spawn(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1402
+ const child = spawn2(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1343
1403
  child.unref();
1344
1404
  screenRecordPid = child.pid ?? null;
1345
1405
  return { content: [{ type: "text", text: `Recording started. Output path: ${tmpPath} (PID: ${screenRecordPid})` }] };
@@ -1826,9 +1886,10 @@ var DesktopTools = class {
1826
1886
  server.tool(
1827
1887
  "desktop_screenshot",
1828
1888
  [
1829
- "Take a screenshot. Returns base64 image at logical resolution (1:1 with click coordinate system).",
1830
- "Use for visual context, verify UI state, or locate elements when query-based methods fail.",
1831
- "For clicking, prefer desktop_click(query, app) which auto-resolves coords. If that fails, use osascript or visual estimation from this image as fallback."
1889
+ "Take a screenshot. Returns base64 image at logical resolution (matches click coordinate system 1:1).",
1890
+ "Use for visual context or to verify UI state ONLY.",
1891
+ "WARNING: NEVER estimate click coordinates from screenshot images \u2014 visual estimation causes misclicks. Use desktop_click(query, app) for auto-resolution, or execute_command + osascript for exact coords.",
1892
+ "Do not use visual coordinates from this screenshot directly. Use execute_command + osascript to verify exact element position first."
1832
1893
  ].join("\n"),
1833
1894
  {
1834
1895
  app: z5.string().optional().describe("Capture specific app window"),
@@ -1868,7 +1929,7 @@ var DesktopTools = class {
1868
1929
  "Click a UI element by text label (query), AX role (role), coordinates (coords), or element ID (on).",
1869
1930
  "BEST: Use query with app \u2014 auto-resolves coords via accessibility API (L1-L3 depth), then falls back to full tree search for deeply nested elements. No desktop_see needed.",
1870
1931
  "ROLE: Use role (e.g. 'AXTextArea', 'AXTextField') to find elements without visible text labels (input fields, text areas). Searches L1-L3 depth.",
1871
- "GOOD: Use coords 'x,y' \u2014 best from osascript center calculation (position + size/2), or from screenshot if osascript is unavailable.",
1932
+ "GOOD: Use coords 'x,y' \u2014 MUST be from osascript center calculation (position + size/2). NEVER estimate from screenshots.",
1872
1933
  "CAUTION: 'on' (element ID) has known offset bug \u2014 use query, role, or coords instead.",
1873
1934
  "If not found: try desktop_menu for menu items, desktop_hotkey for shortcuts.",
1874
1935
  "NOTE: Web page elements (inside browser) are invisible \u2014 use browser_* tools instead."
@@ -1878,7 +1939,7 @@ var DesktopTools = class {
1878
1939
  role: z5.string().optional().describe("AX role to find (e.g. 'AXTextArea', 'AXTextField', 'AXButton'). Use when element has no text label."),
1879
1940
  on: z5.string().optional().describe("Element ID from desktop_see (e.g. 'B1', 'T2')"),
1880
1941
  coords: z5.string().optional().describe("Screen coordinates 'x,y' (e.g. '500,300')"),
1881
- app: z5.string().optional().describe("App name (always specify for faster resolution)"),
1942
+ app: z5.string().optional().describe("App name"),
1882
1943
  snapshot: z5.string().optional().describe("Snapshot ID from desktop_see"),
1883
1944
  doubleClick: z5.boolean().optional().default(false).describe("Double-click"),
1884
1945
  rightClick: z5.boolean().optional().default(false).describe("Right-click (context menu)"),
@@ -2029,7 +2090,7 @@ var DesktopTools = class {
2029
2090
  "desktop_move",
2030
2091
  [
2031
2092
  "Move mouse cursor without clicking. Use before scroll or to hover.",
2032
- "For coords, prefer osascript (position + size/2) or use 'to' with text label for auto-resolution."
2093
+ "Before using coords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2033
2094
  ].join("\n"),
2034
2095
  {
2035
2096
  coords: z5.string().optional().describe("Screen coordinates 'x,y'"),
@@ -2066,7 +2127,7 @@ var DesktopTools = class {
2066
2127
  [
2067
2128
  "Drag and drop between elements or coordinates. Supports cross-app drag (e.g. file to Trash).",
2068
2129
  "Prefer fromCoords/toCoords for accuracy. Element IDs (from/to) have known offset bug.",
2069
- "For coords, prefer osascript (position + size/2). If unavailable, screenshot-based estimation is acceptable."
2130
+ "Before using fromCoords/toCoords, always query exact position via execute_command + osascript. Calculate center = position + size/2. Never estimate from screenshots."
2070
2131
  ].join("\n"),
2071
2132
  {
2072
2133
  from: z5.string().optional().describe("Source element ID from desktop_see"),
@@ -2276,13 +2337,17 @@ var DesktopTools = class {
2276
2337
  var mcpPort = 3e3;
2277
2338
  var globalBrowserTools = null;
2278
2339
  var desktopToolsEnabled = false;
2340
+ var globalRelayClient = null;
2341
+ function setRelayClient(client) {
2342
+ globalRelayClient = client;
2343
+ }
2279
2344
  function createMcpServer() {
2280
2345
  const server = new McpServer({
2281
2346
  name: "junis",
2282
2347
  version: "0.1.0"
2283
2348
  });
2284
2349
  const fsTools = new FilesystemTools();
2285
- fsTools.register(server);
2350
+ fsTools.register(server, globalRelayClient ?? void 0);
2286
2351
  if (globalBrowserTools) {
2287
2352
  globalBrowserTools.register(server);
2288
2353
  }
@@ -2573,6 +2638,7 @@ async function handleMCPRequest(id, payload) {
2573
2638
  export {
2574
2639
  checkPermission,
2575
2640
  handleMCPRequest,
2641
+ setRelayClient,
2576
2642
  startMCPServer,
2577
2643
  toolPermissions
2578
2644
  };
@@ -5,7 +5,7 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
5
5
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
6
6
 
7
7
  // src/tools/filesystem.ts
8
- import { exec, execFile } from "child_process";
8
+ import { exec, execFile, spawn } from "child_process";
9
9
  import { promisify } from "util";
10
10
  import fs from "fs/promises";
11
11
  import path from "path";
@@ -76,7 +76,9 @@ function checkPermission(toolName) {
76
76
  var execAsync = promisify(exec);
77
77
  var execFileAsync = promisify(execFile);
78
78
  var FilesystemTools = class {
79
- register(server) {
79
+ relayClient;
80
+ register(server, relayClient) {
81
+ this.relayClient = relayClient;
80
82
  server.tool(
81
83
  "execute_command",
82
84
  [
@@ -87,9 +89,9 @@ var FilesystemTools = class {
87
89
  "- For reading files prefer read_file, for editing prefer edit_block, for searching prefer search_code.",
88
90
  "- NOT for macOS app GUI interaction. Use desktop_* tools instead: desktop_open_app, desktop_see, desktop_click, desktop_type, desktop_paste, desktop_hotkey, desktop_scroll, desktop_move, desktop_menu, desktop_screenshot.",
89
91
  "- Exception: permission fix commands (swift -e, peekaboo permissions, open 'x-apple.systempreferences:...').",
90
- "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size for click/move/drag.",
92
+ "- Exception: osascript coordinate queries \u2014 Use osascript via this tool to query exact UI element position and size before any click/move/drag operation.",
91
93
  " Pattern: osascript + System Events \u2192 position + size \u2192 center = (x + w/2, y + h/2) \u2192 desktop_click(coords).",
92
- " This is the most reliable way to get pixel-accurate coordinates on macOS.",
94
+ " This is the ONLY reliable way to get pixel-accurate coordinates on macOS. NEVER estimate coords from screenshots.",
93
95
  "",
94
96
  "BEHAVIOR:",
95
97
  "- Execute commands directly when the user requests them. Do not ask for confirmation \u2014 the user has already decided.",
@@ -112,26 +114,84 @@ var FilesystemTools = class {
112
114
  exec(command);
113
115
  return { content: [{ type: "text", text: "Background execution started" }] };
114
116
  }
115
- try {
116
- const { stdout, stderr } = await execAsync(command, {
117
+ const requestId = this.relayClient?.currentRequestId ?? null;
118
+ if (requestId) {
119
+ this.relayClient?.sendProgress(requestId, `$ ${command}`);
120
+ }
121
+ return new Promise((resolve) => {
122
+ const child = spawn("sh", ["-c", command], {
117
123
  timeout: timeout_ms
118
124
  });
119
- return {
120
- content: [{ type: "text", text: stdout || stderr || "(no output)" }]
121
- };
122
- } catch (err) {
123
- const error = err;
124
- return {
125
- content: [
126
- {
127
- type: "text",
128
- text: `Error (exit ${error.code ?? "?"}): ${error.message}
129
- ${error.stderr ?? ""}`
125
+ let stdoutBuf = "";
126
+ let stderrBuf = "";
127
+ let stdoutLineBuffer = "";
128
+ let stderrLineBuffer = "";
129
+ const flushLines = (buffer, newChunk) => {
130
+ const combined = buffer + newChunk;
131
+ const lines = combined.split("\n");
132
+ const incomplete = lines.pop() ?? "";
133
+ for (const line of lines) {
134
+ if (requestId) {
135
+ this.relayClient?.sendProgress(requestId, line);
130
136
  }
131
- ],
132
- isError: true
137
+ }
138
+ return incomplete;
133
139
  };
134
- }
140
+ child.stdout.on("data", (chunk) => {
141
+ const text = chunk.toString();
142
+ stdoutBuf += text;
143
+ stdoutLineBuffer = flushLines(stdoutLineBuffer, text);
144
+ });
145
+ child.stderr.on("data", (chunk) => {
146
+ const text = chunk.toString();
147
+ stderrBuf += text;
148
+ stderrLineBuffer = flushLines(stderrLineBuffer, text);
149
+ });
150
+ child.on("close", (code, signal) => {
151
+ if (stdoutLineBuffer && requestId) {
152
+ this.relayClient?.sendProgress(requestId, stdoutLineBuffer);
153
+ }
154
+ if (stderrLineBuffer && requestId) {
155
+ this.relayClient?.sendProgress(requestId, stderrLineBuffer);
156
+ }
157
+ if (signal) {
158
+ resolve({
159
+ content: [
160
+ {
161
+ type: "text",
162
+ text: `Killed by signal ${signal} (timeout: ${timeout_ms}ms)`
163
+ }
164
+ ],
165
+ isError: true
166
+ });
167
+ } else if (code !== 0 && code !== null) {
168
+ resolve({
169
+ content: [
170
+ {
171
+ type: "text",
172
+ text: `Error (exit ${code}): ${stderrBuf || stdoutBuf || "(no output)"}`
173
+ }
174
+ ],
175
+ isError: true
176
+ });
177
+ } else {
178
+ resolve({
179
+ content: [{ type: "text", text: stdoutBuf || stderrBuf || "(no output)" }]
180
+ });
181
+ }
182
+ });
183
+ child.on("error", (err) => {
184
+ resolve({
185
+ content: [
186
+ {
187
+ type: "text",
188
+ text: `Error: ${err.message}`
189
+ }
190
+ ],
191
+ isError: true
192
+ });
193
+ });
194
+ });
135
195
  }
136
196
  );
137
197
  server.tool(
@@ -1299,14 +1359,14 @@ Cause: ${e.message}${hint}` }],
1299
1359
  },
1300
1360
  async ({ text }) => {
1301
1361
  const p = platform();
1302
- const { spawn } = await import("child_process");
1362
+ const { spawn: spawn2 } = await import("child_process");
1303
1363
  const cmd = {
1304
1364
  mac: { bin: "pbcopy", args: [] },
1305
1365
  win: { bin: "powershell", args: ["-Command", "$input | Set-Clipboard"] },
1306
1366
  linux: { bin: "xclip", args: ["-selection", "clipboard"] }
1307
1367
  }[p];
1308
1368
  await new Promise((resolve, reject) => {
1309
- const proc = spawn(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1369
+ const proc = spawn2(cmd.bin, cmd.args, { stdio: ["pipe", "ignore", "ignore"] });
1310
1370
  proc.on("error", reject);
1311
1371
  proc.on("close", (code) => code === 0 ? resolve() : reject(new Error(`${cmd.bin} exited ${code}`)));
1312
1372
  proc.stdin.end(text);
@@ -1338,9 +1398,9 @@ Cause: ${e.message}${hint}` }],
1338
1398
  return { content: [{ type: "text", text: "Already recording." }] };
1339
1399
  }
1340
1400
  const tmpPath = output_path ?? `/tmp/junis_record_${Date.now()}.mp4`;
1341
- const { spawn } = await import("child_process");
1401
+ const { spawn: spawn2 } = await import("child_process");
1342
1402
  const cmd = p === "mac" ? ["screencapture", ["-v", tmpPath]] : ["ffmpeg", ["-f", p === "win" ? "gdigrab" : "x11grab", "-i", p === "win" ? "desktop" : ":0.0", tmpPath]];
1343
- const child = spawn(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1403
+ const child = spawn2(cmd[0], cmd[1], { detached: true, stdio: "ignore" });
1344
1404
  child.unref();
1345
1405
  screenRecordPid = child.pid ?? null;
1346
1406
  return { content: [{ type: "text", text: `Recording started. Output path: ${tmpPath} (PID: ${screenRecordPid})` }] };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "junis",
3
- "version": "0.4.2",
3
+ "version": "0.4.3",
4
4
  "description": "One-line device control for AI agents",
5
5
  "type": "module",
6
6
  "bin": {