@pulso/companion 0.3.3 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +233 -30
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -27,7 +27,7 @@ if (!TOKEN) {
27
27
  var ACCESS_LEVEL = process.env.PULSO_ACCESS ?? process.argv.find((_, i, a) => a[i - 1] === "--access") ?? "sandboxed";
28
28
  var WAKE_WORD_ENABLED = process.argv.includes("--wake-word");
29
29
  var PICOVOICE_ACCESS_KEY = process.env.PICOVOICE_ACCESS_KEY ?? process.argv.find((_, i, a) => a[i - 1] === "--picovoice-key") ?? "";
30
- var WS_URL = API_URL.replace("https://", "wss://").replace("http://", "ws://") + "/ws/browser?token=" + TOKEN;
30
+ var WS_URL = API_URL.replace("https://", "wss://").replace("http://", "ws://") + "/ws/companion?token=" + TOKEN;
31
31
  var HOME = homedir();
32
32
  var RECONNECT_DELAY = 5e3;
33
33
  var SAFE_DIRS = ["Documents", "Desktop", "Downloads", "Projects", "Projetos"];
@@ -478,13 +478,68 @@ async function handleCommand(command, params) {
478
478
  case "sys_open_app": {
479
479
  const app = params.app;
480
480
  if (!app) return { success: false, error: "Missing app name" };
481
- await runShell(`open -a "${app.replace(/"/g, "")}"`);
482
- return { success: true, data: { opened: app } };
481
+ const sanitizedApp = app.replace(/"/g, "");
482
+ try {
483
+ await runShell(`open -a "${sanitizedApp}"`);
484
+ } catch (e) {
485
+ return { success: false, error: `Failed to open "${sanitizedApp}": ${e.message}`, errorCode: "APP_NOT_FOUND" };
486
+ }
487
+ let launched = false;
488
+ for (let i = 0; i < 10; i++) {
489
+ await new Promise((r) => setTimeout(r, 500));
490
+ try {
491
+ const running = await runAppleScript(
492
+ `tell application "System Events" to (name of processes) contains "${sanitizedApp}"`
493
+ );
494
+ if (running.trim() === "true") {
495
+ launched = true;
496
+ break;
497
+ }
498
+ } catch {
499
+ }
500
+ }
501
+ let windowInfo;
502
+ if (launched) {
503
+ try {
504
+ const winCheck = await runAppleScript(`
505
+ tell application "System Events"
506
+ tell process "${sanitizedApp}"
507
+ if (count of windows) > 0 then
508
+ set w to window 1
509
+ return (name of w) & "|" & (position of w as string) & "|" & (size of w as string)
510
+ else
511
+ return "no-window"
512
+ end if
513
+ end tell
514
+ end tell`);
515
+ windowInfo = winCheck.trim();
516
+ } catch {
517
+ }
518
+ }
519
+ return {
520
+ success: true,
521
+ data: {
522
+ opened: sanitizedApp,
523
+ launched,
524
+ hasWindow: windowInfo ? windowInfo !== "no-window" : false,
525
+ windowInfo: windowInfo && windowInfo !== "no-window" ? windowInfo : void 0,
526
+ note: launched ? `"${sanitizedApp}" is running${windowInfo && windowInfo !== "no-window" ? " with a visible window" : " (may still be loading)"}.` : `"${sanitizedApp}" was requested to open but process not detected yet. It may still be launching \u2014 take a screenshot to verify.`
527
+ }
528
+ };
483
529
  }
484
530
  case "sys_open_url": {
485
531
  const url = params.url;
486
532
  if (!url) return { success: false, error: "Missing URL" };
487
- await runShell(`open "${url.replace(/"/g, "")}"`);
533
+ const sanitizedUrl = url.replace(/"/g, '\\"');
534
+ try {
535
+ await runAppleScript(`
536
+ tell application "Google Chrome"
537
+ activate
538
+ tell front window to make new tab with properties {URL:"${sanitizedUrl}"}
539
+ end tell`);
540
+ } catch {
541
+ await runShell(`open "${url.replace(/"/g, "")}"`);
542
+ }
488
543
  return { success: true, data: { opened: url } };
489
544
  }
490
545
  case "sys_speak": {
@@ -685,29 +740,33 @@ async function handleCommand(command, params) {
685
740
  return { success: true, data: { path, written: content.length } };
686
741
  }
687
742
  case "sys_screenshot": {
743
+ const display = params.display || 0;
688
744
  const ts = Date.now();
689
745
  const pngPath = `/tmp/pulso-ss-${ts}.png`;
690
746
  const jpgPath = `/tmp/pulso-ss-${ts}.jpg`;
691
747
  try {
692
- await runShell(`screencapture -C -x -D1 ${pngPath}`, 15e3);
748
+ const displayFlag = display > 0 ? `-D${display}` : "";
749
+ await runShell(`screencapture -C -x ${displayFlag} ${pngPath}`, 15e3);
693
750
  } catch (ssErr) {
694
751
  const msg = ssErr.message || "";
695
752
  if (msg.includes("could not create image") || msg.includes("display")) {
696
753
  return {
697
754
  success: false,
698
- error: "Screen Recording permission required. Go to System Settings \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable your terminal app (Terminal, iTerm, etc). Then restart the companion."
755
+ error: "Screen Recording permission required. Go to System Settings \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable your terminal app (Terminal, iTerm, etc). Then restart the companion.",
756
+ errorCode: "PERMISSION_DENIED"
699
757
  };
700
758
  }
701
- return { success: false, error: `Screenshot failed: ${msg}` };
759
+ return { success: false, error: `Screenshot failed: ${msg}`, errorCode: "SCREENSHOT_FAILED" };
702
760
  }
703
761
  if (!existsSync(pngPath))
704
762
  return {
705
763
  success: false,
706
- error: "Screenshot failed \u2014 Screen Recording permission needed. System Settings \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable your terminal app, then restart companion."
764
+ error: "Screenshot failed \u2014 Screen Recording permission needed.",
765
+ errorCode: "PERMISSION_DENIED"
707
766
  };
708
767
  try {
709
768
  await runShell(
710
- `sips --resampleWidth 1280 --setProperty format jpeg --setProperty formatOptions 60 ${pngPath} --out ${jpgPath}`,
769
+ `sips --resampleWidth 1600 --setProperty format jpeg --setProperty formatOptions 75 ${pngPath} --out ${jpgPath}`,
711
770
  1e4
712
771
  );
713
772
  } catch {
@@ -721,7 +780,8 @@ async function handleCommand(command, params) {
721
780
  data: {
722
781
  image: `data:image/png;base64,${buf2.toString("base64")}`,
723
782
  format: "png",
724
- note: "Full screen screenshot"
783
+ display: display || "all",
784
+ note: "Full screen screenshot (PNG fallback)"
725
785
  }
726
786
  };
727
787
  }
@@ -735,11 +795,18 @@ async function handleCommand(command, params) {
735
795
  unlinkSync(jpgPath);
736
796
  } catch {
737
797
  }
738
- let screenSize = "unknown";
798
+ let displayInfo = { width: 0, height: 0, displays: 1 };
739
799
  try {
740
- screenSize = await runShell(
741
- `system_profiler SPDisplaysDataType 2>/dev/null | grep Resolution | head -1 | sed 's/.*: //'`
742
- );
800
+ const diSwift = `
801
+ import Cocoa
802
+ let screens = NSScreen.screens
803
+ let main = screens.first!
804
+ let w = Int(main.frame.width)
805
+ let h = Int(main.frame.height)
806
+ print("\\(w),\\(h),\\(screens.count)")`;
807
+ const diResult = await runSwift(diSwift);
808
+ const [dw, dh, dc] = diResult.trim().split(",").map(Number);
809
+ displayInfo = { width: dw || 0, height: dh || 0, displays: dc || 1 };
743
810
  } catch {
744
811
  }
745
812
  return {
@@ -747,12 +814,110 @@ async function handleCommand(command, params) {
747
814
  data: {
748
815
  image: `data:image/jpeg;base64,${base64}`,
749
816
  format: "jpeg",
750
- resolution: screenSize.trim(),
751
- imageWidth: 1280,
752
- note: "Screenshot captured. Coordinates in this image are scaled \u2014 multiply x by (screen_width/1280) for actual clicks."
817
+ display: display || "all",
818
+ screenWidth: displayInfo.width,
819
+ screenHeight: displayInfo.height,
820
+ totalDisplays: displayInfo.displays,
821
+ imageWidth: 1600,
822
+ note: display > 0 ? `Screenshot of display ${display}. Coordinates: multiply x by (${displayInfo.width}/1600) and y by (${displayInfo.height}/(1600*${displayInfo.height}/${displayInfo.width})) for actual clicks.` : `Screenshot of all ${displayInfo.displays} display(s) stitched horizontally. Use sys_list_displays to get individual display bounds for coordinate mapping.`
823
+ }
824
+ };
825
+ }
826
+ case "sys_screenshot_region": {
827
+ const rx = params.x;
828
+ const ry = params.y;
829
+ const rw = params.width;
830
+ const rh = params.height;
831
+ if (rx == null || ry == null || rw == null || rh == null)
832
+ return { success: false, error: "Missing x, y, width, or height" };
833
+ const ts2 = Date.now();
834
+ const regPath = `/tmp/pulso-ss-region-${ts2}.png`;
835
+ const regJpg = `/tmp/pulso-ss-region-${ts2}.jpg`;
836
+ try {
837
+ await runShell(`screencapture -x -R${rx},${ry},${rw},${rh} ${regPath}`, 15e3);
838
+ } catch (e) {
839
+ return { success: false, error: `Region screenshot failed: ${e.message}`, errorCode: "SCREENSHOT_FAILED" };
840
+ }
841
+ if (!existsSync(regPath))
842
+ return { success: false, error: "Region screenshot failed", errorCode: "SCREENSHOT_FAILED" };
843
+ try {
844
+ await runShell(`sips --setProperty format jpeg --setProperty formatOptions 85 ${regPath} --out ${regJpg}`, 1e4);
845
+ } catch {
846
+ const rb = readFileSync(regPath);
847
+ try {
848
+ unlinkSync(regPath);
849
+ } catch {
850
+ }
851
+ return { success: true, data: { image: `data:image/png;base64,${rb.toString("base64")}`, format: "png", region: { x: rx, y: ry, width: rw, height: rh } } };
852
+ }
853
+ const rb2 = readFileSync(regJpg);
854
+ try {
855
+ unlinkSync(regPath);
856
+ } catch {
857
+ }
858
+ try {
859
+ unlinkSync(regJpg);
860
+ } catch {
861
+ }
862
+ return {
863
+ success: true,
864
+ data: {
865
+ image: `data:image/jpeg;base64,${rb2.toString("base64")}`,
866
+ format: "jpeg",
867
+ region: { x: rx, y: ry, width: rw, height: rh },
868
+ note: "Region screenshot at actual resolution (no scaling). Coordinates are absolute screen coordinates."
753
869
  }
754
870
  };
755
871
  }
872
+ case "sys_list_displays": {
873
+ try {
874
+ const swift = `
875
+ import Cocoa
876
+ let screens = NSScreen.screens
877
+ var result = ""
878
+ for (i, screen) in screens.enumerated() {
879
+ let f = screen.frame
880
+ let vf = screen.visibleFrame
881
+ let isMain = (screen == NSScreen.main)
882
+ let scale = screen.backingScaleFactor
883
+ let name = screen.localizedName
884
+ result += "\\(i+1)|\\(name)|\\(Int(f.origin.x)),\\(Int(f.origin.y))|\\(Int(f.width)),\\(Int(f.height))|\\(Int(vf.origin.x)),\\(Int(vf.origin.y))|\\(Int(vf.width)),\\(Int(vf.height))|\\(scale)|\\(isMain)\\n"
885
+ }
886
+ print(result)`;
887
+ const raw = await runSwift(swift, 15e3);
888
+ const displays = raw.trim().split("\n").filter(Boolean).map((line) => {
889
+ const [index, name, origin, size, visOrigin, visSize, scale, isMain] = line.split("|");
890
+ const [ox, oy] = (origin || "0,0").split(",").map(Number);
891
+ const [sw2, sh2] = (size || "0,0").split(",").map(Number);
892
+ const [vox, voy] = (visOrigin || "0,0").split(",").map(Number);
893
+ const [vsw, vsh] = (visSize || "0,0").split(",").map(Number);
894
+ return {
895
+ display: parseInt(index) || 0,
896
+ name: name?.trim() || "Unknown",
897
+ x: ox,
898
+ y: oy,
899
+ width: sw2,
900
+ height: sh2,
901
+ visibleX: vox,
902
+ visibleY: voy,
903
+ visibleWidth: vsw,
904
+ visibleHeight: vsh,
905
+ scale: parseFloat(scale) || 1,
906
+ isMain: isMain?.trim() === "true"
907
+ };
908
+ });
909
+ return {
910
+ success: true,
911
+ data: {
912
+ displays,
913
+ count: displays.length,
914
+ note: "Display coordinates use macOS coordinate system (origin bottom-left). For screencapture: use display number (1-based). For mouse clicks: windows on display 2 have x >= display1.width."
915
+ }
916
+ };
917
+ } catch (e) {
918
+ return { success: false, error: `Failed to list displays: ${e.message}` };
919
+ }
920
+ }
756
921
  // ── Computer-Use: Mouse & Keyboard ────────────────────
757
922
  case "sys_mouse_click": {
758
923
  const x = Number(params.x);
@@ -1038,7 +1203,7 @@ print("\\(x),\\(y)")`;
1038
1203
  tell application "Safari"
1039
1204
  activate
1040
1205
  if (count of windows) = 0 then make new document
1041
- set URL of front document to "${url.replace(/"/g, '\\"')}"
1206
+ tell front window to set current tab to (make new tab with properties {URL:"${url.replace(/"/g, '\\"')}"})
1042
1207
  end tell`);
1043
1208
  } else {
1044
1209
  await runAppleScript(`
@@ -1046,10 +1211,8 @@ print("\\(x),\\(y)")`;
1046
1211
  activate
1047
1212
  if (count of windows) = 0 then
1048
1213
  make new window
1049
- set URL of active tab of front window to "${url.replace(/"/g, '\\"')}"
1050
- else
1051
- set URL of active tab of front window to "${url.replace(/"/g, '\\"')}"
1052
1214
  end if
1215
+ tell front window to make new tab with properties {URL:"${url.replace(/"/g, '\\"')}"}
1053
1216
  end tell`);
1054
1217
  }
1055
1218
  return { success: true, data: { navigated: url, browser } };
@@ -1732,6 +1895,19 @@ end tell`);
1732
1895
  }
1733
1896
  // ── Window Management ───────────────────────────────────
1734
1897
  case "sys_window_list": {
1898
+ let displayBounds = [];
1899
+ try {
1900
+ const dbSwift = `
1901
+ import Cocoa
1902
+ let screens = NSScreen.screens
1903
+ for s in screens { print("\\(Int(s.frame.origin.x)),\\(Int(s.frame.width))") }`;
1904
+ const dbRaw = await runSwift(dbSwift, 5e3);
1905
+ displayBounds = dbRaw.trim().split("\n").filter(Boolean).map((l) => {
1906
+ const [bx, bw] = l.split(",").map(Number);
1907
+ return { x: bx || 0, width: bw || 0 };
1908
+ });
1909
+ } catch {
1910
+ }
1735
1911
  const raw4 = await runAppleScript(`
1736
1912
  tell application "System Events"
1737
1913
  set output to ""
@@ -1744,15 +1920,42 @@ end tell`);
1744
1920
  return output
1745
1921
  end tell`);
1746
1922
  const windows = raw4.split("\n").filter(Boolean).map((line) => {
1747
- const [app, title, pos, sz] = line.split(" | ");
1923
+ const [appW, title, pos, sz] = line.split(" | ");
1924
+ const posMatch = (pos || "").match(/(\d+),\s*(\d+)/);
1925
+ const wx = posMatch ? parseInt(posMatch[1]) : 0;
1926
+ const wy = posMatch ? parseInt(posMatch[2]) : 0;
1927
+ const szMatch = (sz || "").match(/(\d+),\s*(\d+)/);
1928
+ const ww = szMatch ? parseInt(szMatch[1]) : 0;
1929
+ const wh = szMatch ? parseInt(szMatch[2]) : 0;
1930
+ let displayIndex = 1;
1931
+ if (displayBounds.length > 1) {
1932
+ for (let di = 0; di < displayBounds.length; di++) {
1933
+ const db = displayBounds[di];
1934
+ if (wx >= db.x && wx < db.x + db.width) {
1935
+ displayIndex = di + 1;
1936
+ break;
1937
+ }
1938
+ }
1939
+ }
1748
1940
  return {
1749
- app: app?.trim(),
1941
+ app: appW?.trim(),
1750
1942
  title: title?.trim(),
1751
- position: pos?.trim(),
1752
- size: sz?.trim()
1943
+ x: wx,
1944
+ y: wy,
1945
+ width: ww,
1946
+ height: wh,
1947
+ display: displayIndex
1753
1948
  };
1754
1949
  });
1755
- return { success: true, data: { windows, count: windows.length } };
1950
+ return {
1951
+ success: true,
1952
+ data: {
1953
+ windows,
1954
+ count: windows.length,
1955
+ displays: displayBounds.length || 1,
1956
+ note: "Window positions are in global coordinates. 'display' indicates which monitor the window is on (1=primary, 2=secondary, etc)."
1957
+ }
1958
+ };
1756
1959
  }
1757
1960
  case "sys_window_focus": {
1758
1961
  const appName = params.app;
@@ -2926,14 +3129,14 @@ var CAPABILITY_PROBES = [
2926
3129
  name: "screenshot",
2927
3130
  test: async () => {
2928
3131
  try {
2929
- await runShell("screencapture -x -D1 /tmp/pulso-probe-ss.png", 5e3);
3132
+ await runShell("screencapture -x /tmp/pulso-probe-ss.png", 5e3);
2930
3133
  unlinkSync("/tmp/pulso-probe-ss.png");
2931
3134
  return true;
2932
3135
  } catch {
2933
3136
  return false;
2934
3137
  }
2935
3138
  },
2936
- tools: ["sys_screenshot"]
3139
+ tools: ["sys_screenshot", "sys_screenshot_region", "sys_list_displays"]
2937
3140
  },
2938
3141
  {
2939
3142
  name: "chrome_js",
@@ -3100,7 +3303,7 @@ function connect() {
3100
3303
  ws.send(JSON.stringify({
3101
3304
  type: "extension_ready",
3102
3305
  platform: "macos",
3103
- version: "0.3.2",
3306
+ version: "0.4.0",
3104
3307
  accessLevel: ACCESS_LEVEL,
3105
3308
  capabilities: cap.available,
3106
3309
  unavailable: cap.unavailable,
@@ -3358,7 +3561,7 @@ function writeString(view, offset, str) {
3358
3561
  }
3359
3562
  console.log("");
3360
3563
  console.log(" \u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
3361
- console.log(" \u2551 \u{1FAC0} Pulso Mac Companion v0.3.0 \u2551");
3564
+ console.log(" \u2551 \u{1FAC0} Pulso Mac Companion v0.4.0 \u2551");
3362
3565
  console.log(" \u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
3363
3566
  console.log("");
3364
3567
  setupPermissions().then(() => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pulso/companion",
3
- "version": "0.3.3",
3
+ "version": "0.4.1",
4
4
  "type": "module",
5
5
  "description": "Pulso Companion — gives your AI agent real control over your computer",
6
6
  "bin": {