@pulso/companion 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +221 -25
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -27,7 +27,7 @@ if (!TOKEN) {
27
27
  var ACCESS_LEVEL = process.env.PULSO_ACCESS ?? process.argv.find((_, i, a) => a[i - 1] === "--access") ?? "sandboxed";
28
28
  var WAKE_WORD_ENABLED = process.argv.includes("--wake-word");
29
29
  var PICOVOICE_ACCESS_KEY = process.env.PICOVOICE_ACCESS_KEY ?? process.argv.find((_, i, a) => a[i - 1] === "--picovoice-key") ?? "";
30
- var WS_URL = API_URL.replace("https://", "wss://").replace("http://", "ws://") + "/ws/browser?token=" + TOKEN;
30
+ var WS_URL = API_URL.replace("https://", "wss://").replace("http://", "ws://") + "/ws/companion?token=" + TOKEN;
31
31
  var HOME = homedir();
32
32
  var RECONNECT_DELAY = 5e3;
33
33
  var SAFE_DIRS = ["Documents", "Desktop", "Downloads", "Projects", "Projetos"];
@@ -478,8 +478,54 @@ async function handleCommand(command, params) {
478
478
  case "sys_open_app": {
479
479
  const app = params.app;
480
480
  if (!app) return { success: false, error: "Missing app name" };
481
- await runShell(`open -a "${app.replace(/"/g, "")}"`);
482
- return { success: true, data: { opened: app } };
481
+ const sanitizedApp = app.replace(/"/g, "");
482
+ try {
483
+ await runShell(`open -a "${sanitizedApp}"`);
484
+ } catch (e) {
485
+ return { success: false, error: `Failed to open "${sanitizedApp}": ${e.message}`, errorCode: "APP_NOT_FOUND" };
486
+ }
487
+ let launched = false;
488
+ for (let i = 0; i < 10; i++) {
489
+ await new Promise((r) => setTimeout(r, 500));
490
+ try {
491
+ const running = await runAppleScript(
492
+ `tell application "System Events" to (name of processes) contains "${sanitizedApp}"`
493
+ );
494
+ if (running.trim() === "true") {
495
+ launched = true;
496
+ break;
497
+ }
498
+ } catch {
499
+ }
500
+ }
501
+ let windowInfo;
502
+ if (launched) {
503
+ try {
504
+ const winCheck = await runAppleScript(`
505
+ tell application "System Events"
506
+ tell process "${sanitizedApp}"
507
+ if (count of windows) > 0 then
508
+ set w to window 1
509
+ return (name of w) & "|" & (position of w as string) & "|" & (size of w as string)
510
+ else
511
+ return "no-window"
512
+ end if
513
+ end tell
514
+ end tell`);
515
+ windowInfo = winCheck.trim();
516
+ } catch {
517
+ }
518
+ }
519
+ return {
520
+ success: true,
521
+ data: {
522
+ opened: sanitizedApp,
523
+ launched,
524
+ hasWindow: windowInfo ? windowInfo !== "no-window" : false,
525
+ windowInfo: windowInfo && windowInfo !== "no-window" ? windowInfo : void 0,
526
+ note: launched ? `"${sanitizedApp}" is running${windowInfo && windowInfo !== "no-window" ? " with a visible window" : " (may still be loading)"}.` : `"${sanitizedApp}" was requested to open but process not detected yet. It may still be launching \u2014 take a screenshot to verify.`
527
+ }
528
+ };
483
529
  }
484
530
  case "sys_open_url": {
485
531
  const url = params.url;
@@ -685,29 +731,33 @@ async function handleCommand(command, params) {
685
731
  return { success: true, data: { path, written: content.length } };
686
732
  }
687
733
  case "sys_screenshot": {
734
+ const display = params.display || 0;
688
735
  const ts = Date.now();
689
736
  const pngPath = `/tmp/pulso-ss-${ts}.png`;
690
737
  const jpgPath = `/tmp/pulso-ss-${ts}.jpg`;
691
738
  try {
692
- await runShell(`screencapture -C -x -D1 ${pngPath}`, 15e3);
739
+ const displayFlag = display > 0 ? `-D${display}` : "";
740
+ await runShell(`screencapture -C -x ${displayFlag} ${pngPath}`, 15e3);
693
741
  } catch (ssErr) {
694
742
  const msg = ssErr.message || "";
695
743
  if (msg.includes("could not create image") || msg.includes("display")) {
696
744
  return {
697
745
  success: false,
698
- error: "Screen Recording permission required. Go to System Settings \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable your terminal app (Terminal, iTerm, etc). Then restart the companion."
746
+ error: "Screen Recording permission required. Go to System Settings \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable your terminal app (Terminal, iTerm, etc). Then restart the companion.",
747
+ errorCode: "PERMISSION_DENIED"
699
748
  };
700
749
  }
701
- return { success: false, error: `Screenshot failed: ${msg}` };
750
+ return { success: false, error: `Screenshot failed: ${msg}`, errorCode: "SCREENSHOT_FAILED" };
702
751
  }
703
752
  if (!existsSync(pngPath))
704
753
  return {
705
754
  success: false,
706
- error: "Screenshot failed \u2014 Screen Recording permission needed. System Settings \u2192 Privacy & Security \u2192 Screen Recording \u2192 enable your terminal app, then restart companion."
755
+ error: "Screenshot failed \u2014 Screen Recording permission needed.",
756
+ errorCode: "PERMISSION_DENIED"
707
757
  };
708
758
  try {
709
759
  await runShell(
710
- `sips --resampleWidth 1280 --setProperty format jpeg --setProperty formatOptions 60 ${pngPath} --out ${jpgPath}`,
760
+ `sips --resampleWidth 1600 --setProperty format jpeg --setProperty formatOptions 75 ${pngPath} --out ${jpgPath}`,
711
761
  1e4
712
762
  );
713
763
  } catch {
@@ -721,7 +771,8 @@ async function handleCommand(command, params) {
721
771
  data: {
722
772
  image: `data:image/png;base64,${buf2.toString("base64")}`,
723
773
  format: "png",
724
- note: "Full screen screenshot"
774
+ display: display || "all",
775
+ note: "Full screen screenshot (PNG fallback)"
725
776
  }
726
777
  };
727
778
  }
@@ -735,11 +786,18 @@ async function handleCommand(command, params) {
735
786
  unlinkSync(jpgPath);
736
787
  } catch {
737
788
  }
738
- let screenSize = "unknown";
789
+ let displayInfo = { width: 0, height: 0, displays: 1 };
739
790
  try {
740
- screenSize = await runShell(
741
- `system_profiler SPDisplaysDataType 2>/dev/null | grep Resolution | head -1 | sed 's/.*: //'`
742
- );
791
+ const diSwift = `
792
+ import Cocoa
793
+ let screens = NSScreen.screens
794
+ let main = screens.first!
795
+ let w = Int(main.frame.width)
796
+ let h = Int(main.frame.height)
797
+ print("\\(w),\\(h),\\(screens.count)")`;
798
+ const diResult = await runSwift(diSwift);
799
+ const [dw, dh, dc] = diResult.trim().split(",").map(Number);
800
+ displayInfo = { width: dw || 0, height: dh || 0, displays: dc || 1 };
743
801
  } catch {
744
802
  }
745
803
  return {
@@ -747,12 +805,110 @@ async function handleCommand(command, params) {
747
805
  data: {
748
806
  image: `data:image/jpeg;base64,${base64}`,
749
807
  format: "jpeg",
750
- resolution: screenSize.trim(),
751
- imageWidth: 1280,
752
- note: "Screenshot captured. Coordinates in this image are scaled \u2014 multiply x by (screen_width/1280) for actual clicks."
808
+ display: display || "all",
809
+ screenWidth: displayInfo.width,
810
+ screenHeight: displayInfo.height,
811
+ totalDisplays: displayInfo.displays,
812
+ imageWidth: 1600,
813
+ note: display > 0 ? `Screenshot of display ${display}. Coordinates: multiply x by (${displayInfo.width}/1600) and y by (${displayInfo.height}/(1600*${displayInfo.height}/${displayInfo.width})) for actual clicks.` : `Screenshot of all ${displayInfo.displays} display(s) stitched horizontally. Use sys_list_displays to get individual display bounds for coordinate mapping.`
753
814
  }
754
815
  };
755
816
  }
817
+ case "sys_screenshot_region": {
818
+ const rx = params.x;
819
+ const ry = params.y;
820
+ const rw = params.width;
821
+ const rh = params.height;
822
+ if (rx == null || ry == null || rw == null || rh == null)
823
+ return { success: false, error: "Missing x, y, width, or height" };
824
+ const ts2 = Date.now();
825
+ const regPath = `/tmp/pulso-ss-region-${ts2}.png`;
826
+ const regJpg = `/tmp/pulso-ss-region-${ts2}.jpg`;
827
+ try {
828
+ await runShell(`screencapture -x -R${rx},${ry},${rw},${rh} ${regPath}`, 15e3);
829
+ } catch (e) {
830
+ return { success: false, error: `Region screenshot failed: ${e.message}`, errorCode: "SCREENSHOT_FAILED" };
831
+ }
832
+ if (!existsSync(regPath))
833
+ return { success: false, error: "Region screenshot failed", errorCode: "SCREENSHOT_FAILED" };
834
+ try {
835
+ await runShell(`sips --setProperty format jpeg --setProperty formatOptions 85 ${regPath} --out ${regJpg}`, 1e4);
836
+ } catch {
837
+ const rb = readFileSync(regPath);
838
+ try {
839
+ unlinkSync(regPath);
840
+ } catch {
841
+ }
842
+ return { success: true, data: { image: `data:image/png;base64,${rb.toString("base64")}`, format: "png", region: { x: rx, y: ry, width: rw, height: rh } } };
843
+ }
844
+ const rb2 = readFileSync(regJpg);
845
+ try {
846
+ unlinkSync(regPath);
847
+ } catch {
848
+ }
849
+ try {
850
+ unlinkSync(regJpg);
851
+ } catch {
852
+ }
853
+ return {
854
+ success: true,
855
+ data: {
856
+ image: `data:image/jpeg;base64,${rb2.toString("base64")}`,
857
+ format: "jpeg",
858
+ region: { x: rx, y: ry, width: rw, height: rh },
859
+ note: "Region screenshot at actual resolution (no scaling). Coordinates are absolute screen coordinates."
860
+ }
861
+ };
862
+ }
863
+ case "sys_list_displays": {
864
+ try {
865
+ const swift = `
866
+ import Cocoa
867
+ let screens = NSScreen.screens
868
+ var result = ""
869
+ for (i, screen) in screens.enumerated() {
870
+ let f = screen.frame
871
+ let vf = screen.visibleFrame
872
+ let isMain = (screen == NSScreen.main)
873
+ let scale = screen.backingScaleFactor
874
+ let name = screen.localizedName
875
+ result += "\\(i+1)|\\(name)|\\(Int(f.origin.x)),\\(Int(f.origin.y))|\\(Int(f.width)),\\(Int(f.height))|\\(Int(vf.origin.x)),\\(Int(vf.origin.y))|\\(Int(vf.width)),\\(Int(vf.height))|\\(scale)|\\(isMain)\\n"
876
+ }
877
+ print(result)`;
878
+ const raw = await runSwift(swift, 15e3);
879
+ const displays = raw.trim().split("\n").filter(Boolean).map((line) => {
880
+ const [index, name, origin, size, visOrigin, visSize, scale, isMain] = line.split("|");
881
+ const [ox, oy] = (origin || "0,0").split(",").map(Number);
882
+ const [sw2, sh2] = (size || "0,0").split(",").map(Number);
883
+ const [vox, voy] = (visOrigin || "0,0").split(",").map(Number);
884
+ const [vsw, vsh] = (visSize || "0,0").split(",").map(Number);
885
+ return {
886
+ display: parseInt(index) || 0,
887
+ name: name?.trim() || "Unknown",
888
+ x: ox,
889
+ y: oy,
890
+ width: sw2,
891
+ height: sh2,
892
+ visibleX: vox,
893
+ visibleY: voy,
894
+ visibleWidth: vsw,
895
+ visibleHeight: vsh,
896
+ scale: parseFloat(scale) || 1,
897
+ isMain: isMain?.trim() === "true"
898
+ };
899
+ });
900
+ return {
901
+ success: true,
902
+ data: {
903
+ displays,
904
+ count: displays.length,
905
+ note: "Display coordinates use macOS coordinate system (origin bottom-left). For screencapture: use display number (1-based). For mouse clicks: windows on display 2 have x >= display1.width."
906
+ }
907
+ };
908
+ } catch (e) {
909
+ return { success: false, error: `Failed to list displays: ${e.message}` };
910
+ }
911
+ }
756
912
  // ── Computer-Use: Mouse & Keyboard ────────────────────
757
913
  case "sys_mouse_click": {
758
914
  const x = Number(params.x);
@@ -1732,6 +1888,19 @@ end tell`);
1732
1888
  }
1733
1889
  // ── Window Management ───────────────────────────────────
1734
1890
  case "sys_window_list": {
1891
+ let displayBounds = [];
1892
+ try {
1893
+ const dbSwift = `
1894
+ import Cocoa
1895
+ let screens = NSScreen.screens
1896
+ for s in screens { print("\\(Int(s.frame.origin.x)),\\(Int(s.frame.width))") }`;
1897
+ const dbRaw = await runSwift(dbSwift, 5e3);
1898
+ displayBounds = dbRaw.trim().split("\n").filter(Boolean).map((l) => {
1899
+ const [bx, bw] = l.split(",").map(Number);
1900
+ return { x: bx || 0, width: bw || 0 };
1901
+ });
1902
+ } catch {
1903
+ }
1735
1904
  const raw4 = await runAppleScript(`
1736
1905
  tell application "System Events"
1737
1906
  set output to ""
@@ -1744,15 +1913,42 @@ end tell`);
1744
1913
  return output
1745
1914
  end tell`);
1746
1915
  const windows = raw4.split("\n").filter(Boolean).map((line) => {
1747
- const [app, title, pos, sz] = line.split(" | ");
1916
+ const [appW, title, pos, sz] = line.split(" | ");
1917
+ const posMatch = (pos || "").match(/(\d+),\s*(\d+)/);
1918
+ const wx = posMatch ? parseInt(posMatch[1]) : 0;
1919
+ const wy = posMatch ? parseInt(posMatch[2]) : 0;
1920
+ const szMatch = (sz || "").match(/(\d+),\s*(\d+)/);
1921
+ const ww = szMatch ? parseInt(szMatch[1]) : 0;
1922
+ const wh = szMatch ? parseInt(szMatch[2]) : 0;
1923
+ let displayIndex = 1;
1924
+ if (displayBounds.length > 1) {
1925
+ for (let di = 0; di < displayBounds.length; di++) {
1926
+ const db = displayBounds[di];
1927
+ if (wx >= db.x && wx < db.x + db.width) {
1928
+ displayIndex = di + 1;
1929
+ break;
1930
+ }
1931
+ }
1932
+ }
1748
1933
  return {
1749
- app: app?.trim(),
1934
+ app: appW?.trim(),
1750
1935
  title: title?.trim(),
1751
- position: pos?.trim(),
1752
- size: sz?.trim()
1936
+ x: wx,
1937
+ y: wy,
1938
+ width: ww,
1939
+ height: wh,
1940
+ display: displayIndex
1753
1941
  };
1754
1942
  });
1755
- return { success: true, data: { windows, count: windows.length } };
1943
+ return {
1944
+ success: true,
1945
+ data: {
1946
+ windows,
1947
+ count: windows.length,
1948
+ displays: displayBounds.length || 1,
1949
+ note: "Window positions are in global coordinates. 'display' indicates which monitor the window is on (1=primary, 2=secondary, etc)."
1950
+ }
1951
+ };
1756
1952
  }
1757
1953
  case "sys_window_focus": {
1758
1954
  const appName = params.app;
@@ -2926,14 +3122,14 @@ var CAPABILITY_PROBES = [
2926
3122
  name: "screenshot",
2927
3123
  test: async () => {
2928
3124
  try {
2929
- await runShell("screencapture -x -D1 /tmp/pulso-probe-ss.png", 5e3);
3125
+ await runShell("screencapture -x /tmp/pulso-probe-ss.png", 5e3);
2930
3126
  unlinkSync("/tmp/pulso-probe-ss.png");
2931
3127
  return true;
2932
3128
  } catch {
2933
3129
  return false;
2934
3130
  }
2935
3131
  },
2936
- tools: ["sys_screenshot"]
3132
+ tools: ["sys_screenshot", "sys_screenshot_region", "sys_list_displays"]
2937
3133
  },
2938
3134
  {
2939
3135
  name: "chrome_js",
@@ -3100,7 +3296,7 @@ function connect() {
3100
3296
  ws.send(JSON.stringify({
3101
3297
  type: "extension_ready",
3102
3298
  platform: "macos",
3103
- version: "0.3.2",
3299
+ version: "0.4.0",
3104
3300
  accessLevel: ACCESS_LEVEL,
3105
3301
  capabilities: cap.available,
3106
3302
  unavailable: cap.unavailable,
@@ -3358,7 +3554,7 @@ function writeString(view, offset, str) {
3358
3554
  }
3359
3555
  console.log("");
3360
3556
  console.log(" \u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
3361
- console.log(" \u2551 \u{1FAC0} Pulso Mac Companion v0.3.0 \u2551");
3557
+ console.log(" \u2551 \u{1FAC0} Pulso Mac Companion v0.4.0 \u2551");
3362
3558
  console.log(" \u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
3363
3559
  console.log("");
3364
3560
  setupPermissions().then(() => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@pulso/companion",
3
- "version": "0.3.3",
3
+ "version": "0.4.0",
4
4
  "type": "module",
5
5
  "description": "Pulso Companion — gives your AI agent real control over your computer",
6
6
  "bin": {