@vm0/cli 9.104.0 → 9.105.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vm0/cli",
3
- "version": "9.104.0",
3
+ "version": "9.105.0",
4
4
  "description": "CLI application",
5
5
  "repository": {
6
6
  "type": "git",
package/zero.js CHANGED
@@ -115,7 +115,7 @@ import {
115
115
  upsertZeroOrgModelProvider,
116
116
  withErrorHandler,
117
117
  zeroAgentCustomSkillNameSchema
118
- } from "./chunk-FKDGKPGQ.js";
118
+ } from "./chunk-2SZAWYHO.js";
119
119
 
120
120
  // src/zero.ts
121
121
  import { Command as Command77 } from "commander";
@@ -4735,8 +4735,19 @@ async function captureScreenshot() {
4735
4735
  const tmpPath = join3(tmpdir(), `vm0-screenshot-${randomUUID()}.jpg`);
4736
4736
  try {
4737
4737
  await execFileAsync("screencapture", ["-x", "-t", "jpg", tmpPath]);
4738
- const buffer = await readFile(tmpPath);
4739
4738
  const info = await getScreenInfo();
4739
+ if (info.scaleFactor > 1) {
4740
+ await execFileAsync("sips", [
4741
+ "-z",
4742
+ String(info.height),
4743
+ String(info.width),
4744
+ "-s",
4745
+ "formatOptions",
4746
+ "80",
4747
+ tmpPath
4748
+ ]);
4749
+ }
4750
+ const buffer = await readFile(tmpPath);
4740
4751
  return {
4741
4752
  image: buffer.toString("base64"),
4742
4753
  width: info.width,
@@ -4761,12 +4772,24 @@ async function captureRegionScreenshot(region) {
4761
4772
  regionArg,
4762
4773
  tmpPath
4763
4774
  ]);
4775
+ const info = await getScreenInfo();
4776
+ if (info.scaleFactor > 1) {
4777
+ await execFileAsync("sips", [
4778
+ "-z",
4779
+ String(region.height),
4780
+ String(region.width),
4781
+ "-s",
4782
+ "formatOptions",
4783
+ "80",
4784
+ tmpPath
4785
+ ]);
4786
+ }
4764
4787
  const buffer = await readFile(tmpPath);
4765
4788
  return {
4766
4789
  image: buffer.toString("base64"),
4767
4790
  width: region.width,
4768
4791
  height: region.height,
4769
- scaleFactor: 1,
4792
+ scaleFactor: info.scaleFactor,
4770
4793
  format: "jpg"
4771
4794
  };
4772
4795
  } finally {
@@ -4786,14 +4809,30 @@ async function getScreenInfo() {
4786
4809
  for (const screen of screens) {
4787
4810
  const pixelStr = screen._spdisplays_pixels;
4788
4811
  if (pixelStr) {
4789
- const match = pixelStr.match(/(\d+)\s*x\s*(\d+)/);
4790
- if (match?.[1] && match[2]) {
4791
- const width = parseInt(match[1], 10);
4792
- const height = parseInt(match[2], 10);
4812
+ const pixelMatch = pixelStr.match(/(\d+)\s*x\s*(\d+)/);
4813
+ if (pixelMatch?.[1] && pixelMatch[2]) {
4814
+ const physicalWidth = parseInt(pixelMatch[1], 10);
4815
+ const physicalHeight = parseInt(pixelMatch[2], 10);
4793
4816
  const resStr = screen._spdisplays_resolution ?? screen.spdisplays_resolution ?? "";
4794
- const isRetina = /retina/i.test(resStr);
4795
- const scaleFactor = isRetina ? 2 : 1;
4796
- return { width, height, scaleFactor };
4817
+ const resMatch = resStr.match(/(\d+)\s*x\s*(\d+)/);
4818
+ let scaleFactor;
4819
+ let logicalWidth;
4820
+ let logicalHeight;
4821
+ if (resMatch?.[1] && resMatch[2]) {
4822
+ logicalWidth = parseInt(resMatch[1], 10);
4823
+ logicalHeight = parseInt(resMatch[2], 10);
4824
+ scaleFactor = Math.round(physicalWidth / logicalWidth);
4825
+ } else {
4826
+ const isRetina = /retina/i.test(resStr);
4827
+ scaleFactor = isRetina ? 2 : 1;
4828
+ logicalWidth = Math.floor(physicalWidth / scaleFactor);
4829
+ logicalHeight = Math.floor(physicalHeight / scaleFactor);
4830
+ }
4831
+ return {
4832
+ width: logicalWidth,
4833
+ height: logicalHeight,
4834
+ scaleFactor
4835
+ };
4797
4836
  }
4798
4837
  }
4799
4838
  }
@@ -4908,17 +4947,20 @@ function parseKeyCombo(keys) {
4908
4947
  }
4909
4948
  return { modifiers, mainKey };
4910
4949
  }
4950
+ function keyAction(key) {
4951
+ return VALID_SPECIAL_KEYS.has(key) ? `kp:${key}` : `t:${key}`;
4952
+ }
4911
4953
  async function pressKey(keys) {
4912
4954
  const { modifiers, mainKey } = parseKeyCombo(keys);
4913
4955
  if (modifiers.length === 0) {
4914
- await execFileAsync2("cliclick", [`kp:${mainKey}`]);
4956
+ await execFileAsync2("cliclick", [keyAction(mainKey)]);
4915
4957
  return;
4916
4958
  }
4917
4959
  const args = [];
4918
4960
  for (const mod of modifiers) {
4919
4961
  args.push(`kd:${mod}`);
4920
4962
  }
4921
- args.push(`kp:${mainKey}`);
4963
+ args.push(keyAction(mainKey));
4922
4964
  for (let i = modifiers.length - 1; i >= 0; i--) {
4923
4965
  args.push(`ku:${modifiers[i]}`);
4924
4966
  }
@@ -5101,12 +5143,10 @@ async function handleMouseRequest(req, res) {
5101
5143
  return;
5102
5144
  }
5103
5145
  const info = await getScreenInfo();
5104
- const maxX = Math.floor(info.width / info.scaleFactor);
5105
- const maxY = Math.floor(info.height / info.scaleFactor);
5106
- if (x < 0 || x >= maxX || y < 0 || y >= maxY) {
5146
+ if (x < 0 || x >= info.width || y < 0 || y >= info.height) {
5107
5147
  res.writeHead(400, { "Content-Type": "text/plain" });
5108
5148
  res.end(
5109
- `Coordinates out of bounds. Screen size: ${maxX}x${maxY} (points)`
5149
+ `Coordinates out of bounds. Screen size: ${info.width}x${info.height} (points)`
5110
5150
  );
5111
5151
  return;
5112
5152
  }
@@ -5677,31 +5717,23 @@ var clientCursorPositionCommand = new Command75().name("cursor-position").descri
5677
5717
  // src/commands/zero/computer-use/index.ts
5678
5718
  var hostCommand = new Command76().name("host").description("Manage computer-use host daemon").addCommand(hostStartCommand).addCommand(hostStopCommand);
5679
5719
  var clientCommand = new Command76().name("client").description("Interact with remote computer-use host").addCommand(clientScreenshotCommand).addCommand(clientZoomCommand).addCommand(clientInfoCommand).addCommand(clientLeftClickCommand).addCommand(clientRightClickCommand).addCommand(clientMiddleClickCommand).addCommand(clientDoubleClickCommand).addCommand(clientTripleClickCommand).addCommand(clientLeftClickDragCommand).addCommand(clientLeftMouseDownCommand).addCommand(clientLeftMouseUpCommand).addCommand(clientScrollCommand).addCommand(clientReadClipboardCommand).addCommand(clientWriteClipboardCommand).addCommand(clientKeyCommand).addCommand(clientHoldKeyCommand).addCommand(clientTypeCommand).addCommand(clientOpenAppCommand).addCommand(clientMouseMoveCommand).addCommand(clientCursorPositionCommand);
5680
- var zeroComputerUseCommand = new Command76().name("computer-use").description("Remote desktop control for cloud agents").addCommand(hostCommand).addCommand(clientCommand).addHelpText(
5720
+ clientCommand.addHelpText(
5681
5721
  "after",
5682
5722
  `
5723
+ Coordinate System:
5724
+ All coordinates use macOS logical points, not physical pixels.
5725
+ On Retina displays, logical size = physical size / scaleFactor.
5726
+ Run "info" to check your screen's logical dimensions.
5727
+
5683
5728
  Examples:
5684
- Start the host daemon (on macOS): zero computer-use host start
5685
- Stop the host daemon: zero computer-use host stop
5686
- Take a screenshot (from agent): zero computer-use client screenshot
5687
- Zoom into a region (from agent): zero computer-use client zoom --x 0 --y 0 --width 500 --height 500
5688
- Get screen info (from agent): zero computer-use client info
5689
- Left click at (500, 300): zero computer-use client left-click 500 300
5690
- Double click at (100, 200): zero computer-use client double-click 100 200
5691
- Drag from A to B: zero computer-use client left-click-drag 100 100 500 500
5692
- Press mouse button: zero computer-use client left-mouse-down 200 300
5693
- Release mouse button: zero computer-use client left-mouse-up 500 500
5694
- Scroll down at position: zero computer-use client scroll 500 300 down 5
5695
- Read clipboard text: zero computer-use client read-clipboard
5696
- Write clipboard text: zero computer-use client write-clipboard "hello"
5697
- Press key combo: zero computer-use client key "cmd+c"
5698
- Hold shift for 2 seconds: zero computer-use client hold-key "shift" 2000
5699
- Type text: zero computer-use client type "Hello, world!"
5700
- Open an application: zero computer-use client open-app Safari
5701
- Open by bundle ID: zero computer-use client open-app "com.apple.Safari"
5702
- Move mouse to (100, 200): zero computer-use client mouse-move 100 200
5703
- Get cursor position: zero computer-use client cursor-position`
5729
+ zero computer-use client screenshot
5730
+ zero computer-use client zoom --x 0 --y 0 --width 500 --height 500
5731
+ zero computer-use client info
5732
+ zero computer-use client left-click 500 300
5733
+ zero computer-use client scroll 500 300 down 5
5734
+ zero computer-use client key "cmd+c"`
5704
5735
  );
5736
+ var zeroComputerUseCommand = new Command76().name("computer-use").description("Remote desktop control for cloud agents").addCommand(hostCommand).addCommand(clientCommand);
5705
5737
 
5706
5738
  // src/zero.ts
5707
5739
  var COMMAND_CAPABILITY_MAP = {
@@ -5753,7 +5785,7 @@ function registerZeroCommands(prog, commands) {
5753
5785
  var program = new Command77();
5754
5786
  program.name("zero").description(
5755
5787
  "Zero CLI \u2014 interact with the zero platform from inside the sandbox"
5756
- ).version("9.104.0").addHelpText(
5788
+ ).version("9.105.0").addHelpText(
5757
5789
  "after",
5758
5790
  `
5759
5791
  Examples: