@vm0/cli 9.100.2 → 9.101.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/zero.js CHANGED
@@ -36,6 +36,7 @@ import {
36
36
  getAskUserAnswer,
37
37
  getAuthMethodsForType,
38
38
  getBaseUrl,
39
+ getComputerUseHost,
39
40
  getConnectorDerivedNames,
40
41
  getConnectorFirewall,
41
42
  getConnectorTypeForSecretName,
@@ -87,6 +88,7 @@ import {
87
88
  promptPassword,
88
89
  promptSelect,
89
90
  promptText,
91
+ registerComputerUseHost,
90
92
  removeZeroOrgMember,
91
93
  renderRunCreated,
92
94
  requestDeveloperSupportConsent,
@@ -103,6 +105,7 @@ import {
103
105
  setZeroVariable,
104
106
  submitDeveloperSupport,
105
107
  switchZeroOrg,
108
+ unregisterComputerUseHost,
106
109
  updateSkill,
107
110
  updateZeroAgent,
108
111
  updateZeroAgentInstructions,
@@ -111,10 +114,10 @@ import {
111
114
  updateZeroUserPreferences,
112
115
  upsertZeroOrgModelProvider,
113
116
  withErrorHandler
114
- } from "./chunk-7RQXNUYT.js";
117
+ } from "./chunk-X2L3TKWA.js";
115
118
 
116
119
  // src/zero.ts
117
- import { Command as Command74 } from "commander";
120
+ import { Command as Command77 } from "commander";
118
121
 
119
122
  // src/commands/zero/org/index.ts
120
123
  import { Command as Command23 } from "commander";
@@ -4683,6 +4686,967 @@ Notes:
4683
4686
  )
4684
4687
  );
4685
4688
 
4689
+ // src/commands/zero/computer-use/index.ts
4690
+ import { Command as Command76 } from "commander";
4691
+
4692
+ // src/commands/zero/computer-use/host.ts
4693
+ import { Command as Command74 } from "commander";
4694
+ import chalk57 from "chalk";
4695
+
4696
+ // src/lib/computer-use/desktop-server.ts
4697
+ import {
4698
+ createServer as createServer2
4699
+ } from "http";
4700
+ import { createServer as createNetServer } from "net";
4701
+
4702
+ // src/lib/computer-use/screencapture.ts
4703
+ import { execFile } from "child_process";
4704
+ import { readFile, unlink } from "fs/promises";
4705
import { randomUUID, timingSafeEqual } from "crypto";
4706
+ import { join as join3 } from "path";
4707
+ import { tmpdir } from "os";
4708
+ import { promisify } from "util";
4709
// Promise-returning wrapper around child_process.execFile.
var execFileAsync = promisify(execFile);

/**
 * Captures the screen into a temporary JPEG via the `screencapture` command
 * and returns it base64-encoded together with the display metrics reported
 * by getScreenInfo().
 *
 * @returns {Promise<{image: string, width: number, height: number, scaleFactor: number, format: string}>}
 */
async function captureScreenshot() {
  const screenshotFile = join3(tmpdir(), `vm0-screenshot-${randomUUID()}.jpg`);
  try {
    await execFileAsync("screencapture", ["-x", "-t", "jpg", screenshotFile]);
    const jpegBytes = await readFile(screenshotFile);
    const { width, height, scaleFactor } = await getScreenInfo();
    return {
      image: jpegBytes.toString("base64"),
      width,
      height,
      scaleFactor,
      format: "jpg"
    };
  } finally {
    // Best-effort cleanup: the temp file may not exist if the capture failed.
    await unlink(screenshotFile).catch(() => {
    });
  }
}
4728
/**
 * Captures a rectangular screen region (passed to `screencapture -R` as
 * "x,y,width,height") into a temporary JPEG and returns it base64-encoded.
 * The reported width/height echo the requested region and scaleFactor is
 * always 1.
 *
 * @param {{x: number, y: number, width: number, height: number}} region
 * @returns {Promise<{image: string, width: number, height: number, scaleFactor: number, format: string}>}
 */
async function captureRegionScreenshot(region) {
  const regionFile = join3(tmpdir(), `vm0-zoom-${randomUUID()}.jpg`);
  try {
    const rectangle = [region.x, region.y, region.width, region.height]
      .map((part) => `${part}`)
      .join(",");
    const captureArgs = ["-x", "-t", "jpg", "-R", rectangle, regionFile];
    await execFileAsync("screencapture", captureArgs);
    const jpegBytes = await readFile(regionFile);
    return {
      image: jpegBytes.toString("base64"),
      width: region.width,
      height: region.height,
      scaleFactor: 1,
      format: "jpg"
    };
  } finally {
    // Best-effort cleanup of the temporary capture file.
    await unlink(regionFile).catch(() => {
    });
  }
}
4753
/**
 * Reads display metrics from `system_profiler SPDisplaysDataType -json`.
 *
 * Returns the first display whose `_spdisplays_pixels` string parses as
 * "<width> x <height>". scaleFactor is 2 when the display's resolution
 * label mentions "retina" (case-insensitive), otherwise 1. Falls back to
 * 1920x1080 @ 1x when no display can be parsed.
 *
 * @returns {Promise<{width: number, height: number, scaleFactor: number}>}
 */
async function getScreenInfo() {
  const profiler = await execFileAsync("system_profiler", [
    "SPDisplaysDataType",
    "-json"
  ]);
  const report = JSON.parse(profiler.stdout);
  for (const adapter of report.SPDisplaysDataType ?? []) {
    for (const display of adapter.spdisplays_ndrvs ?? []) {
      const pixelLabel = display._spdisplays_pixels;
      if (!pixelLabel) {
        continue;
      }
      const dimensions = pixelLabel.match(/(\d+)\s*x\s*(\d+)/);
      if (!dimensions?.[1] || !dimensions[2]) {
        continue;
      }
      const resolutionLabel = display._spdisplays_resolution ?? display.spdisplays_resolution ?? "";
      return {
        width: parseInt(dimensions[1], 10),
        height: parseInt(dimensions[2], 10),
        scaleFactor: /retina/i.test(resolutionLabel) ? 2 : 1
      };
    }
  }
  return { width: 1920, height: 1080, scaleFactor: 1 };
}
4779
+
4780
+ // src/lib/computer-use/cliclick.ts
4781
+ import { execFile as execFile2 } from "child_process";
4782
+ import { promisify as promisify2 } from "util";
4783
+ import { setTimeout as sleep } from "timers/promises";
4784
// Promise-returning wrapper around child_process.execFile, used for cliclick.
var execFileAsync2 = promisify2(execFile2);

/**
 * Drags with the left button by issuing a cliclick `dd:` (drag down) at the
 * start point followed by `du:` (drag up) at the end point in one invocation.
 *
 * @param {number} startX
 * @param {number} startY
 * @param {number} endX
 * @param {number} endY
 * @returns {Promise<void>}
 */
async function leftClickDrag(startX, startY, endX, endY) {
  const pressAtStart = `dd:${startX},${startY}`;
  const releaseAtEnd = `du:${endX},${endY}`;
  await execFileAsync2("cliclick", [pressAtStart, releaseAtEnd]);
}
4791
/**
 * Presses (and leaves held) the left mouse button at the given point using
 * cliclick's `dd:` command.
 *
 * @param {number} x
 * @param {number} y
 * @returns {Promise<void>}
 */
async function leftMouseDown(x, y) {
  const pressCommand = `dd:${x},${y}`;
  await execFileAsync2("cliclick", [pressCommand]);
}
4794
/**
 * Releases the left mouse button at the given point using cliclick's `du:`
 * command.
 *
 * @param {number} x
 * @param {number} y
 * @returns {Promise<void>}
 */
async function leftMouseUp(x, y) {
  const releaseCommand = `du:${x},${y}`;
  await execFileAsync2("cliclick", [releaseCommand]);
}
4797
// Maps each simple pointer action name to the cliclick command prefix that
// executeMouseAction() places before ":<x>,<y>".
var ACTION_COMMANDS = Object.fromEntries([
  ["left_click", "c"],
  ["right_click", "rc"],
  ["middle_click", "mc"],
  ["double_click", "dc"],
  ["triple_click", "tc"],
  ["move", "m"]
]);

// Action names accepted by the simple (single-coordinate) mouse path.
var VALID_ACTIONS = new Set(Object.keys(ACTION_COMMANDS));
4806
/**
 * Verifies that `cliclick` is resolvable by running `which cliclick`.
 *
 * @returns {Promise<void>}
 * @throws {Error} with an install hint when the lookup command fails.
 */
async function checkCliclickInstalled() {
  const isInstalled = await execFileAsync2("which", ["cliclick"]).then(
    () => true,
    () => false
  );
  if (!isInstalled) {
    throw new Error("cliclick not found. Install with: brew install cliclick");
  }
}
4813
/**
 * Runs one of the simple pointer actions listed in ACTION_COMMANDS at (x, y).
 * Checks that cliclick is installed before every invocation.
 *
 * @param {string} action - Key of ACTION_COMMANDS (callers validate it first).
 * @param {number} x
 * @param {number} y
 * @returns {Promise<void>}
 */
async function executeMouseAction(action, x, y) {
  await checkCliclickInstalled();
  const cliclickCommand = `${ACTION_COMMANDS[action]}:${x},${y}`;
  await execFileAsync2("cliclick", [cliclickCommand]);
}
4818
/**
 * Queries the pointer location with `cliclick p` and parses its "x,y" output.
 *
 * @returns {Promise<{x: number, y: number}>}
 * @throws {Error} when the output is not exactly two comma-separated fields
 *   or either field does not start with an integer.
 */
async function getCursorPosition() {
  await checkCliclickInstalled();
  const { stdout } = await execFileAsync2("cliclick", ["p"]);
  const reported = stdout.trim();
  const fields = reported.split(",");
  if (fields.length !== 2) {
    throw new Error(`Unexpected cliclick output: ${reported}`);
  }
  const [x, y] = fields.map((field) => parseInt(field, 10));
  if (Number.isNaN(x) || Number.isNaN(y)) {
    throw new Error(`Failed to parse cursor position: ${reported}`);
  }
  return { x, y };
}
4834
// Named (multi-character) keys accepted in key combos: modifiers, navigation
// and editing keys, followed by f1 through f19.
var VALID_SPECIAL_KEYS = new Set([
  "cmd",
  "ctrl",
  "alt",
  "shift",
  "fn",
  "arrow-up",
  "arrow-down",
  "arrow-left",
  "arrow-right",
  "tab",
  "esc",
  "space",
  "delete",
  "return",
  "enter",
  "home",
  "end",
  "page-up",
  "page-down"
]);
for (let fKey = 1; fKey <= 19; fKey += 1) {
  VALID_SPECIAL_KEYS.add(`f${fKey}`);
}

/**
 * Tells whether a key-combo component is acceptable: either one of the named
 * special keys or any string of length 1.
 *
 * @param {string} key
 * @returns {boolean}
 */
function isValidKeyName(key) {
  if (key.length === 1) {
    return true;
  }
  return VALID_SPECIAL_KEYS.has(key);
}
4861
/**
 * Splits a "+"-separated key combo such as "cmd+shift+s" into its leading
 * modifiers and the final (main) key.
 *
 * @param {string} keys
 * @returns {{modifiers: string[], mainKey: string}}
 * @throws {Error} when any component is empty or fails isValidKeyName().
 */
function parseKeyCombo(keys) {
  const components = keys.split("+");
  if (components.length === 0 || components.includes("")) {
    throw new Error(`Invalid key combo: "${keys}"`);
  }
  const unknownKey = components.find((component) => !isValidKeyName(component));
  if (unknownKey !== void 0) {
    throw new Error(
      `Unknown key: "${unknownKey}". Valid keys: single characters, or special keys like cmd, ctrl, alt, shift, tab, esc, return, arrow-up, f1-f19, etc.`
    );
  }
  // Everything before the last component is held while the last one is pressed.
  const modifiers = [...components];
  const mainKey = modifiers.pop();
  return { modifiers, mainKey };
}
4879
/**
 * Presses a key or key combination (e.g. "return", "cmd+c") through cliclick.
 *
 * Leading components are held with `kd:` and released with `ku:` in reverse
 * order around the main key. How the main key is sent depends on its kind,
 * because cliclick's `kp:` only accepts named special keys:
 *   - a single character is typed with `t:` (so "cmd+c" becomes
 *     `kd:cmd t:c ku:cmd`);
 *   - a lone modifier name is tapped with `kd:`/`ku:`;
 *   - anything else is sent with `kp:`.
 *
 * @param {string} keys - "+"-separated combo accepted by parseKeyCombo().
 * @returns {Promise<void>}
 * @throws {Error} when the combo is invalid or cliclick fails.
 */
async function pressKey(keys) {
  const { modifiers, mainKey } = parseKeyCombo(keys);
  // Keys that cliclick only understands via kd:/ku:.
  const cliclickModifiers = new Set(["alt", "cmd", "ctrl", "fn", "shift"]);

  let mainKeyArgs;
  if (mainKey.length === 1) {
    mainKeyArgs = [`t:${mainKey}`];
  } else if (cliclickModifiers.has(mainKey)) {
    mainKeyArgs = [`kd:${mainKey}`, `ku:${mainKey}`];
  } else {
    mainKeyArgs = [`kp:${mainKey}`];
  }

  const holdArgs = modifiers.map((modifier) => `kd:${modifier}`);
  const releaseArgs = [...modifiers].reverse().map((modifier) => `ku:${modifier}`);
  await execFileAsync2("cliclick", [...holdArgs, ...mainKeyArgs, ...releaseArgs]);
}
4895
/**
 * Holds every key of a combo down for `durationMs` milliseconds, then
 * releases them in reverse order. Uses one cliclick call for the `kd:`
 * commands and a second one for the `ku:` commands.
 *
 * NOTE(review): cliclick's kd/ku appear to be documented for modifier keys
 * only - confirm behaviour when the combo contains a non-modifier key.
 *
 * @param {string} keys - "+"-separated combo accepted by parseKeyCombo().
 * @param {number} durationMs
 * @returns {Promise<void>}
 */
async function holdKey(keys, durationMs) {
  const { modifiers, mainKey } = parseKeyCombo(keys);
  const pressOrder = modifiers.concat(mainKey);
  const releaseOrder = pressOrder.slice().reverse();
  await execFileAsync2(
    "cliclick",
    pressOrder.map((name) => `kd:${name}`)
  );
  await sleep(durationMs);
  await execFileAsync2(
    "cliclick",
    releaseOrder.map((name) => `ku:${name}`)
  );
}
4908
/**
 * Types the given text through cliclick's `t:` command.
 *
 * @param {string} text
 * @returns {Promise<void>}
 */
async function typeText(text) {
  const typeCommand = `t:${text}`;
  await execFileAsync2("cliclick", [typeCommand]);
}
4911
+
4912
+ // src/lib/computer-use/scroll.ts
4913
+ import { execFile as execFile3 } from "child_process";
4914
+ import { promisify as promisify3 } from "util";
4915
// Promise-returning wrapper around child_process.execFile.
var execFileAsync3 = promisify3(execFile3);

// Scroll amount used when the caller does not supply one.
var DEFAULT_SCROLL_AMOUNT = 3;

/**
 * Moves the pointer to (x, y) with cliclick and posts a scroll-wheel event
 * through a JXA (`osascript -l JavaScript`) CoreGraphics call.
 *
 * All numeric inputs are validated before any process is spawned: `amount`
 * is interpolated into the generated script text, so a non-number value
 * (e.g. a string taken from a request body) would otherwise be executed as
 * script source.
 *
 * NOTE(review): the second argument of CGEventCreateScrollWheelEvent is 0,
 * which looks like pixel units while the CLI help describes the amount as
 * "lines" - confirm the intended unit.
 *
 * @param {number} x - Pointer X coordinate.
 * @param {number} y - Pointer Y coordinate.
 * @param {"up"|"down"|"left"|"right"} direction
 * @param {number} [amount=DEFAULT_SCROLL_AMOUNT]
 * @returns {Promise<void>}
 * @throws {TypeError} when x, y or amount is not a finite number.
 * @throws {Error} when direction is not one of the four supported values.
 */
async function scroll(x, y, direction, amount = DEFAULT_SCROLL_AMOUNT) {
  const numericInputs = [
    ["x", x],
    ["y", y],
    ["amount", amount]
  ];
  for (const [label, value] of numericInputs) {
    if (typeof value !== "number" || !Number.isFinite(value)) {
      throw new TypeError(`scroll ${label} must be a finite number`);
    }
  }

  let dy = 0;
  let dx = 0;
  switch (direction) {
    case "up":
      dy = amount;
      break;
    case "down":
      dy = -amount;
      break;
    case "left":
      dx = amount;
      break;
    case "right":
      dx = -amount;
      break;
    default:
      throw new Error(
        `Invalid scroll direction: "${direction}". Expected one of: up, down, left, right`
      );
  }

  await execFileAsync3("cliclick", [`m:${x},${y}`]);
  const script = [
    "ObjC.import('CoreGraphics');",
    `var e = $.CGEventCreateScrollWheelEvent(null, 0, 2, ${dy}, ${dx});`,
    "$.CGEventPost($.kCGHIDEventTap, e);"
  ].join(" ");
  await execFileAsync3("osascript", ["-l", "JavaScript", "-e", script]);
}
4942
+
4943
+ // src/lib/computer-use/clipboard.ts
4944
+ import { execFile as execFile4, spawn as spawn2 } from "child_process";
4945
+ import { promisify as promisify4 } from "util";
4946
// Promise-returning wrapper around child_process.execFile.
var execFileAsync4 = promisify4(execFile4);

/**
 * Reads the clipboard by running `pbpaste` and returning its stdout as-is.
 *
 * @returns {Promise<string>}
 */
async function readClipboard() {
  const pasteResult = await execFileAsync4("pbpaste");
  return pasteResult.stdout;
}
4951
/**
 * Writes text to the clipboard by piping it into `pbcopy`'s stdin.
 *
 * @param {string} text
 * @returns {Promise<void>} Resolves when pbcopy closes with exit code 0;
 *   rejects on a spawn error or any other exit code.
 */
async function writeClipboard(text) {
  return new Promise((resolve, reject) => {
    const pbcopy = spawn2("pbcopy", { stdio: ["pipe", "ignore", "ignore"] });
    pbcopy.on("error", reject);
    pbcopy.on("close", (exitCode) => {
      if (exitCode !== 0) {
        reject(new Error(`pbcopy exited with code ${exitCode}`));
        return;
      }
      resolve();
    });
    pbcopy.stdin.end(text);
  });
}
4965
+
4966
+ // src/lib/computer-use/application.ts
4967
+ import { execFile as execFile5 } from "child_process";
4968
+ import { promisify as promisify5 } from "util";
4969
// Promise-returning wrapper around child_process.execFile.
var execFileAsync5 = promisify5(execFile5);

/**
 * Launches an application with the macOS `open` command, using `-b` for a
 * bundle identifier and `-a` for an application name.
 *
 * A value counts as a bundle identifier only when it has a reverse-DNS shape
 * (dot-separated segments of letters, digits, "-" or "_") and does not end
 * in ".app". Checking for a "." alone misroutes application names such as
 * "Safari.app" or "Visual Studio Code.app" to `-b`.
 *
 * @param {string} nameOrBundleId - e.g. "Safari", "Safari.app" or "com.apple.Safari".
 * @returns {Promise<void>}
 */
async function openApplication(nameOrBundleId) {
  const hasBundleIdShape = /^[A-Za-z0-9_-]+(\.[A-Za-z0-9_-]+)+$/.test(nameOrBundleId);
  const isAppBundleName = /\.app$/i.test(nameOrBundleId);
  const flag = hasBundleIdShape && !isAppBundleName ? "-b" : "-a";
  await execFileAsync5("open", [flag, nameOrBundleId]);
}
4975
+
4976
+ // src/lib/computer-use/desktop-server.ts
4977
/**
 * Buffers an entire request stream and resolves with its contents decoded
 * via Buffer#toString(). No size limit is applied.
 *
 * @param {import("http").IncomingMessage} req
 * @returns {Promise<string>} Rejects with the stream's "error" event value.
 */
function readBody(req) {
  return new Promise((resolve, reject) => {
    const received = [];
    req.on("error", reject);
    req.on("data", (piece) => {
      received.push(piece);
    });
    req.on("end", () => {
      const payload = Buffer.concat(received);
      resolve(payload.toString());
    });
  });
}
4989
/**
 * Handles `GET /zoom`: validates the x/y/width/height query parameters,
 * captures that screen region and responds with the capture as JSON.
 *
 * A parameter that is absent or blank is rejected explicitly.
 * `Number(null)` and `Number("")` are both 0, so converting the raw value
 * directly would silently treat a missing coordinate as 0.
 *
 * NOTE(review): the bounds check compares against getScreenInfo()'s
 * width/height without dividing by scaleFactor, unlike the mouse handler -
 * confirm which coordinate space `screencapture -R` expects.
 *
 * @param {URLSearchParams} searchParams
 * @param {import("http").ServerResponse} res
 * @returns {Promise<void>}
 */
async function handleZoom(searchParams, res) {
  const badRequest = (message) => {
    res.writeHead(400, { "Content-Type": "text/plain" });
    res.end(message);
  };
  const readNumber = (name) => {
    const raw = searchParams.get(name);
    if (raw === null || raw.trim() === "") {
      return Number.NaN;
    }
    return Number(raw);
  };

  const x = readNumber("x");
  const y = readNumber("y");
  const width = readNumber("width");
  const height = readNumber("height");

  if (![x, y, width, height].every((value) => Number.isFinite(value))) {
    badRequest(
      "Missing or invalid query parameters: x, y, width, height are required numbers"
    );
    return;
  }
  if (width <= 0 || height <= 0) {
    badRequest("width and height must be positive");
    return;
  }
  if (x < 0 || y < 0) {
    badRequest("x and y must be non-negative");
    return;
  }

  const info = await getScreenInfo();
  if (x + width > info.width || y + height > info.height) {
    badRequest(`Region exceeds screen bounds (${info.width}x${info.height})`);
    return;
  }

  const result = await captureRegionScreenshot({ x, y, width, height });
  res.writeHead(200, { "Content-Type": "application/json" });
  res.end(JSON.stringify(result));
}
5023
/**
 * Reads a request body of at most 1024 bytes and parses it as JSON.
 *
 * @param {import("http").IncomingMessage} req
 * @returns {Promise<unknown>} Rejects with "Request body too large" (and
 *   destroys the request) once more than 1024 bytes have arrived, with
 *   "Invalid JSON body" when parsing fails, or with the stream's error.
 */
function parseJsonBody(req) {
  return new Promise((resolve, reject) => {
    const pieces = [];
    let receivedBytes = 0;

    const onData = (piece) => {
      receivedBytes += piece.length;
      if (receivedBytes <= 1024) {
        pieces.push(piece);
        return;
      }
      reject(new Error("Request body too large"));
      req.destroy();
    };

    const onEnd = () => {
      let parsed;
      try {
        parsed = JSON.parse(Buffer.concat(pieces).toString());
      } catch {
        reject(new Error("Invalid JSON body"));
        return;
      }
      resolve(parsed);
    };

    req.on("data", onData);
    req.on("end", onEnd);
    req.on("error", reject);
  });
}
5046
/**
 * Handles `POST /mouse`.
 *
 * Simple actions (the keys of ACTION_COMMANDS) take `x`/`y`, which must be
 * finite numbers inside the screen size in points. The remaining actions -
 * left_click_drag, left_mouse_down, left_mouse_up and scroll - also have
 * every field type-checked before it is forwarded, because those values end
 * up in cliclick arguments and, for scroll, in generated script text.
 * Validation failures are answered with 400; success with `{ ok: true }`.
 *
 * @param {import("http").IncomingMessage} req
 * @param {import("http").ServerResponse} res
 * @returns {Promise<void>} Rejects when body parsing or the underlying
 *   action fails (the caller turns that into a 500).
 */
async function handleMouseRequest(req, res) {
  const badRequest = (message) => {
    res.writeHead(400, { "Content-Type": "text/plain" });
    res.end(message);
  };
  const sendOk = () => {
    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify({ ok: true }));
  };
  const isFiniteNumber = (value) => {
    return typeof value === "number" && Number.isFinite(value);
  };

  const body = await parseJsonBody(req);
  if (typeof body !== "object" || body === null || !("action" in body)) {
    badRequest("Missing required fields: action, x, y");
    return;
  }
  const { action } = body;
  if (typeof action !== "string") {
    badRequest("Invalid action");
    return;
  }

  if (VALID_ACTIONS.has(action)) {
    if (!("x" in body) || !("y" in body)) {
      badRequest("Missing required fields: action, x, y");
      return;
    }
    const { x, y } = body;
    if (!isFiniteNumber(x) || !isFiniteNumber(y)) {
      badRequest("Coordinates x and y must be finite numbers");
      return;
    }
    // Bounds are expressed in points: pixel size divided by the scale factor.
    const info = await getScreenInfo();
    const maxX = Math.floor(info.width / info.scaleFactor);
    const maxY = Math.floor(info.height / info.scaleFactor);
    if (x < 0 || x >= maxX || y < 0 || y >= maxY) {
      badRequest(
        `Coordinates out of bounds. Screen size: ${maxX}x${maxY} (points)`
      );
      return;
    }
    await executeMouseAction(action, x, y);
    sendOk();
    return;
  }

  switch (action) {
    case "left_click_drag": {
      const { startX, startY, endX, endY } = body;
      if (![startX, startY, endX, endY].every(isFiniteNumber)) {
        badRequest(
          "Coordinates startX, startY, endX and endY must be finite numbers"
        );
        return;
      }
      await leftClickDrag(startX, startY, endX, endY);
      break;
    }
    case "left_mouse_down":
    case "left_mouse_up": {
      const { x, y } = body;
      if (!isFiniteNumber(x) || !isFiniteNumber(y)) {
        badRequest("Coordinates x and y must be finite numbers");
        return;
      }
      if (action === "left_mouse_down") {
        await leftMouseDown(x, y);
      } else {
        await leftMouseUp(x, y);
      }
      break;
    }
    case "scroll": {
      const { x, y, direction, amount } = body;
      if (!isFiniteNumber(x) || !isFiniteNumber(y)) {
        badRequest("Coordinates x and y must be finite numbers");
        return;
      }
      if (!["up", "down", "left", "right"].includes(direction)) {
        badRequest("direction must be one of: up, down, left, right");
        return;
      }
      // amount is optional; when omitted scroll() applies its own default.
      if (amount !== void 0 && !isFiniteNumber(amount)) {
        badRequest("amount must be a finite number");
        return;
      }
      await scroll(x, y, direction, amount);
      break;
    }
    default:
      badRequest(`Unknown mouse action: ${action}`);
      return;
  }
  sendOk();
}
5120
/**
 * Handles `POST /keyboard` for the actions "key", "hold_key" and "type".
 *
 * Malformed JSON, a non-object body and a missing/non-string `keys` are
 * answered with 400, so they no longer surface as a 500 carrying an
 * internal TypeError message. Success is answered with `{ ok: true }`.
 *
 * @param {import("http").IncomingMessage} req
 * @param {import("http").ServerResponse} res
 * @returns {Promise<void>} Rejects when the key combo is rejected by the
 *   key helpers or the underlying command fails.
 */
async function handleKeyboard(req, res) {
  const badRequest = (message) => {
    res.writeHead(400, { "Content-Type": "text/plain" });
    res.end(message);
  };
  const isNonEmptyString = (value) => {
    return typeof value === "string" && value.length > 0;
  };

  const raw = await readBody(req);
  let body;
  try {
    body = JSON.parse(raw);
  } catch {
    badRequest("Invalid JSON body");
    return;
  }
  if (typeof body !== "object" || body === null) {
    badRequest("Request body must be a JSON object");
    return;
  }

  switch (body.action) {
    case "key":
      if (!isNonEmptyString(body.keys)) {
        badRequest("keys must be a non-empty string");
        return;
      }
      await pressKey(body.keys);
      break;
    case "hold_key":
      if (!isNonEmptyString(body.keys)) {
        badRequest("keys must be a non-empty string");
        return;
      }
      if (typeof body.durationMs !== "number" || !Number.isFinite(body.durationMs) || body.durationMs <= 0) {
        badRequest("durationMs must be a positive number");
        return;
      }
      await holdKey(body.keys, body.durationMs);
      break;
    case "type":
      if (!isNonEmptyString(body.text)) {
        badRequest("text must be a non-empty string");
        return;
      }
      await typeText(body.text);
      break;
    default:
      badRequest(`Unknown keyboard action: ${body.action}`);
      return;
  }
  res.writeHead(200, { "Content-Type": "application/json" });
  res.end(JSON.stringify({ ok: true }));
}
5153
/**
 * Handles `/clipboard`: GET responds with `{ text }` read from the
 * clipboard, POST writes the body's `text` to the clipboard and responds
 * with `{ ok: true }`. Any other method gets 404.
 *
 * For POST, malformed JSON or a body whose `text` is not a string is
 * answered with 400 and nothing is written to the clipboard.
 *
 * @param {import("http").IncomingMessage} req
 * @param {import("http").ServerResponse} res
 * @returns {Promise<void>}
 */
async function handleClipboard(req, res) {
  const sendText = (status, message) => {
    res.writeHead(status, { "Content-Type": "text/plain" });
    res.end(message);
  };
  const sendJson = (payload) => {
    res.writeHead(200, { "Content-Type": "application/json" });
    res.end(JSON.stringify(payload));
  };

  if (req.method === "GET") {
    const text = await readClipboard();
    sendJson({ text });
    return;
  }
  if (req.method !== "POST") {
    sendText(404, "Not found");
    return;
  }

  const raw = await readBody(req);
  let body;
  try {
    body = JSON.parse(raw);
  } catch {
    sendText(400, "Invalid JSON body");
    return;
  }
  // An empty string is allowed: it clears the clipboard.
  if (typeof body !== "object" || body === null || typeof body.text !== "string") {
    sendText(400, "text must be a string");
    return;
  }
  await writeClipboard(body.text);
  sendJson({ ok: true });
}
5169
/**
 * Handles `POST /open-application`: launches the application named by the
 * body's `nameOrBundleId` and responds with `{ ok: true }`.
 *
 * Malformed JSON, a non-object body and an empty or non-string
 * `nameOrBundleId` are answered with 400.
 *
 * @param {import("http").IncomingMessage} req
 * @param {import("http").ServerResponse} res
 * @returns {Promise<void>} Rejects when launching the application fails.
 */
async function handleOpenApplication(req, res) {
  const badRequest = (message) => {
    res.writeHead(400, { "Content-Type": "text/plain" });
    res.end(message);
  };

  const raw = await readBody(req);
  let body;
  try {
    body = JSON.parse(raw);
  } catch {
    badRequest("Invalid JSON body");
    return;
  }
  const target = typeof body === "object" && body !== null ? body.nameOrBundleId : void 0;
  if (typeof target !== "string" || target.length === 0) {
    badRequest("nameOrBundleId must be a non-empty string");
    return;
  }
  await openApplication(target);
  res.writeHead(200, { "Content-Type": "application/json" });
  res.end(JSON.stringify({ ok: true }));
}
5181
/**
 * Finds a currently free TCP port by binding a throwaway server to port 0
 * on 127.0.0.1, reading the assigned port and closing the server again.
 * The port is released before it is returned, so another process could
 * claim it in the meantime.
 *
 * @returns {Promise<number>}
 */
async function getRandomPort2() {
  return new Promise((resolve, reject) => {
    const probe = createNetServer();
    probe.on("error", reject);
    probe.listen(0, "127.0.0.1", () => {
      const assignedPort = probe.address().port;
      probe.close(() => {
        resolve(assignedPort);
      });
    });
  });
}
5193
/**
 * Handles `GET /cursor-position`: responds with the pointer location
 * returned by getCursorPosition() as JSON.
 *
 * @param {import("http").ServerResponse} res
 * @returns {Promise<void>}
 */
async function handleCursorPosition(res) {
  const payload = JSON.stringify(await getCursorPosition());
  res.writeHead(200, { "Content-Type": "application/json" });
  res.end(payload);
}
5198
/**
 * Builds the "<METHOD> <pathname>" string used as the router's switch key.
 *
 * @param {string} method
 * @param {string} pathname
 * @returns {string}
 */
function routeKey(method, pathname) {
  return "".concat(method, " ", pathname);
}
5201
/**
 * Compares a presented secret with the expected one without leaking the
 * position of the first mismatch through timing.
 *
 * Fails closed: anything that is not a string (a missing header, a
 * repeated header delivered as an array, an undefined expected token) and
 * an empty expected token are all rejected.
 *
 * @param {unknown} presented
 * @param {unknown} expected
 * @returns {boolean}
 */
function isAuthorizedDesktopToken(presented, expected) {
  if (typeof presented !== "string" || typeof expected !== "string" || expected.length === 0) {
    return false;
  }
  const presentedBytes = Buffer.from(presented);
  const expectedBytes = Buffer.from(expected);
  // timingSafeEqual throws on length mismatch, so lengths are checked first.
  return presentedBytes.length === expectedBytes.length && timingSafeEqual(presentedBytes, expectedBytes);
}

/**
 * Entry point for every desktop-server request: authenticates the
 * `x-vm0-token` header (403 on mismatch), then dispatches on
 * "<METHOD> <pathname>". Unknown routes get 404. An error thrown by a
 * handler is answered with 500 and the error's message.
 *
 * @param {string} token - Shared secret the client must present.
 * @param {import("http").IncomingMessage} req
 * @param {import("http").ServerResponse} res
 * @returns {Promise<void>}
 */
async function handleRequest(token, req, res) {
  if (!isAuthorizedDesktopToken(req.headers["x-vm0-token"], token)) {
    res.writeHead(403, { "Content-Type": "text/plain" });
    res.end("Forbidden");
    return;
  }
  const url = new URL(req.url ?? "/", "http://localhost");
  const { pathname, searchParams } = url;
  const key = routeKey(req.method ?? "GET", pathname);
  try {
    switch (key) {
      case "GET /screenshot": {
        const result = await captureScreenshot();
        res.writeHead(200, { "Content-Type": "application/json" });
        res.end(JSON.stringify(result));
        break;
      }
      case "GET /info": {
        const info = await getScreenInfo();
        res.writeHead(200, { "Content-Type": "application/json" });
        res.end(JSON.stringify(info));
        break;
      }
      case "GET /zoom":
        await handleZoom(searchParams, res);
        break;
      case "POST /mouse":
        await handleMouseRequest(req, res);
        break;
      case "GET /clipboard":
      case "POST /clipboard":
        await handleClipboard(req, res);
        break;
      case "POST /keyboard":
        await handleKeyboard(req, res);
        break;
      case "POST /open-application":
        await handleOpenApplication(req, res);
        break;
      case "GET /cursor-position":
        await handleCursorPosition(res);
        break;
      default:
        res.writeHead(404, { "Content-Type": "text/plain" });
        res.end("Not found");
    }
  } catch (error) {
    if (res.headersSent) {
      // The status line is already out; just finish the response so the
      // client is not left waiting.
      res.end();
      return;
    }
    const message = error instanceof Error ? error.message : "Internal server error";
    res.writeHead(500, { "Content-Type": "text/plain" });
    res.end(message);
  }
}
5253
/**
 * Starts the desktop HTTP server on 127.0.0.1:<port>. Every request is
 * routed through handleRequest(); if that rejects and no headers have been
 * sent yet, a generic 500 is returned.
 *
 * @param {string} token - Shared secret forwarded to handleRequest().
 * @param {number} port
 * @returns {Promise<import("http").Server>} Resolves once listening;
 *   rejects on a server "error" event.
 */
function startDesktopServer(token, port) {
  const onRequest = (req, res) => {
    handleRequest(token, req, res).catch(() => {
      if (res.headersSent) {
        return;
      }
      res.writeHead(500, { "Content-Type": "text/plain" });
      res.end("Internal server error");
    });
  };
  return new Promise((resolve, reject) => {
    const httpServer = createServer2(onRequest);
    httpServer.on("error", reject);
    httpServer.listen(port, "127.0.0.1", () => {
      resolve(httpServer);
    });
  });
}
5269
+
5270
+ // src/lib/computer-use/ngrok.ts
5271
/**
 * Lazily imports `@ngrok/ngrok` and returns its default export.
 *
 * @returns {Promise<object>} The module's default export.
 * @throws {Error} A descriptive error (original failure attached as `cause`)
 *   when the import fails.
 */
async function loadNgrok2() {
  let ngrokModule;
  try {
    ngrokModule = await import("@ngrok/ngrok");
  } catch (cause) {
    throw new Error(
      "Failed to load ngrok tunnel module. This may be caused by a system library (GLIBC) incompatibility. See: https://github.com/vm0-ai/vm0/issues/6825",
      { cause }
    );
  }
  return ngrokModule.default;
}
5282
/**
 * Opens an ngrok tunnel from `desktop.<endpointPrefix>.internal` to the
 * local desktop server.
 *
 * The upstream address is the literal 127.0.0.1 because that is the only
 * interface the desktop server binds; "localhost" can resolve to ::1 first,
 * where nothing is listening.
 *
 * @param {string} ngrokToken - ngrok auth token.
 * @param {string} endpointPrefix - Prefix used to build the tunnel domain.
 * @param {number} port - Local port of the desktop server.
 * @returns {Promise<void>}
 */
async function startDesktopTunnel(ngrokToken, endpointPrefix, port) {
  const ngrok = await loadNgrok2();
  await ngrok.forward({
    addr: `127.0.0.1:${port}`,
    authtoken: ngrokToken,
    domain: `desktop.${endpointPrefix}.internal`
  });
}
5290
/**
 * Shuts the tunnel down by calling `kill()` on the ngrok module.
 *
 * @returns {Promise<void>}
 */
async function stopDesktopTunnel() {
  const ngrokModule = await loadNgrok2();
  await ngrokModule.kill();
}
5294
+
5295
+ // src/commands/zero/computer-use/host.ts
5296
/**
 * `zero computer-use host start`: registers this machine as a computer-use
 * host, starts the local desktop server plus its tunnel, and then blocks
 * until the user presses ^C twice or the process receives SIGTERM.
 *
 * Everything after registration runs inside one try/finally so the host is
 * unregistered even when port allocation, the server or the tunnel fails.
 * Each cleanup step is best-effort, so one failing step can neither skip
 * the steps after it nor replace the error that caused the shutdown.
 */
var hostStartCommand = new Command74().name("start").description("Start the computer-use host daemon (macOS only)").action(
  withErrorHandler(async () => {
    if (process.platform !== "darwin") {
      throw new Error(
        "Computer-use host requires macOS\n\nThe host daemon uses macOS-specific commands (screencapture, system_profiler)."
      );
    }
    console.log(chalk57.cyan("Registering computer-use host..."));
    const credentials = await registerComputerUseHost();
    let server;
    try {
      const port = await getRandomPort2();
      server = await startDesktopServer(credentials.token, port);
      await startDesktopTunnel(
        credentials.ngrokToken,
        credentials.endpointPrefix,
        port
      );
      console.log();
      console.log(chalk57.green("\u2713 Computer-use host active"));
      console.log(`  Desktop: desktop.${credentials.domain}`);
      console.log();
      console.log(chalk57.dim("Press ^C twice to disconnect"));
      console.log();
      let sigintCount = 0;
      await new Promise((resolve) => {
        // Keeps the event loop alive while waiting for a signal.
        const keepAlive = setInterval(() => {
        }, 6e4);
        const done = () => {
          clearInterval(keepAlive);
          process.removeListener("SIGINT", onSigint);
          process.removeListener("SIGTERM", done);
          resolve();
        };
        const onSigint = () => {
          sigintCount++;
          if (sigintCount === 1) {
            console.log(
              chalk57.dim("\nPress ^C again to disconnect and exit...")
            );
          } else {
            done();
          }
        };
        process.on("SIGINT", onSigint);
        process.once("SIGTERM", done);
      });
    } finally {
      console.log();
      console.log(chalk57.cyan("Stopping computer-use host..."));
      // server is undefined when startup failed before the server existed.
      server?.close();
      // Best-effort: the tunnel may never have started (or ngrok may not
      // even load), and that must not prevent unregistration below.
      await stopDesktopTunnel().catch(() => {
      });
      await unregisterComputerUseHost().catch(() => {
      });
      console.log(chalk57.green("\u2713 Host stopped"));
    }
  })
);
5352
+
5353
+ // src/commands/zero/computer-use/client.ts
5354
+ import { Command as Command75 } from "commander";
5355
+ import { writeFile, mkdir } from "fs/promises";
5356
+ import { join as join4 } from "path";
5357
+
5358
+ // src/lib/computer-use/client.ts
5359
// Last host returned by getComputerUseHost(), stamped with `cachedAt`.
var cachedHost = null;

// How long (ms) a cached host is reused before it is looked up again.
var CACHE_TTL_MS = 3e4;

/**
 * Resolves the active computer-use host, reusing the cached entry while it
 * is younger than CACHE_TTL_MS.
 *
 * @returns {Promise<{domain: string, token: string}>} On a cache hit only
 *   `domain` and `token` are returned; on a miss the object from
 *   getComputerUseHost() is returned unchanged.
 * @throws {Error} when no host is registered.
 */
async function discoverHost() {
  const cacheIsFresh = cachedHost !== null && Date.now() - cachedHost.cachedAt < CACHE_TTL_MS;
  if (cacheIsFresh) {
    const { domain, token } = cachedHost;
    return { domain, token };
  }
  const host = await getComputerUseHost();
  if (!host) {
    throw new Error(
      "No active computer-use host found\n\nStart a host with: zero computer-use host start"
    );
  }
  cachedHost = { ...host, cachedAt: Date.now() };
  return host;
}
5374
/**
 * Sends an authenticated request to the discovered host at
 * `https://desktop.<domain><path>`.
 *
 * @param {string} path - Path (and optional query string) starting with "/".
 * @param {{method?: string, body?: unknown}} [options] - When `body` is not
 *   undefined it is JSON-encoded and sent with a JSON content type.
 * @returns {Promise<Response>} The successful response.
 * @throws {Error} "Host returned <status>: <detail>" for non-2xx responses,
 *   where detail is the response text or, if empty, the status text.
 */
async function callHost(path, options) {
  const host = await discoverHost();
  const headers = { "x-vm0-token": host.token };
  const init = { headers };
  if (options?.method) {
    init.method = options.method;
  }
  if (options?.body !== void 0) {
    headers["content-type"] = "application/json";
    init.body = JSON.stringify(options.body);
  }
  const response = await fetch(`https://desktop.${host.domain}${path}`, init);
  if (response.ok) {
    return response;
  }
  const detail = await response.text().catch(() => "");
  throw new Error(
    `Host returned ${response.status}: ${detail || response.statusText}`
  );
}
5397
+
5398
+ // src/commands/zero/computer-use/client.ts
5399
/**
 * Builds a `<name> <x> <y>` sub-command that posts a simple pointer action
 * to the host's `/mouse` endpoint and prints the JSON reply.
 *
 * Coordinates must be whole integers. The whole argument is checked, so
 * inputs such as "12abc" or "1.5" are rejected instead of being truncated
 * to 12 or 1.
 *
 * @param {string} name - CLI sub-command name (e.g. "left-click").
 * @param {string} action - Action sent to the host (e.g. "left_click").
 * @param {string} description - Help text for the sub-command.
 * @returns {Command75}
 */
function mouseClickCommand(name, action, description) {
  const parseCoordinate = (value) => {
    return /^\s*[+-]?\d+\s*$/.test(value) ? Number(value) : Number.NaN;
  };
  return new Command75().name(name).description(description).argument("<x>", "X coordinate (points)").argument("<y>", "Y coordinate (points)").action(
    withErrorHandler(async (xStr, yStr) => {
      const x = parseCoordinate(xStr);
      const y = parseCoordinate(yStr);
      if (Number.isNaN(x) || Number.isNaN(y)) {
        throw new Error("Coordinates must be integers");
      }
      const response = await callHost("/mouse", {
        method: "POST",
        body: { action, x, y }
      });
      const data = await response.json();
      process.stdout.write(JSON.stringify(data) + "\n");
    })
  );
}
5416
// `screenshot`: fetches a full-screen capture from the host, saves it under
// /tmp/computer-use, prints the file path on stdout and the image metrics
// (width, height, scaleFactor) as JSON on stderr.
var clientScreenshotCommand = new Command75()
  .name("screenshot")
  .description("Capture a screenshot from the remote host")
  .action(
    withErrorHandler(async () => {
      const reply = await callHost("/screenshot");
      const shot = await reply.json();
      const outputDir = "/tmp/computer-use";
      await mkdir(outputDir, { recursive: true });
      const target = join4(outputDir, `screenshot-${Date.now()}.${shot.format}`);
      await writeFile(target, Buffer.from(shot.image, "base64"));
      process.stdout.write(`${target}\n`);
      const { width, height, scaleFactor } = shot;
      process.stderr.write(
        JSON.stringify({ width, height, scaleFactor }) + "\n"
      );
    })
  );
5437
// `zoom`: fetches a capture of the given screen region from the host, saves
// it under /tmp/computer-use, prints the file path on stdout and the image
// metrics (width, height, scaleFactor) as JSON on stderr.
var clientZoomCommand = new Command75()
  .name("zoom")
  .description("Capture a region screenshot from the remote host")
  .requiredOption("--x <number>", "X coordinate of the region")
  .requiredOption("--y <number>", "Y coordinate of the region")
  .requiredOption("--width <number>", "Width of the region")
  .requiredOption("--height <number>", "Height of the region")
  .action(
    withErrorHandler(
      async (opts) => {
        const query = new URLSearchParams({
          x: opts.x,
          y: opts.y,
          width: opts.width,
          height: opts.height
        });
        const reply = await callHost(`/zoom?${query.toString()}`);
        const shot = await reply.json();
        const outputDir = "/tmp/computer-use";
        await mkdir(outputDir, { recursive: true });
        const target = join4(outputDir, `zoom-${Date.now()}.${shot.format}`);
        await writeFile(target, Buffer.from(shot.image, "base64"));
        process.stdout.write(`${target}\n`);
        const { width, height, scaleFactor } = shot;
        process.stderr.write(
          JSON.stringify({ width, height, scaleFactor }) + "\n"
        );
      }
    )
  );
5466
// `info`: prints the host's screen info (the JSON reply of GET /info) on
// stdout, followed by a newline.
var clientInfoCommand = new Command75()
  .name("info")
  .description("Get screen info from the remote host")
  .action(
    withErrorHandler(async () => {
      const reply = await callHost("/info");
      const screenInfo = await reply.json();
      process.stdout.write(`${JSON.stringify(screenInfo)}\n`);
    })
  );
5473
// Simple click sub-commands. Each one is mouseClickCommand(cliName,
// hostAction, helpText) and differs only in the action sent to the host.
var clientLeftClickCommand = mouseClickCommand("left-click", "left_click", "Perform a left click at coordinates");
var clientRightClickCommand = mouseClickCommand("right-click", "right_click", "Perform a right click at coordinates");
var clientMiddleClickCommand = mouseClickCommand("middle-click", "middle_click", "Perform a middle click at coordinates");
var clientDoubleClickCommand = mouseClickCommand("double-click", "double_click", "Perform a double click at coordinates");
var clientTripleClickCommand = mouseClickCommand("triple-click", "triple_click", "Perform a triple click at coordinates");
5498
// `left-click-drag`: converts the four positional arguments with Number()
// and posts a "left_click_drag" action to the host; prints "ok" on success.
var clientLeftClickDragCommand = new Command75()
  .name("left-click-drag")
  .description("Drag from (startX, startY) to (endX, endY)")
  .argument("<startX>", "Start X coordinate")
  .argument("<startY>", "Start Y coordinate")
  .argument("<endX>", "End X coordinate")
  .argument("<endY>", "End Y coordinate")
  .action(
    withErrorHandler(
      async (startX, startY, endX, endY) => {
        const dragRequest = {
          action: "left_click_drag",
          startX: Number(startX),
          startY: Number(startY),
          endX: Number(endX),
          endY: Number(endY)
        };
        await callHost("/mouse", { method: "POST", body: dragRequest });
        process.stdout.write("ok\n");
      }
    )
  );
5515
// `left-mouse-down`: posts a "left_mouse_down" action at (x, y) to the host
// and prints "ok" on success.
var clientLeftMouseDownCommand = new Command75()
  .name("left-mouse-down")
  .description("Press and hold the left mouse button at (x, y)")
  .argument("<x>", "X coordinate")
  .argument("<y>", "Y coordinate")
  .action(
    withErrorHandler(async (x, y) => {
      const pressRequest = { action: "left_mouse_down", x: Number(x), y: Number(y) };
      await callHost("/mouse", { method: "POST", body: pressRequest });
      process.stdout.write("ok\n");
    })
  );
5524
// `left-mouse-up`: posts a "left_mouse_up" action at (x, y) to the host and
// prints "ok" on success.
var clientLeftMouseUpCommand = new Command75()
  .name("left-mouse-up")
  .description("Release the left mouse button at (x, y)")
  .argument("<x>", "X coordinate")
  .argument("<y>", "Y coordinate")
  .action(
    withErrorHandler(async (x, y) => {
      const releaseRequest = { action: "left_mouse_up", x: Number(x), y: Number(y) };
      await callHost("/mouse", { method: "POST", body: releaseRequest });
      process.stdout.write("ok\n");
    })
  );
5533
// `scroll`: posts a "scroll" action to the host. `amount` is only included
// in the request when the optional argument was given, so the host applies
// its own default otherwise. Prints "ok" on success.
var clientScrollCommand = new Command75()
  .name("scroll")
  .description("Scroll at the given screen position")
  .argument("<x>", "X coordinate")
  .argument("<y>", "Y coordinate")
  .argument("<direction>", "Scroll direction: up, down, left, right")
  .argument("[amount]", "Scroll amount in lines (default 3)")
  .action(
    withErrorHandler(
      async (x, y, direction, amount) => {
        const scrollRequest = {
          action: "scroll",
          x: Number(x),
          y: Number(y),
          direction
        };
        if (amount !== void 0) {
          scrollRequest.amount = Number(amount);
        }
        await callHost("/mouse", { method: "POST", body: scrollRequest });
        process.stdout.write("ok\n");
      }
    )
  );
5550
// `read-clipboard`: writes the `text` field of the host's GET /clipboard
// reply to stdout, with no trailing newline added.
var clientReadClipboardCommand = new Command75()
  .name("read-clipboard")
  .description("Read text content from the remote clipboard")
  .action(
    withErrorHandler(async () => {
      const reply = await callHost("/clipboard");
      const clipboard = await reply.json();
      process.stdout.write(clipboard.text);
    })
  );
5557
// `write-clipboard <text>`: posts `{ text }` to the host's /clipboard endpoint
// (through callHost, defined elsewhere in this file) and prints "ok".
var clientWriteClipboardCommand = new Command75()
  .name("write-clipboard")
  .description("Write text content to the remote clipboard")
  .argument("<text>", "Text to write to clipboard")
  .action(
    withErrorHandler(async (text) => {
      const request = { method: "POST", body: { text } };
      await callHost("/clipboard", request);
      process.stdout.write("ok\n");
    })
  );
5566
// `key <combo>`: posts a "key" action carrying the combo string to the host's
// /keyboard endpoint (through callHost, defined elsewhere in this file) and
// prints "ok".
var clientKeyCommand = new Command75()
  .name("key")
  .description("Press a key or key combination (e.g., cmd+c, return)")
  .argument("<combo>", "Key combo string (e.g., cmd+c, ctrl+shift+s, return)")
  .action(
    withErrorHandler(async (combo) => {
      const request = {
        method: "POST",
        body: { action: "key", keys: combo }
      };
      await callHost("/keyboard", request);
      process.stdout.write("ok\n");
    })
  );
5575
// `hold-key <combo> <durationMs>`: posts a "hold_key" action with the combo and
// duration to the host's /keyboard endpoint (through callHost, defined
// elsewhere in this file) and prints "ok".
// Throws Error("durationMs must be a positive integer") for an invalid duration.
var clientHoldKeyCommand = new Command75()
  .name("hold-key")
  .description("Hold a key or key combination for a duration")
  .argument("<combo>", "Key combo string (e.g., shift, cmd+shift)")
  .argument("<durationMs>", "Duration to hold in milliseconds")
  .action(
    withErrorHandler(async (combo, durationStr) => {
      // parseInt() stops at the first non-digit, so "500ms" or "1.9" would be
      // silently accepted as 500 / 1. Require the whole argument to be digits
      // so the check matches what the error message promises.
      const trimmed = String(durationStr).trim();
      const durationMs = /^\+?\d+$/.test(trimmed) ? Number(trimmed) : NaN;
      if (!Number.isSafeInteger(durationMs) || durationMs <= 0) {
        throw new Error("durationMs must be a positive integer");
      }
      await callHost("/keyboard", {
        method: "POST",
        body: { action: "hold_key", keys: combo, durationMs }
      });
      process.stdout.write("ok\n");
    })
  );
5588
// `type <text>`: posts a "type" action carrying the text to the host's
// /keyboard endpoint (through callHost, defined elsewhere in this file) and
// prints "ok".
var clientTypeCommand = new Command75()
  .name("type")
  .description("Type text at the current cursor position")
  .argument("<text>", "Text to type")
  .action(
    withErrorHandler(async (text) => {
      const request = {
        method: "POST",
        body: { action: "type", text }
      };
      await callHost("/keyboard", request);
      process.stdout.write("ok\n");
    })
  );
5597
// `open-app <nameOrBundleId>`: posts `{ nameOrBundleId }` to the host's
// /open-application endpoint (through callHost, defined elsewhere in this
// file) and prints "ok".
var clientOpenAppCommand = new Command75()
  .name("open-app")
  .description("Open or activate a macOS application by name or bundle ID")
  .argument(
    "<nameOrBundleId>",
    "App name (e.g., Safari) or bundle ID (e.g., com.apple.Safari)"
  )
  .action(
    withErrorHandler(async (nameOrBundleId) => {
      const request = { method: "POST", body: { nameOrBundleId } };
      await callHost("/open-application", request);
      process.stdout.write("ok\n");
    })
  );
5609
// `mouse-move` is produced by mouseClickCommand (defined earlier in this file)
// from a command name, an action string and a description.
// NOTE(review): "move" is presumably the action value sent to the host; confirm
// against mouseClickCommand.
var clientMouseMoveCommand = mouseClickCommand("mouse-move", "move", "Move mouse pointer to coordinates");
5614
// `cursor-position`: fetches /cursor-position from the host (through callHost,
// defined elsewhere in this file) and prints the parsed JSON response as a
// single line on stdout.
var clientCursorPositionCommand = new Command75()
  .name("cursor-position")
  .description("Get current cursor position from the remote host")
  .action(
    withErrorHandler(async () => {
      const reply = await callHost("/cursor-position");
      const position = await reply.json();
      const line = `${JSON.stringify(position)}\n`;
      process.stdout.write(line);
    })
  );
5621
+
5622
+ // src/commands/zero/computer-use/index.ts
5623
// `host` command group; its only subcommand here is hostStartCommand.
var hostCommand = new Command76();
hostCommand.name("host");
hostCommand.description("Manage computer-use host daemon");
hostCommand.addCommand(hostStartCommand);
5624
// `client` command group. Subcommands are attached in the order listed below,
// which is the same order as the original chained addCommand() calls.
var clientCommand = [
  clientScreenshotCommand,
  clientZoomCommand,
  clientInfoCommand,
  clientLeftClickCommand,
  clientRightClickCommand,
  clientMiddleClickCommand,
  clientDoubleClickCommand,
  clientTripleClickCommand,
  clientLeftClickDragCommand,
  clientLeftMouseDownCommand,
  clientLeftMouseUpCommand,
  clientScrollCommand,
  clientReadClipboardCommand,
  clientWriteClipboardCommand,
  clientKeyCommand,
  clientHoldKeyCommand,
  clientTypeCommand,
  clientOpenAppCommand,
  clientMouseMoveCommand,
  clientCursorPositionCommand
].reduce(
  (group, subcommand) => group.addCommand(subcommand),
  new Command76().name("client").description("Interact with remote computer-use host")
);
5625
// Root `computer-use` command: groups the `host` and `client` command trees and
// appends the usage examples below after the generated help (addHelpText "after").
// The template literal is emitted verbatim at runtime, so its text must not be edited.
+ var zeroComputerUseCommand = new Command76().name("computer-use").description("Remote desktop control for cloud agents").addCommand(hostCommand).addCommand(clientCommand).addHelpText(
5626
+ "after",
5627
+ `
5628
+ Examples:
5629
+ Start the host daemon (on macOS): zero computer-use host start
5630
+ Take a screenshot (from agent): zero computer-use client screenshot
5631
+ Zoom into a region (from agent): zero computer-use client zoom --x 0 --y 0 --width 500 --height 500
5632
+ Get screen info (from agent): zero computer-use client info
5633
+ Left click at (500, 300): zero computer-use client left-click 500 300
5634
+ Double click at (100, 200): zero computer-use client double-click 100 200
5635
+ Drag from A to B: zero computer-use client left-click-drag 100 100 500 500
5636
+ Press mouse button: zero computer-use client left-mouse-down 200 300
5637
+ Release mouse button: zero computer-use client left-mouse-up 500 500
5638
+ Scroll down at position: zero computer-use client scroll 500 300 down 5
5639
+ Read clipboard text: zero computer-use client read-clipboard
5640
+ Write clipboard text: zero computer-use client write-clipboard "hello"
5641
+ Press key combo: zero computer-use client key "cmd+c"
5642
+ Hold shift for 2 seconds: zero computer-use client hold-key "shift" 2000
5643
+ Type text: zero computer-use client type "Hello, world!"
5644
+ Open an application: zero computer-use client open-app Safari
5645
+ Open by bundle ID: zero computer-use client open-app "com.apple.Safari"
5646
+ Move mouse to (100, 200): zero computer-use client mouse-move 100 200
5647
+ Get cursor position: zero computer-use client cursor-position`
5648
+ );
5649
+
4686
5650
  // src/zero.ts
4687
5651
  var COMMAND_CAPABILITY_MAP = {
4688
5652
  agent: "agent:read",
@@ -4695,7 +5659,8 @@ var COMMAND_CAPABILITY_MAP = {
4695
5659
  slack: "slack:write",
4696
5660
  whoami: null,
4697
5661
  "ask-user": null,
4698
- "developer-support": null
5662
+ "developer-support": null,
5663
+ "computer-use": "computer-use:write"
4699
5664
  };
4700
5665
  var DEFAULT_COMMANDS = [
4701
5666
  zeroOrgCommand,
@@ -4712,7 +5677,8 @@ var DEFAULT_COMMANDS = [
4712
5677
  zeroWhoamiCommand,
4713
5678
  zeroAskUserCommand,
4714
5679
  zeroSkillCommand,
4715
- zeroDeveloperSupportCommand
5680
+ zeroDeveloperSupportCommand,
5681
+ zeroComputerUseCommand
4716
5682
  ];
4717
5683
  function shouldHideCommand(name, payload) {
4718
5684
  if (!payload) return false;
@@ -4728,10 +5694,10 @@ function registerZeroCommands(prog, commands) {
4728
5694
  prog.addCommand(cmd, hidden ? { hidden: true } : {});
4729
5695
  }
4730
5696
  }
4731
- var program = new Command74();
5697
+ var program = new Command77();
4732
5698
  program.name("zero").description(
4733
5699
  "Zero CLI \u2014 interact with the zero platform from inside the sandbox"
4734
- ).version("9.100.2").addHelpText(
5700
+ ).version("9.101.0").addHelpText(
4735
5701
  "after",
4736
5702
  `
4737
5703
  Examples: