replicas-cli 0.2.321 → 0.2.322

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.mjs +712 -39
  2. package/package.json +1 -1
package/dist/index.mjs CHANGED
@@ -7985,7 +7985,7 @@ The CLI is the canonical surface. Chat transcripts that show \`DISPLAY=:99 xdoto
7985
7985
  \`\`\`bash
7986
7986
  # DO
7987
7987
  replicas computer key Return
7988
- replicas computer screenshot /tmp/state.png
7988
+ replicas computer observe /tmp/state.png
7989
7989
 
7990
7990
  # DON'T
7991
7991
  DISPLAY=:99 xdotool key Return
@@ -8003,9 +8003,9 @@ replicas computer info
8003
8003
  # 2) Launch a browser on the workspace display.
8004
8004
  replicas computer launch chrome https://news.ycombinator.com
8005
8005
 
8006
- # 3) Take a screenshot so you can see what's there.
8007
- replicas computer screenshot /tmp/state.png
8008
- # (Read the PNG yourself before deciding where to click.)
8006
+ # 3) Observe the settled screen and browser tab state before clicking.
8007
+ replicas computer observe /tmp/state.png
8008
+ replicas computer browser --snapshot
8009
8009
 
8010
8010
  # 4) Drive the UI.
8011
8011
  replicas computer click 521 700 # click coordinates from the screenshot
@@ -8028,14 +8028,37 @@ If invoked very early in the workspace lifecycle, \`info\` will poll briefly whi
8028
8028
  ### \`replicas computer status\`
8029
8029
  Prints which desktop services are running and the active preview URL (if any). Useful for debugging when a tool call seems to be doing nothing.
8030
8030
 
8031
- ### \`replicas computer screenshot <path>\`
8032
- Captures the current desktop to a PNG at the given path. Read the file (e.g. with your Read tool) to see what's on screen - coordinates from the screenshot drive subsequent \`click\` / \`move\` / \`drag\` calls.
8031
+ ### \`replicas computer screenshot <path> [--raw] [--grid [px]]\`
8032
+ Captures the current desktop to a PNG at the given path.
8033
+
8034
+ Use \`--raw\` for a 1:1 desktop capture with no branding, padding, or rounded corners. Use \`--grid\` for the same 1:1 capture plus a coordinate grid; the optional value sets grid spacing in pixels and defaults to 100. Use the default branded screenshot only when you plan to share the image with \`replicas media upload\`.
8035
+
8036
+ ### \`replicas computer observe <path> [--raw] [--grid [px]] [--timeout MS] [--stable-ms MS] [--poll-ms MS]\`
8037
+ Waits briefly for the screen to stop changing, saves a 1:1 screenshot, and prints JSON with screen dimensions, whether the screen stabilized, frame/change counts, mouse location, active window title, and visible window titles.
8038
+
8039
+ Use this instead of hand-written \`sleep && screenshot\` loops after clicks, navigation, typing, or page loads. By default it saves a 100px coordinate-grid screenshot and waits up to 3s for 600ms of visual stability. Pass \`--raw\` if you need an unannotated 1:1 screenshot.
8040
+
8041
+ ### \`replicas computer browser\`
8042
+ Prints JSON for Chrome tabs launched through Replicas, including page titles and URLs. Pass \`--snapshot\` to include visible page text and interactive controls with DOM viewport bounding boxes.
8043
+
8044
+ Use this alongside \`observe\` when testing web apps so you do not infer navigation, page state, or click targets from pixels alone. Snapshot coordinates are DOM viewport coordinates, not desktop click coordinates.
8045
+
8046
+ ### \`replicas computer browser-click <text> [--exact] [--index N]\`
8047
+ Clicks the first visible Chrome control whose text, label, placeholder, or href matches \`<text>\`. Use this for web buttons and links found via \`browser --snapshot\`; it is faster and less error-prone than converting DOM coordinates to desktop pixels.
8048
+
8049
+ ### \`replicas computer browser-fill <field> <value> [--exact] [--index N]\`
8050
+ Fills the first visible Chrome field whose label, placeholder, name, or text matches \`<field>\`, then dispatches input/change events. Use this for web forms instead of clicking a field and typing through the desktop.
8051
+
8052
+ ### \`replicas computer browser-wait <text> [--mode any|text|title|url|control] [--exact] [--timeout MS]\`
8053
+ Waits until the active Chrome page matches text in the title, URL, body text, or visible controls. Use this after \`browser-click\` / \`browser-fill\` when you need web app state to settle without screenshot polling.
8054
+
8055
+ For \`browser-click\`, \`browser-fill\`, and \`browser-wait\`, pass \`--id <id>\`, \`--title <text>\`, \`--url <text>\`, or \`--page <n>\` when multiple Chrome tabs are open. Run \`replicas computer browser\` first to list tabs. Prefer \`--id\` when a click may change the page title or URL.
8033
8056
 
8034
8057
  ### \`replicas computer click <x> <y> [--button N] [--double] [--modifiers ctrl+shift]\`
8035
- Move to (x, y) and click. Default is left-click (button 1); pass \`--button 3\` for right-click. \`--modifiers\` holds keys during the click (e.g. ctrl-click a link to open in a new tab).
8058
+ Move to (x, y) and click. Coordinates can be absolute pixels or percentages such as \`50%\` \`50%\`. Default is left-click (button 1); pass \`--button 3\` for right-click. \`--modifiers\` holds keys during the click (e.g. ctrl-click a link to open in a new tab).
8036
8059
 
8037
8060
  ### \`replicas computer move <x> <y>\`
8038
- Move the mouse without clicking. Useful for hovering tooltips.
8061
+ Move the mouse without clicking. Coordinates can be absolute pixels or percentages. Useful for hovering tooltips.
8039
8062
 
8040
8063
  ### \`replicas computer type <text> [--delay MS]\`
8041
8064
  Type a literal string into the focused field. Default per-character delay is 12ms (~80 wpm) - feels human and avoids breaking apps that debounce input. Bump \`--delay 30\` for stricter apps.
@@ -8046,10 +8069,10 @@ For key combos (not literal text), use \`key\`. \`type "ctrl+l"\` will literally
8046
8069
  Press a single key or combo. Examples: \`Return\`, \`Escape\`, \`Tab\`, \`ctrl+l\`, \`ctrl+shift+t\`, \`alt+Left\`, \`Page_Down\`, \`Home\`. Syntax matches \`xdotool key\`.
8047
8070
 
8048
8071
  ### \`replicas computer scroll <up|down|left|right> [--amount N] [--x X --y Y]\`
8049
- Scroll the wheel. Pass \`--x\` / \`--y\` to hover before scrolling (otherwise scrolls wherever the cursor currently is). Default amount is 3 wheel ticks.
8072
+ Scroll the wheel. Pass \`--x\` / \`--y\` to hover before scrolling (otherwise scrolls wherever the cursor currently is). Hover coordinates can be absolute pixels or percentages. Default amount is 3 wheel ticks.
8050
8073
 
8051
8074
  ### \`replicas computer drag <fromX> <fromY> <toX> <toY>\`
8052
- Press left mouse at (fromX, fromY), drag to (toX, toY), release. For things like dragging a file onto an upload zone.
8075
+ Press left mouse at (fromX, fromY), drag to (toX, toY), release. Coordinates can be absolute pixels or percentages. For things like dragging a file onto an upload zone.
8053
8076
 
8054
8077
  ### \`replicas computer launch <app> [args...]\`
8055
8078
  Spawns an app on the workspace display. Built-in aliases:
@@ -8072,31 +8095,37 @@ SIGINTs ffmpeg, waits for it to finalize the MP4, prints the output path. Upload
8072
8095
 
8073
8096
  ## Patterns
8074
8097
 
8075
- ### Action / screenshot loop
8076
- You are blind between tool calls. After any action that changes the screen, take a screenshot before deciding the next coordinate:
8098
+ ### Action / observe loop
8099
+ You are blind between tool calls. After any action that changes the screen, observe before deciding the next coordinate:
8077
8100
 
8078
8101
  \`\`\`bash
8079
8102
  replicas computer click 521 700
8080
- sleep 2 # let the page settle
8081
- replicas computer screenshot /tmp/after-click.png
8082
- # read /tmp/after-click.png, decide next click
8103
+ replicas computer observe /tmp/after-click.png
8104
+ # read /tmp/after-click.png and the JSON output, decide next click
8083
8105
  \`\`\`
8084
8106
 
8085
- \`sleep\` is a regular shell sleep - there's no \`replicas computer wait\` command, but you can mix shell sleeps freely.
8107
+ The JSON \`stable\`, \`frames\`, and \`changes\` fields tell you whether something changed while you were waiting. If \`stable\` is false, observe again or increase \`--timeout\` before acting on coordinates.
8086
8108
 
8087
8109
  ### Typing into an address bar
8088
8110
  \`\`\`bash
8089
8111
  replicas computer launch chrome
8090
- sleep 2
8091
- replicas computer key ctrl+l # focus address bar
8112
+ replicas computer observe /tmp/browser-open.png
8113
+ replicas computer key ctrl+l
8092
8114
  replicas computer type "https://example.com"
8093
8115
  replicas computer key Return
8094
- sleep 3 # wait for page load
8095
- replicas computer screenshot /tmp/loaded.png
8116
+ replicas computer observe /tmp/loaded.png
8117
+ replicas computer browser --snapshot
8118
+ replicas computer browser-fill "Search" "replicas"
8119
+ replicas computer browser-click "More information" --title "Example"
8120
+ replicas computer browser-wait "Example Domain" --mode title --title "Example"
8096
8121
  \`\`\`
8097
8122
 
8098
8123
  ### Coordinates from screenshots
8099
- The display is 1920\xD71080 by default. Screenshot pixels map 1:1 to click coordinates - if your Read tool shows a button at pixel (520, 700), click \`replicas computer click 520 700\`. **No translation needed.** Modern image-reading models often imagine the screenshot is at a different resolution; trust the \`xdpyinfo\` value (\`replicas computer status\` shows the real size).
8124
+ The display is 1920\xD71080 by default. For click planning, use \`observe\`, \`screenshot --raw\`, or \`screenshot --grid\`; those pixels map 1:1 to click coordinates. If the grid/raw screenshot shows a button at pixel (520, 700), click \`replicas computer click 520 700\`. The default screenshot is branded for sharing and has padding around the desktop, so do not use it for coordinates.
8125
+
8126
+ Use percentages for broad, layout-relative targets when exact pixels are unnecessary: \`replicas computer click 50% 50%\` clicks the center of the screen, and \`replicas computer move 95% 5%\` moves near the top-right.
8127
+
8128
+ Modern image-reading models often imagine the screenshot is at a different resolution. Trust the dimensions printed by \`observe\`, \`replicas computer screenshot ... --raw\` / \`--grid\`, and the \`xdpyinfo\` value shown by \`replicas computer status\`.
8100
8129
 
8101
8130
  ### Letting the user watch
8102
8131
  The Desktop tab is already live in the dashboard - the user can open it any time. If you're communicating with the user somewhere else (Slack, PR comment, etc.), grab the URL with \`replicas computer info\` and share it inline so they can watch you work.
@@ -8115,7 +8144,7 @@ Then embed the printed \`![\u2026](\u2026)\` line in your chat reply. See \`MEDI
8115
8144
 
8116
8145
  - **"Desktop services script missing"**: workspace image is older than this skill. Tell the user - nothing you can do from the CLI side.
8117
8146
  - **\`xdotool ... failed: Can't open display\`**: Xvfb didn't come up. \`replicas computer status\` will show which service is dead. Re-running any CLI command auto-attempts to start it.
8118
- - **Browser doesn't appear after \`launch chrome\`**: give it 1-2s, then screenshot. Chrome cold-start on the virtual display takes ~500ms but bigger pages take longer.
8147
+ - **Browser doesn't appear after \`launch chrome\`**: run \`replicas computer observe /tmp/state.png\`. Chrome cold-start on the virtual display takes ~500ms but bigger pages take longer.
8119
8148
  - **Live preview shows static / black screen**: the browser may have crashed. \`replicas computer status\` should show no Chrome process - re-launch.
8120
8149
  - **\`replicas computer info\` errors with "not registered"**: engine couldn't register the preview at startup (transient monolith error, or warming mode). Re-running the engine usually fixes it. Until it's registered, the Desktop tab will show a placeholder.
8121
8150
 
@@ -9507,7 +9536,7 @@ var HOOK_EXEC_MAX_BUFFER_BYTES = 10 * 1024 * 1024;
9507
9536
  var REPLICAS_CONFIG_FILENAMES = ["replicas.json", "replicas.yaml", "replicas.yml"];
9508
9537
 
9509
9538
  // ../shared/src/cli-version.ts
9510
- var CLI_VERSION = "0.2.321";
9539
+ var CLI_VERSION = "0.2.322";
9511
9540
 
9512
9541
  // ../shared/src/engine/environment.ts
9513
9542
  var DESKTOP_NOVNC_PORT = 6080;
@@ -14374,7 +14403,8 @@ async function mediaListCommand(options) {
14374
14403
 
14375
14404
  // src/commands/computer.ts
14376
14405
  import { spawn as spawn3, spawnSync } from "child_process";
14377
- import { closeSync, existsSync, mkdirSync, openSync, readFileSync, readSync, rmSync, writeFileSync } from "fs";
14406
+ import { createHash as createHash2 } from "crypto";
14407
+ import { closeSync, copyFileSync, existsSync, mkdirSync, openSync, readFileSync, readSync, rmSync, writeFileSync } from "fs";
14378
14408
  import { dirname, isAbsolute, resolve } from "path";
14379
14409
  import chalk20 from "chalk";
14380
14410
  var STATE_DIR = process.env.REPLICAS_DESKTOP_STATE_DIR || "/tmp/replicas-computer";
@@ -14384,6 +14414,15 @@ var SERVICES_SCRIPT = "/usr/local/bin/replicas-start-desktop-services";
14384
14414
  var CHROME_WRAPPER = "/usr/local/bin/replicas-chrome";
14385
14415
  var INFO_WAIT_TIMEOUT_MS = 1e4;
14386
14416
  var INFO_WAIT_INTERVAL_MS = 500;
14417
/**
 * TCP port of the Chrome remote-debugging endpoint used by the browser commands.
 *
 * Read once at module load from REPLICAS_DESKTOP_CHROME_DEBUG_PORT. Falls back to
 * 9222 when the variable is unset or empty, and fails when it is set to anything
 * other than a decimal integer in the range 1-65535.
 */
var CHROME_DEBUG_PORT = (() => {
  const value = process.env.REPLICAS_DESKTOP_CHROME_DEBUG_PORT;
  if (!value) return 9222;
  // parseInt alone tolerates trailing garbage ("9222abc" -> 9222), so insist on
  // a digits-only value (surrounding whitespace is still allowed) before parsing.
  const trimmed = value.trim();
  const port = /^\d+$/.test(trimmed) ? Number.parseInt(trimmed, 10) : Number.NaN;
  if (!Number.isInteger(port) || port <= 0 || port > 65535) {
    fail(`REPLICAS_DESKTOP_CHROME_DEBUG_PORT must be a valid TCP port, got ${JSON.stringify(value)}`);
  }
  return port;
})();
14387
14426
  function fail(msg) {
14388
14427
  throw new Error(msg);
14389
14428
  }
@@ -14409,14 +14448,41 @@ function runDisplayCmd(bin, args) {
14409
14448
  }
14410
14449
  return r.stdout?.toString() ?? "";
14411
14450
  }
14451
/**
 * Best-effort runner for a display command.
 *
 * Calls ensureServicesRunning() first, then runs `bin` synchronously with the
 * environment returned by withDisplay().
 *
 * @param {string} bin - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @returns {string|null} Trimmed stdout, or null when the exit status is not 0
 *   or the output is empty.
 */
function tryDisplayCmd(bin, args) {
  ensureServicesRunning();
  const { status, stdout } = spawnSync(bin, args, { env: withDisplay(), stdio: "pipe" });
  if (status !== 0) {
    return null;
  }
  const text = stdout?.toString().trim();
  return text ? text : null;
}
14412
14458
  function parseCoord(value, label) {
14459
+ if (!/^-?\d+$/.test(value)) fail(`${label} must be an integer (got "${value}")`);
14413
14460
  const n = Number.parseInt(value, 10);
14414
14461
  if (!Number.isFinite(n)) fail(`${label} must be an integer (got "${value}")`);
14415
14462
  return n;
14416
14463
  }
14464
/**
 * Read the display size by parsing the "dimensions: WxH pixels" line of `xdpyinfo`.
 *
 * @returns {{ width: number, height: number }} Display size in pixels.
 * @throws {Error} When the dimensions line cannot be found in the output.
 */
function getDisplayDimensions() {
  const info = runDisplayCmd("xdpyinfo", []);
  const found = /dimensions:\s+(\d+)x(\d+)\s+pixels/.exec(info);
  if (found === null) fail("could not read display dimensions from xdpyinfo");
  const [, rawWidth, rawHeight] = found;
  return { width: Number.parseInt(rawWidth, 10), height: Number.parseInt(rawHeight, 10) };
}
14470
/**
 * Convert a coordinate argument into a pixel offset along one screen axis.
 *
 * Accepts either an integer pixel value in [0, size - 1] or a percentage such
 * as "50%" or ".5%", which is mapped onto the range [0, size - 1] and rounded.
 *
 * @param {string} value - Raw argument text.
 * @param {string} label - Name of the argument, used in error messages.
 * @param {number} size - Length of the axis in pixels.
 * @returns {number} Pixel offset along the axis.
 * @throws {Error} When the value is malformed or out of range.
 */
function parseScreenCoord(value, label, size) {
  if (!value.endsWith("%")) {
    const pixels = parseCoord(value, label);
    if (pixels < 0 || pixels >= size) fail(`${label} must be between 0 and ${size - 1} pixels, or 0%-100% (got "${value}")`);
    return pixels;
  }
  const digits = value.slice(0, -1);
  const wellFormed = /^(?:\d+(?:\.\d+)?|\.\d+)$/.test(digits);
  if (!wellFormed) fail(`${label} percent must be a number between 0% and 100% (got "${value}")`);
  const percent = Number.parseFloat(digits);
  const inRange = Number.isFinite(percent) && percent >= 0 && percent <= 100;
  if (!inRange) fail(`${label} percent must be between 0% and 100% (got "${value}")`);
  return Math.round(percent / 100 * (size - 1));
}
14417
14482
  function resolvePath(p) {
14418
14483
  return isAbsolute(p) ? p : resolve(process.cwd(), p);
14419
14484
  }
14485
/** Return a promise that resolves (with no value) after roughly `ms` milliseconds. */
var sleep = (ms) => new Promise((wake) => {
  setTimeout(wake, ms);
});
14420
14486
  async function lookupDesktopViewerUrl() {
14421
14487
  try {
14422
14488
  const list = await listAgentPreviews();
@@ -14493,7 +14559,59 @@ var SHADOW_OFFSET_Y_FRACTION = 0.013;
14493
14559
  var SHADOW_ALPHA = 0.6;
14494
14560
  var SCREENSHOT_MASK_TEMPLATE = `<svg width="__W__" height="__H__" xmlns="http://www.w3.org/2000/svg"><rect width="__W__" height="__H__" rx="__R__" ry="__R__" fill="white"/></svg>`;
14495
14561
  var SHADOW_MASK_TEMPLATE = `<svg width="__SW__" height="__SH__" xmlns="http://www.w3.org/2000/svg"><rect x="__M__" y="__M__" width="__W__" height="__H__" rx="__R__" ry="__R__" fill="white"/></svg>`;
14496
- async function computerScreenshotCommand(path6) {
14562
/**
 * Interpret the value of the `--grid` option.
 *
 * @param {string|boolean|undefined} value - `undefined`/`false` when no grid is wanted,
 *   `true` when the flag was given without a value, otherwise the spacing as text.
 * @returns {number|null} Grid spacing in pixels (100 for a bare flag), or null for no grid.
 * @throws {Error} When the spacing is not an integer between 25 and 500.
 */
function parseGridSize(value) {
  const disabled = value === void 0 || value === false;
  if (disabled) return null;
  let size;
  if (value === true) {
    size = 100;
  } else {
    size = parseCoord(value, "--grid");
  }
  if (size < 25 || size > 500) fail("--grid must be between 25 and 500 pixels");
  return size;
}
14568
/**
 * Build an SVG overlay containing a labelled coordinate grid.
 *
 * Vertical and horizontal lines are drawn every `size` pixels starting at 0;
 * every fifth line is drawn with stroke width 2 instead of 1. Interior lines
 * get a text label with their pixel offset (x labels along the top, y labels
 * along the left edge).
 *
 * @param {number} width - Overlay width in pixels.
 * @param {number} height - Overlay height in pixels.
 * @param {number} size - Grid spacing in pixels.
 * @returns {string} SVG markup.
 */
function buildGridSvg(width, height, size) {
  const strokeFor = (offset) => (offset % (size * 5) === 0 ? 2 : 1);
  const verticals = [];
  const columnLabels = [];
  for (let x = 0; x <= width; x += size) {
    verticals.push(`<line x1="${x}" y1="0" x2="${x}" y2="${height}" stroke="#00e5ff" stroke-opacity="0.42" stroke-width="${strokeFor(x)}"/>`);
    if (x > 0 && x < width) {
      columnLabels.push(`<text x="${x + 4}" y="18">${x}</text>`);
    }
  }
  const horizontals = [];
  const rowLabels = [];
  for (let y = 0; y <= height; y += size) {
    horizontals.push(`<line x1="0" y1="${y}" x2="${width}" y2="${y}" stroke="#00e5ff" stroke-opacity="0.42" stroke-width="${strokeFor(y)}"/>`);
    if (y > 0 && y < height) {
      rowLabels.push(`<text x="4" y="${y - 4}">${y}</text>`);
    }
  }
  // Keep the emitted order: all vertical lines, then horizontal ones; x labels, then y labels.
  const lines = [...verticals, ...horizontals];
  const labels = [...columnLabels, ...rowLabels];
  return [
    `<svg width="${width}" height="${height}" xmlns="http://www.w3.org/2000/svg">`,
    `<style>text{font:16px monospace;fill:white;paint-order:stroke;stroke:black;stroke-width:4px;stroke-linejoin:round}</style>`,
    lines.join("\n"),
    labels.join("\n"),
    `</svg>`
  ].join("\n");
}
14587
/**
 * Composite a coordinate grid on top of a raw screenshot using ffmpeg.
 *
 * Writes the grid SVG to `gridPath`, then overlays it at (0, 0) on `rawPath`
 * and writes a single frame to `target`. The caller is responsible for
 * removing `gridPath` afterwards.
 *
 * @param {string} rawPath - Path of the raw screenshot to annotate.
 * @param {string} target - Output image path (overwritten if present).
 * @param {number} width - Screenshot width in pixels.
 * @param {number} height - Screenshot height in pixels.
 * @param {number} gridSize - Grid spacing in pixels.
 * @param {string} gridPath - Scratch path for the generated grid SVG.
 * @throws {Error} When ffmpeg cannot be started or exits unsuccessfully.
 */
function overlayGrid(rawPath, target, width, height, gridSize, gridPath) {
  writeFileSync(gridPath, buildGridSvg(width, height, gridSize));
  const r = spawnSync(
    "ffmpeg",
    [
      "-y",
      "-hide_banner",
      "-loglevel",
      "error",
      "-i",
      rawPath,
      "-i",
      gridPath,
      "-filter_complex",
      "[0:v][1:v]overlay=0:0:format=auto",
      "-frames:v",
      "1",
      "-update",
      "1",
      target
    ],
    { stdio: "pipe" }
  );
  // spawnSync reports spawn failures (e.g. ffmpeg not installed) through `error`
  // with a null status, which would otherwise surface as the unhelpful "exit null".
  if (r.error) {
    fail(`ffmpeg grid overlay failed: ${r.error.message}`);
  }
  if (r.status !== 0) {
    const exitReason = r.signal ? `killed by ${r.signal}` : `exit ${r.status}`;
    fail(`ffmpeg grid overlay failed: ${r.stderr?.toString().trim() || exitReason}`);
  }
}
14614
+ async function computerScreenshotCommand(path6, options = {}) {
14497
14615
  const target = resolvePath(path6);
14498
14616
  mkdirSync(dirname(target), { recursive: true });
14499
14617
  const stamp = `${process.pid}-${Date.now()}`;
@@ -14501,9 +14619,21 @@ async function computerScreenshotCommand(path6) {
14501
14619
  const svgPath = `/tmp/replicas-screenshot-${stamp}.brand.svg`;
14502
14620
  const maskPath = `/tmp/replicas-screenshot-${stamp}.mask.svg`;
14503
14621
  const shadowPath = `/tmp/replicas-screenshot-${stamp}.shadow.svg`;
14622
+ const gridPath = `/tmp/replicas-screenshot-${stamp}.grid.svg`;
14504
14623
  try {
14505
14624
  runDisplayCmd("scrot", ["-o", rawPath]);
14506
14625
  const { width, height } = readPngDimensions(rawPath);
14626
+ const gridSize = parseGridSize(options.grid);
14627
+ if (options.raw && gridSize === null) {
14628
+ copyFileSync(rawPath, target);
14629
+ console.log(`${target} (${width}x${height}, raw 1:1 desktop pixels)`);
14630
+ return;
14631
+ }
14632
+ if (gridSize !== null) {
14633
+ overlayGrid(rawPath, target, width, height, gridSize, gridPath);
14634
+ console.log(`${target} (${width}x${height}, raw 1:1 desktop pixels, ${gridSize}px grid)`);
14635
+ return;
14636
+ }
14507
14637
  const padX = Math.round(width * BRAND_PAD_FRACTION);
14508
14638
  const padY = Math.round(height * BRAND_PAD_FRACTION);
14509
14639
  const canvasW = width + padX * 2;
@@ -14557,12 +14687,542 @@ async function computerScreenshotCommand(path6) {
14557
14687
  rmSync(svgPath, { force: true });
14558
14688
  rmSync(maskPath, { force: true });
14559
14689
  rmSync(shadowPath, { force: true });
14690
+ rmSync(gridPath, { force: true });
14560
14691
  }
14561
14692
  console.log(target);
14562
14693
  }
14694
/**
 * Compute the SHA-256 digest of a file's contents.
 *
 * @param {string} path6 - File to read.
 * @returns {string} Lowercase hexadecimal digest.
 */
function hashFile(path6) {
  const hasher = createHash2("sha256");
  const contents = readFileSync(path6);
  hasher.update(contents);
  return hasher.digest("hex");
}
14697
/**
 * Capture the desktop repeatedly until consecutive frames stop changing.
 *
 * Each iteration grabs a frame with `scrot` into a scratch file next to
 * `target`, hashes it, and copies it over `target`, so `target` always holds
 * the most recent frame. The screen counts as stable once at least two frames
 * were taken and no hash change was seen for `stableMs`.
 *
 * @param {string} target - Path that receives the latest captured frame.
 * @param {{ timeoutMs: number, stableMs: number, pollMs: number }} options -
 *   Overall deadline, required quiet period, and delay between frames (ms).
 * @returns {Promise<{ width: number, height: number, stable: boolean, elapsedMs: number, frames: number, changes: number }>}
 *   Dimensions of the last frame plus capture statistics; `stable` is false
 *   when the deadline passed first.
 */
async function captureStableRawScreenshot(target, options) {
  const { timeoutMs, stableMs, pollMs } = options;
  const startedAt = Date.now();
  const scratchPath = `${target}.frame.png`;
  let previousHash = null;
  let quietSince = startedAt;
  let frames = 0;
  let changes = 0;
  let width = 0;
  let height = 0;
  try {
    while (Date.now() - startedAt <= timeoutMs) {
      runDisplayCmd("scrot", ["-o", scratchPath]);
      frames += 1;
      ({ width, height } = readPngDimensions(scratchPath));
      const currentHash = hashFile(scratchPath);
      const capturedAt = Date.now();
      const isFirstFrame = previousHash === null;
      const differs = !isFirstFrame && currentHash !== previousHash;
      if (differs) {
        changes += 1;
      }
      // The quiet period restarts on the first frame and on every visible change.
      if (isFirstFrame || differs) {
        quietSince = capturedAt;
      }
      previousHash = currentHash;
      copyFileSync(scratchPath, target);
      const settled = frames > 1 && capturedAt - quietSince >= stableMs;
      if (settled) {
        return { width, height, stable: true, elapsedMs: capturedAt - startedAt, frames, changes };
      }
      await sleep(pollMs);
    }
    return { width, height, stable: false, elapsedMs: Date.now() - startedAt, frames, changes };
  } finally {
    rmSync(scratchPath, { force: true });
  }
}
14733
/**
 * Query the pointer position via `xdotool getmouselocation --shell`.
 *
 * The output is parsed as KEY=value lines; keys are lowercased and values
 * parsed as base-10 integers.
 *
 * @returns {{ x: number, y: number, screen: number, window: number }|null}
 *   Pointer state, with `screen`/`window` defaulting to 0 when not numeric, or
 *   null when the command yields nothing or x/y cannot be parsed.
 */
function getMouseLocation() {
  const shellOutput = tryDisplayCmd("xdotool", ["getmouselocation", "--shell"]);
  if (!shellOutput) return null;
  const pairs = [];
  for (const line of shellOutput.split("\n")) {
    const [key, value] = line.split("=");
    pairs.push([key.toLowerCase(), Number.parseInt(value, 10)]);
  }
  const { x, y, screen, window: windowId } = Object.fromEntries(pairs);
  if (!(Number.isFinite(x) && Number.isFinite(y))) return null;
  const orZero = (n) => (Number.isFinite(n) ? n : 0);
  return {
    x,
    y,
    screen: orZero(screen),
    window: orZero(windowId)
  };
}
14750
/**
 * Look up the title of the active window with `xdotool getactivewindow getwindowname`.
 *
 * @returns {string|null} The title, or null when the command fails or prints nothing.
 */
function getActiveWindowTitle() {
  const title = tryDisplayCmd("xdotool", ["getactivewindow", "getwindowname"]);
  return title;
}
14753
/**
 * List titles of visible windows using `xdotool search --onlyvisible`.
 *
 * @returns {string[]} Up to 20 non-empty titles; empty when the command fails or prints nothing.
 */
function getVisibleWindowTitles() {
  const listing = tryDisplayCmd("xdotool", ["search", "--onlyvisible", "--name", ".", "getwindowname", "%@"]);
  if (!listing) {
    return [];
  }
  const titles = listing.split("\n").filter((title) => Boolean(title));
  return titles.slice(0, 20);
}
14757
/**
 * `replicas computer observe <path>`: wait for the screen to settle, save a
 * screenshot, and print a JSON description of the desktop state.
 *
 * All options are validated before anything is captured, so a bad `--grid`
 * value fails immediately instead of after the stabilization wait.
 *
 * @param {string} path6 - Where to write the screenshot (relative paths resolve against the cwd).
 * @param {object} [options]
 * @param {string} [options.timeout] - Maximum time to wait for stability, in ms (default 3000).
 * @param {string} [options.stableMs] - Quiet period that counts as stable, in ms (default 600).
 * @param {string} [options.pollMs] - Delay between frames, in ms, 50-2000 (default 200).
 * @param {boolean} [options.raw] - Save the unannotated frame instead of a grid overlay.
 * @param {string|boolean} [options.grid] - Grid spacing in pixels; a 100px grid is used when omitted.
 * @returns {Promise<void>} Resolves after the JSON summary has been printed.
 * @throws {Error} On invalid options or when capturing/annotating fails.
 */
async function computerObserveCommand(path6, options = {}) {
  // Nullish checks (rather than truthiness) so an explicitly supplied but empty
  // value is rejected by parseCoord instead of silently falling back to the default.
  const timeoutMs = options.timeout != null ? parseCoord(options.timeout, "--timeout") : 3e3;
  const stableMs = options.stableMs != null ? parseCoord(options.stableMs, "--stable-ms") : 600;
  const pollMs = options.pollMs != null ? parseCoord(options.pollMs, "--poll-ms") : 200;
  if (timeoutMs < 0) fail("--timeout must be >= 0");
  if (stableMs < 0) fail("--stable-ms must be >= 0");
  if (pollMs < 50 || pollMs > 2e3) fail("--poll-ms must be between 50 and 2000");
  const gridSize = options.raw ? null : parseGridSize(options.grid ?? true);
  const target = resolvePath(path6);
  mkdirSync(dirname(target), { recursive: true });
  const stamp = `${process.pid}-${Date.now()}`;
  const rawPath = `/tmp/replicas-observe-${stamp}.raw.png`;
  const gridPath = `/tmp/replicas-observe-${stamp}.grid.svg`;
  try {
    const capture = await captureStableRawScreenshot(rawPath, { timeoutMs, stableMs, pollMs });
    if (gridSize === null) {
      copyFileSync(rawPath, target);
    } else {
      overlayGrid(rawPath, target, capture.width, capture.height, gridSize, gridPath);
    }
    console.log(JSON.stringify({
      screenshot: target,
      width: capture.width,
      height: capture.height,
      stable: capture.stable,
      elapsedMs: capture.elapsedMs,
      frames: capture.frames,
      changes: capture.changes,
      mouse: getMouseLocation(),
      activeWindow: getActiveWindowTitle(),
      visibleWindows: getVisibleWindowTitles(),
      mode: gridSize === null ? "raw" : "grid",
      gridSize
    }, null, 2));
  } finally {
    // Scratch files are removed whether or not the capture succeeded.
    rmSync(rawPath, { force: true });
    rmSync(gridPath, { force: true });
  }
}
14796
/**
 * GET a JSON document from Chrome's DevTools HTTP endpoint on 127.0.0.1.
 *
 * @param {string} path6 - Request path including the leading slash (e.g. "/json/list").
 * @returns {Promise<unknown>} The parsed JSON body.
 * @throws {Error} When the endpoint is unreachable, does not answer in time,
 *   responds with a non-2xx status, or returns a body that is not valid JSON.
 */
async function fetchChromeJson(path6) {
  // Bound the request so a port that accepts connections but never answers
  // cannot hang the CLI indefinitely.
  const requestTimeoutMs = 5e3;
  let res;
  try {
    res = await fetch(`http://127.0.0.1:${CHROME_DEBUG_PORT}${path6}`, {
      signal: AbortSignal.timeout(requestTimeoutMs)
    });
  } catch (error) {
    if (error instanceof Error && error.name === "TimeoutError") {
      fail(`Chrome DevTools on localhost:${CHROME_DEBUG_PORT} did not respond within ${requestTimeoutMs}ms.`);
    }
    fail(
      `Chrome DevTools is not available on localhost:${CHROME_DEBUG_PORT}. Launch Chrome with \`replicas computer launch chrome <url>\` and try again.`
    );
  }
  if (!res.ok) fail(`Chrome DevTools returned HTTP ${res.status}`);
  try {
    return await res.json();
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    fail(`Chrome DevTools returned invalid JSON from ${path6}: ${reason}`);
  }
}
14813
/**
 * Fetch the DevTools target list and keep only targets of type "page".
 *
 * @returns {Promise<object[]>} Target descriptors as returned by `/json/list`.
 * @throws {Error} When the endpoint does not return an array.
 */
async function getChromePages() {
  const targets = await fetchChromeJson("/json/list");
  if (!Array.isArray(targets)) fail("Chrome DevTools returned an unexpected target list");
  const isPageTarget = (target) => typeof target === "object" && target !== null && target.type === "page";
  return targets.filter(isPageTarget);
}
14818
/**
 * Pick one debuggable Chrome page according to the selection options.
 *
 * Filters are cumulative: `id` must match exactly, `title` and `url` match as
 * case-insensitive substrings. `page` is the zero-based index into the
 * remaining matches (default 0).
 *
 * @param {{ id?: string, title?: string, url?: string, page?: string }} [options]
 * @returns {Promise<object>} The chosen target descriptor, guaranteed to carry `webSocketDebuggerUrl`.
 * @throws {Error} When `page` is invalid or no matching page exposes a debugger URL.
 */
async function selectChromePage(options = {}) {
  const pages = await getChromePages();
  const predicates = [];
  if (options.id) {
    predicates.push((candidate) => candidate.id === options.id);
  }
  if (options.title) {
    const titleNeedle = options.title.toLowerCase();
    predicates.push((candidate) => (candidate.title ?? "").toLowerCase().includes(titleNeedle));
  }
  if (options.url) {
    const urlNeedle = options.url.toLowerCase();
    predicates.push((candidate) => (candidate.url ?? "").toLowerCase().includes(urlNeedle));
  }
  const matches = pages.filter((candidate) => predicates.every((accepts) => accepts(candidate)));
  let index = 0;
  if (options.page) {
    index = parseCoord(options.page, "--page");
  }
  if (index < 0) fail("--page must be >= 0");
  const page = matches[index];
  const webSocketDebuggerUrl = page?.webSocketDebuggerUrl;
  if (!webSocketDebuggerUrl) {
    const noun = matches.length === 1 ? "" : "es";
    fail(
      `No matching debuggable Chrome page found (${matches.length} match${noun}, index ${index}). Use \`replicas computer browser\` to list pages.`
    );
  }
  return { ...page, webSocketDebuggerUrl };
}
14843
/**
 * Build the JavaScript source of a self-invoking function that snapshots the
 * current page when evaluated inside it.
 *
 * The generated code returns `{ title, url, text, controls }`:
 * - `text` is `document.body.innerText` with whitespace runs collapsed,
 *   trimmed, and cut to `textLimit` characters;
 * - `controls` holds the first `elementLimit` visible elements matching the
 *   link/button/input/textarea/select/role/contenteditable selector, each with
 *   tag, role, type, a display text, href, disabled state, and a rounded
 *   `getBoundingClientRect()` box (viewport coordinates) including its center.
 *
 * An element counts as visible when its box has non-zero width and height and
 * its computed style is neither `visibility: hidden` nor `display: none`.
 *
 * Both limits are interpolated into the source unquoted.
 * NOTE(review): callers presumably pass validated numbers - confirm before
 * passing anything user-controlled.
 *
 * @param {number} textLimit - Maximum number of characters of page text.
 * @param {number} elementLimit - Maximum number of controls to report.
 * @returns {string} JavaScript expression source.
 */
function buildBrowserSnapshotExpression(textLimit, elementLimit) {
  return `(() => {
const clip = (value, limit) => String(value || '').replace(/\\s+/g, ' ').trim().slice(0, limit);
const escape = (value) => globalThis.CSS && CSS.escape ? CSS.escape(value) : String(value).replace(/["\\\\]/g, '\\\\$&');
const visible = (el) => {
const rect = el.getBoundingClientRect();
const style = getComputedStyle(el);
return rect.width > 0 && rect.height > 0 && style.visibility !== 'hidden' && style.display !== 'none';
};
const labels = (el) => {
const values = [];
const id = el.getAttribute('id');
if (id) values.push(...Array.from(document.querySelectorAll('label[for="' + escape(id) + '"]')).map((label) => label.innerText));
const wrappingLabel = el.closest('label');
if (wrappingLabel) values.push(wrappingLabel.innerText);
values.push(el.innerText, el.value, el.getAttribute('aria-label'), el.getAttribute('title'), el.getAttribute('placeholder'), el.href, el.id, el.name, el.tagName);
return values.map((value) => clip(value, 160)).filter(Boolean);
};
const name = (el) => labels(el)[0] || clip(el.tagName, 160);
const selector = 'a,button,input,textarea,select,[role="button"],[role="link"],[role="textbox"],[contenteditable="true"]';
const controls = Array.from(document.querySelectorAll(selector))
.filter(visible)
.slice(0, ${elementLimit})
.map((el) => {
const rect = el.getBoundingClientRect();
return {
tag: el.tagName.toLowerCase(),
role: el.getAttribute('role') || null,
type: el.getAttribute('type') || null,
text: name(el),
href: el.href || null,
disabled: !!el.disabled || el.getAttribute('aria-disabled') === 'true',
rect: {
x: Math.round(rect.x),
y: Math.round(rect.y),
width: Math.round(rect.width),
height: Math.round(rect.height),
centerX: Math.round(rect.x + rect.width / 2),
centerY: Math.round(rect.y + rect.height / 2),
},
};
});
return {
title: document.title,
url: location.href,
text: clip(document.body ? document.body.innerText : '', ${textLimit}),
controls,
};
})()`;
}
14893
/**
 * Evaluate a JavaScript expression in a Chrome target over its DevTools WebSocket.
 *
 * Opens the socket, sends a single `Runtime.evaluate` request (awaiting
 * promises and returning the result by value), and closes the socket as soon
 * as the outcome is known.
 *
 * @param {string} webSocketDebuggerUrl - The target's `webSocketDebuggerUrl`.
 * @param {string} expression - JavaScript source to evaluate in the page.
 * @returns {Promise<unknown>} The evaluated value, or null when the reply carries none.
 * @throws {Error} On a 5s timeout, a protocol error, an exception thrown by the
 *   expression, a socket error, or the socket closing before a reply arrives.
 */
async function evaluateChromeTarget(webSocketDebuggerUrl, expression) {
  return await new Promise((resolve2, reject) => {
    const ws = new WebSocket(webSocketDebuggerUrl);
    let settled = false;
    // Single exit point: the first outcome wins, later events are ignored.
    const finish = (callback) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      ws.close();
      callback();
    };
    const timeout = setTimeout(() => {
      finish(() => reject(new Error("Chrome DevTools evaluation timed out")));
    }, 5e3);
    ws.addEventListener("open", () => {
      ws.send(JSON.stringify({
        id: 1,
        method: "Runtime.evaluate",
        params: {
          expression,
          awaitPromise: true,
          returnByValue: true
        }
      }));
    });
    ws.addEventListener("message", (event) => {
      const data = typeof event.data === "string" ? event.data : Buffer.from(event.data).toString("utf8");
      let parsed;
      try {
        parsed = JSON.parse(data);
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        finish(() => reject(new Error(`Chrome DevTools returned invalid WebSocket JSON: ${reason}`)));
        return;
      }
      if (typeof parsed !== "object" || parsed === null) {
        finish(() => reject(new Error("Chrome DevTools returned an invalid WebSocket message")));
        return;
      }
      const message = parsed;
      // Ignore anything that is not the reply to our one request.
      if (message.id !== 1) return;
      if (message.error) {
        finish(() => reject(new Error(message.error?.message || "Chrome DevTools evaluation failed")));
        return;
      }
      const details = message.result?.exceptionDetails;
      if (details) {
        // `text` is usually just "Uncaught"; the exception's description carries
        // the actual error name and message, so prefer it when present.
        const reason = details.exception?.description || details.text || "Chrome DevTools evaluation threw";
        finish(() => reject(new Error(reason)));
        return;
      }
      finish(() => resolve2(message.result?.result?.value ?? null));
    });
    ws.addEventListener("error", () => {
      finish(() => reject(new Error("Chrome DevTools WebSocket failed")));
    });
    // Without this, a socket that closes early (tab closed, Chrome exited) would
    // leave the caller waiting for the full timeout.
    ws.addEventListener("close", () => {
      finish(() => reject(new Error("Chrome DevTools WebSocket closed before a result was received")));
    });
  });
}
14949
/**
 * `replicas computer browser`: print JSON describing the open Chrome pages.
 *
 * Each page entry carries id, title, url and attached state. With
 * `options.snapshot`, pages that expose a debugger URL additionally get a
 * `snapshot` (page text and controls) or, when evaluation fails, a
 * `snapshotError` message.
 *
 * @param {object} [options]
 * @param {boolean} [options.snapshot] - Include a per-page snapshot.
 * @param {string} [options.limit] - Maximum characters of page text, 0-50000 (default 4000).
 * @param {string} [options.elementLimit] - Maximum controls per page, 0-500 (default 80).
 * @returns {Promise<void>} Resolves after the JSON has been printed.
 * @throws {Error} On invalid limits or when the page list cannot be fetched.
 */
async function computerBrowserCommand(options = {}) {
  const textLimit = options.limit ? parseCoord(options.limit, "--limit") : 4e3;
  const elementLimit = options.elementLimit ? parseCoord(options.elementLimit, "--element-limit") : 80;
  if (textLimit < 0 || textLimit > 5e4) fail("--limit must be between 0 and 50000");
  if (elementLimit < 0 || elementLimit > 500) fail("--element-limit must be between 0 and 500");
  const expression = buildBrowserSnapshotExpression(textLimit, elementLimit);
  const targets = await getChromePages();
  // Snapshots of different tabs are independent, so take them concurrently: an
  // unresponsive tab then costs one evaluation timeout overall rather than one
  // per tab in sequence. Promise.all keeps the results in target order, and each
  // task handles its own failure so a bad tab never rejects the whole batch.
  const pageResults = await Promise.all(targets.map(async (target) => {
    const page = {
      id: target.id ?? null,
      title: target.title ?? "",
      url: target.url ?? "",
      attached: !!target.attached
    };
    if (!options.snapshot || !target.webSocketDebuggerUrl) {
      return page;
    }
    try {
      return { ...page, snapshot: await evaluateChromeTarget(target.webSocketDebuggerUrl, expression) };
    } catch (error) {
      return { ...page, snapshotError: error instanceof Error ? error.message : "snapshot failed" };
    }
  }));
  console.log(JSON.stringify({
    port: CHROME_DEBUG_PORT,
    pageCount: pageResults.length,
    pages: pageResults
  }, null, 2));
}
/**
 * Build the source of an IIFE that, when evaluated inside a page, finds the
 * visible interactive controls whose label matches `query` and clicks one.
 *
 * The label of a control is the first non-empty of: innerText, value,
 * aria-label, title, placeholder, href, id, name, tag name. Matching is
 * case-insensitive; substring by default, whole-label when `exact` is set.
 *
 * The evaluated expression returns a plain object:
 *   - success: `{ clicked: true, query, matchCount, index, tag, role, type,
 *     text, href, rect }`
 *   - failure: `{ clicked: false, query, matchCount, reason, ... }`
 *
 * @param {string} query Text to look for.
 * @param {object} [options]
 * @param {boolean} [options.exact=false] Require the whole label to match.
 * @param {number} [options.index=0] Which match to click (0-based).
 * @returns {string} JavaScript source text.
 */
function buildBrowserClickExpression(query, options = {}) {
  const exact = Boolean(options.exact);
  // Number(...) + JSON.stringify keeps the generated source valid for any
  // input: a missing index becomes 0, NaN/Infinity become null (which then
  // resolves to "index out of range" in the page).
  const index = Number(options.index ?? 0);
  return `(() => {
    const query = ${JSON.stringify(query)};
    const exact = ${JSON.stringify(exact)};
    const index = ${JSON.stringify(index)};
    const normalize = (value) => String(value || '').replace(/\\s+/g, ' ').trim();
    const visible = (el) => {
      const rect = el.getBoundingClientRect();
      const style = getComputedStyle(el);
      return rect.width > 0 && rect.height > 0 && style.visibility !== 'hidden' && style.display !== 'none';
    };
    const disabled = (el) => (typeof el.matches === 'function' ? el.matches(':disabled') : el.disabled === true);
    const label = (el) => normalize(el.innerText || el.value || el.getAttribute('aria-label') || el.getAttribute('title') || el.getAttribute('placeholder') || el.href || el.id || el.name || el.tagName);
    const matches = (text) => {
      const haystack = text.toLowerCase();
      const needle = query.toLowerCase();
      return exact ? haystack === needle : haystack.includes(needle);
    };
    const selector = 'a,button,input,textarea,select,[role="button"],[role="link"],[role="textbox"],[contenteditable="true"]';
    const candidates = Array.from(document.querySelectorAll(selector))
      .filter(visible)
      .map((el) => ({ el, text: label(el) }))
      .filter((candidate) => candidate.text && matches(candidate.text));
    const candidate = candidates[index];
    if (!candidate) {
      return {
        clicked: false,
        query,
        matchCount: candidates.length,
        reason: candidates.length ? 'index out of range' : 'no visible matching control',
      };
    }
    const { el, text } = candidate;
    if (disabled(el)) {
      // click() does nothing on a disabled form control, so claiming success
      // here would mislead the caller. Indices of the other matches stay stable.
      return {
        clicked: false,
        query,
        matchCount: candidates.length,
        index,
        tag: el.tagName.toLowerCase(),
        text,
        reason: 'control is disabled',
      };
    }
    el.scrollIntoView({ block: 'center', inline: 'center' });
    const rect = el.getBoundingClientRect();
    if (typeof el.focus === 'function') el.focus();
    el.click();
    return {
      clicked: true,
      query,
      matchCount: candidates.length,
      index,
      tag: el.tagName.toLowerCase(),
      role: el.getAttribute('role') || null,
      type: el.getAttribute('type') || null,
      text,
      href: el.href || null,
      rect: {
        x: Math.round(rect.x),
        y: Math.round(rect.y),
        width: Math.round(rect.width),
        height: Math.round(rect.height),
        centerX: Math.round(rect.x + rect.width / 2),
        centerY: Math.round(rect.y + rect.height / 2),
      },
    };
  })()`;
}
/**
 * `replicas computer browser-click <text>`: click a control in the selected
 * Chrome page and print `{ title, url, result }` as JSON.
 *
 * @param {string} query Text the control's label must match.
 * @param {object} [options] CLI options; `index` and `exact` are read here and
 *   the whole object is forwarded to `selectChromePage` for page selection.
 * @returns {Promise<void>}
 */
async function computerBrowserClickCommand(query, options = {}) {
  const page = await selectChromePage(options);
  const index = options.index ? parseCoord(options.index, "--index") : 0;
  if (index < 0) fail("--index must be >= 0");
  const result = await evaluateChromeTarget(
    page.webSocketDebuggerUrl,
    buildBrowserClickExpression(query, { exact: !!options.exact, index })
  );
  console.log(JSON.stringify({
    title: page.title ?? "",
    url: page.url ?? "",
    result
  }, null, 2));
  // Signal "nothing was clicked" through the exit status as well as the JSON,
  // mirroring browser-wait, so `browser-click ... && next-step` stops on a miss.
  if (result?.clicked !== true) process.exitCode = 1;
}
/**
 * Build the source of an IIFE that, when evaluated inside a page, finds the
 * visible form fields whose label matches `query` and writes `value` into one.
 *
 * A field's label parts are: text of `<label for=id>` elements, text of a
 * wrapping `<label>`, aria-label, title, placeholder, name, id, current value
 * and innerText. Substring mode matches against all parts joined by spaces;
 * exact mode requires one individual part to equal the query.
 *
 * After writing, the expression dispatches a bubbling `input` event followed
 * by a bubbling `change` event on the field.
 *
 * The evaluated expression returns a plain object:
 *   - success: `{ filled: true, query, matchCount, index, tag, role, type,
 *     text, value, rect }`
 *   - failure: `{ filled: false, query, matchCount, reason, ... }`
 *
 * @param {string} query Text identifying the field.
 * @param {string} value Text to write (for `<select>`, an option value or its
 *   visible text).
 * @param {object} [options]
 * @param {boolean} [options.exact=false] Require an exact label-part match.
 * @param {number} [options.index=0] Which match to fill (0-based).
 * @returns {string} JavaScript source text.
 */
function buildBrowserFillExpression(query, value, options = {}) {
  const exact = Boolean(options.exact);
  // See buildBrowserClickExpression: always emit a valid numeric/null literal.
  const index = Number(options.index ?? 0);
  return `(() => {
    const query = ${JSON.stringify(query)};
    const value = ${JSON.stringify(value)};
    const exact = ${JSON.stringify(exact)};
    const index = ${JSON.stringify(index)};
    const normalize = (text) => String(text || '').replace(/\\s+/g, ' ').trim();
    const escape = (text) => globalThis.CSS && CSS.escape ? CSS.escape(text) : String(text).replace(/["\\\\]/g, '\\\\$&');
    const visible = (el) => {
      const rect = el.getBoundingClientRect();
      const style = getComputedStyle(el);
      return rect.width > 0 && rect.height > 0 && style.visibility !== 'hidden' && style.display !== 'none';
    };
    const matches = (text) => {
      const haystack = text.toLowerCase();
      const needle = query.toLowerCase();
      return exact ? haystack === needle : haystack.includes(needle);
    };
    const labelParts = (el) => {
      const id = el.getAttribute('id');
      const labels = [];
      if (id) labels.push(...Array.from(document.querySelectorAll('label[for="' + escape(id) + '"]')).map((label) => label.innerText));
      const wrappingLabel = el.closest('label');
      if (wrappingLabel) labels.push(wrappingLabel.innerText);
      labels.push(
        el.getAttribute('aria-label'),
        el.getAttribute('title'),
        el.getAttribute('placeholder'),
        el.getAttribute('name'),
        el.getAttribute('id'),
        el.value,
        el.innerText,
      );
      return labels.map(normalize).filter(Boolean);
    };
    const labelText = (el) => labelParts(el).join(' ');
    const fieldMatches = (el) => {
      const parts = labelParts(el);
      return exact ? parts.some(matches) : matches(parts.join(' '));
    };
    const setValue = (target, next) => {
      // Write through the prototype's value setter when there is one. Pages
      // (notably framework-controlled inputs) may shadow "value" on the
      // element instance to track writes; going through that shadow makes the
      // following input event look like a no-op to them.
      const descriptor = Object.getOwnPropertyDescriptor(Object.getPrototypeOf(target) || {}, 'value');
      if (descriptor && typeof descriptor.set === 'function') descriptor.set.call(target, next);
      else target.value = next;
    };
    const selector = 'input:not([type="button"]):not([type="submit"]):not([type="reset"]):not([type="checkbox"]):not([type="radio"]),textarea,select,[role="textbox"],[contenteditable="true"]';
    const candidates = Array.from(document.querySelectorAll(selector))
      .filter(visible)
      .map((el) => ({ el, text: labelText(el) }))
      .filter((candidate) => candidate.text && fieldMatches(candidate.el));
    const candidate = candidates[index];
    if (!candidate) {
      return {
        filled: false,
        query,
        matchCount: candidates.length,
        reason: candidates.length ? 'index out of range' : 'no visible matching field',
      };
    }
    const { el, text } = candidate;
    const isSelect = el.tagName.toLowerCase() === 'select';
    const option = isSelect
      ? Array.from(el.options).find((item) => item.value === value || normalize(item.text) === value)
      : null;
    if (isSelect && !option) {
      // Assigning an unknown value would blank the select; leave it untouched
      // and report the miss instead of claiming success.
      return {
        filled: false,
        query,
        matchCount: candidates.length,
        index,
        tag: 'select',
        text,
        reason: 'no matching option',
      };
    }
    el.scrollIntoView({ block: 'center', inline: 'center' });
    if (typeof el.focus === 'function') el.focus();
    if (isSelect) {
      setValue(el, option.value);
    } else if (el.isContentEditable) {
      el.textContent = value;
    } else {
      setValue(el, value);
    }
    el.dispatchEvent(new InputEvent('input', { bubbles: true, inputType: 'insertText', data: value }));
    el.dispatchEvent(new Event('change', { bubbles: true }));
    const rect = el.getBoundingClientRect();
    return {
      filled: true,
      query,
      matchCount: candidates.length,
      index,
      tag: el.tagName.toLowerCase(),
      role: el.getAttribute('role') || null,
      type: el.getAttribute('type') || null,
      text,
      value: el.isContentEditable ? el.textContent : el.value,
      rect: {
        x: Math.round(rect.x),
        y: Math.round(rect.y),
        width: Math.round(rect.width),
        height: Math.round(rect.height),
        centerX: Math.round(rect.x + rect.width / 2),
        centerY: Math.round(rect.y + rect.height / 2),
      },
    };
  })()`;
}
/**
 * `replicas computer browser-fill <field> <value>`: write `value` into a form
 * field of the selected Chrome page and print `{ title, url, result }` as JSON.
 *
 * @param {string} query Text the field's label must match.
 * @param {string} value Text to write into the field.
 * @param {object} [options] CLI options; `index` and `exact` are read here and
 *   the whole object is forwarded to `selectChromePage` for page selection.
 * @returns {Promise<void>}
 */
async function computerBrowserFillCommand(query, value, options = {}) {
  const page = await selectChromePage(options);
  const index = options.index ? parseCoord(options.index, "--index") : 0;
  if (index < 0) fail("--index must be >= 0");
  const result = await evaluateChromeTarget(
    page.webSocketDebuggerUrl,
    buildBrowserFillExpression(query, value, { exact: !!options.exact, index })
  );
  console.log(JSON.stringify({
    title: page.title ?? "",
    url: page.url ?? "",
    result
  }, null, 2));
  // Signal "nothing was filled" through the exit status as well as the JSON,
  // mirroring browser-wait, so `browser-fill ... && next-step` stops on a miss.
  if (result?.filled !== true) process.exitCode = 1;
}
15157
+ function buildBrowserWaitExpression(query, options) {
15158
+ return `(() => {
15159
+ const query = ${JSON.stringify(query)};
15160
+ const mode = ${JSON.stringify(options.mode)};
15161
+ const exact = ${JSON.stringify(options.exact)};
15162
+ const normalize = (text) => String(text || '').replace(/\\s+/g, ' ').trim();
15163
+ const visible = (el) => {
15164
+ const rect = el.getBoundingClientRect();
15165
+ const style = getComputedStyle(el);
15166
+ return rect.width > 0 && rect.height > 0 && style.visibility !== 'hidden' && style.display !== 'none';
15167
+ };
15168
+ const matches = (text) => {
15169
+ const haystack = normalize(text).toLowerCase();
15170
+ const needle = query.toLowerCase();
15171
+ return exact ? haystack === needle : haystack.includes(needle);
15172
+ };
15173
+ const controlText = (el) => normalize(el.innerText || el.value || el.getAttribute('aria-label') || el.getAttribute('title') || el.getAttribute('placeholder') || el.href || el.id || el.name || el.tagName);
15174
+ const controls = () => Array.from(document.querySelectorAll('a,button,input,textarea,select,[role="button"],[role="link"],[role="textbox"],[contenteditable="true"]'))
15175
+ .filter(visible)
15176
+ .map(controlText)
15177
+ .filter(Boolean);
15178
+ const values = {
15179
+ title: [document.title],
15180
+ url: [location.href],
15181
+ text: [document.body ? document.body.innerText : ''],
15182
+ control: controls(),
15183
+ any: [document.title, location.href, document.body ? document.body.innerText : '', ...controls()],
15184
+ }[mode];
15185
+ if (!values) return { matched: false, query, mode, reason: 'invalid mode' };
15186
+ const match = values.find(matches) || null;
15187
+ return {
15188
+ matched: !!match,
15189
+ query,
15190
+ mode,
15191
+ exact,
15192
+ match: match ? normalize(match).slice(0, 500) : null,
15193
+ title: document.title,
15194
+ url: location.href,
15195
+ };
15196
+ })()`;
15197
+ }
/**
 * `replicas computer browser-wait <text>`: poll the selected Chrome page until
 * `query` matches or the timeout elapses.
 *
 * Prints `{ ok, elapsedMs, attempts, result }` as JSON (plus `error` when the
 * last evaluation failed) and sets `process.exitCode = 1` on timeout.
 *
 * @param {string} query Text to wait for.
 * @param {object} [options]
 * @param {string} [options.mode="any"] One of any|text|title|url|control.
 * @param {boolean} [options.exact] Require an exact match.
 * @param {string} [options.timeout] Max wait in ms (>= 0, default 10000).
 * @param {string} [options.pollMs] Poll interval in ms (50..2000, default 250).
 *   The whole object is also forwarded to `selectChromePage`.
 * @returns {Promise<void>}
 */
async function computerBrowserWaitCommand(query, options = {}) {
  const mode = options.mode ?? "any";
  if (!["any", "text", "title", "url", "control"].includes(mode)) fail("--mode must be one of any|text|title|url|control");
  const timeoutMs = options.timeout ? parseCoord(options.timeout, "--timeout") : 1e4;
  const pollMs = options.pollMs ? parseCoord(options.pollMs, "--poll-ms") : 250;
  if (timeoutMs < 0) fail("--timeout must be >= 0");
  if (pollMs < 50 || pollMs > 2e3) fail("--poll-ms must be between 50 and 2000");
  const page = await selectChromePage(options);
  const start = Date.now();
  const expression = buildBrowserWaitExpression(query, { mode, exact: !!options.exact });
  let attempts = 0;
  let lastResult = null;
  let lastError = null;
  // Always evaluate at least once, even with --timeout 0.
  for (;;) {
    attempts++;
    try {
      lastResult = await evaluateChromeTarget(page.webSocketDebuggerUrl, expression);
      lastError = null;
    } catch (error) {
      // An evaluation can fail transiently (for example while the page is
      // navigating, which is typically what is being waited on). Keep polling
      // and surface the last error only if the wait ultimately times out.
      lastError = error instanceof Error ? error.message : String(error);
    }
    if (lastError === null && lastResult?.matched) {
      console.log(JSON.stringify({ ok: true, elapsedMs: Date.now() - start, attempts, result: lastResult }, null, 2));
      return;
    }
    // Stop now if the next poll could not start before the deadline, instead
    // of sleeping a full interval only to give up afterwards.
    if (Date.now() - start + pollMs > timeoutMs) break;
    await sleep(pollMs);
  }
  const failure = { ok: false, elapsedMs: Date.now() - start, attempts, result: lastResult };
  if (lastError !== null) failure.error = lastError;
  console.log(JSON.stringify(failure, null, 2));
  process.exitCode = 1;
}
14563
15222
  async function computerClickCommand(xStr, yStr, options) {
14564
- const x = parseCoord(xStr, "x");
14565
- const y = parseCoord(yStr, "y");
15223
+ const dimensions = getDisplayDimensions();
15224
+ const x = parseScreenCoord(xStr, "x", dimensions.width);
15225
+ const y = parseScreenCoord(yStr, "y", dimensions.height);
14566
15226
  const button = options.button ?? "1";
14567
15227
  const args = ["mousemove", "--sync", String(x), String(y)];
14568
15228
  if (options.modifiers) {
@@ -14584,8 +15244,9 @@ async function computerClickCommand(xStr, yStr, options) {
14584
15244
  console.log(`clicked ${button === "1" ? "left" : button === "2" ? "middle" : button === "3" ? "right" : `button ${button}`} at (${x},${y})${options.double ? " x2" : ""}`);
14585
15245
  }
14586
15246
  async function computerMoveCommand(xStr, yStr) {
14587
- const x = parseCoord(xStr, "x");
14588
- const y = parseCoord(yStr, "y");
15247
+ const dimensions = getDisplayDimensions();
15248
+ const x = parseScreenCoord(xStr, "x", dimensions.width);
15249
+ const y = parseScreenCoord(yStr, "y", dimensions.height);
14589
15250
  runDisplayCmd("xdotool", ["mousemove", "--sync", String(x), String(y)]);
14590
15251
  console.log(`moved to (${x},${y})`);
14591
15252
  }
@@ -14606,17 +15267,24 @@ async function computerScrollCommand(direction, options) {
14606
15267
  const amount = options.amount ? parseCoord(options.amount, "--amount") : 3;
14607
15268
  const args = [];
14608
15269
  if (options.x && options.y) {
14609
- args.push("mousemove", "--sync", options.x, options.y);
15270
+ const dimensions = getDisplayDimensions();
15271
+ args.push(
15272
+ "mousemove",
15273
+ "--sync",
15274
+ String(parseScreenCoord(options.x, "--x", dimensions.width)),
15275
+ String(parseScreenCoord(options.y, "--y", dimensions.height))
15276
+ );
14610
15277
  }
14611
15278
  args.push("click", "--repeat", String(amount), "--delay", "30", button);
14612
15279
  runDisplayCmd("xdotool", args);
14613
15280
  console.log(`scrolled ${dir} x${amount}`);
14614
15281
  }
14615
15282
  async function computerDragCommand(fx, fy, tx, ty) {
14616
- const fromX = parseCoord(fx, "fromX");
14617
- const fromY = parseCoord(fy, "fromY");
14618
- const toX = parseCoord(tx, "toX");
14619
- const toY = parseCoord(ty, "toY");
15283
+ const dimensions = getDisplayDimensions();
15284
+ const fromX = parseScreenCoord(fx, "fromX", dimensions.width);
15285
+ const fromY = parseScreenCoord(fy, "fromY", dimensions.height);
15286
+ const toX = parseScreenCoord(tx, "toX", dimensions.width);
15287
+ const toY = parseScreenCoord(ty, "toY", dimensions.height);
14620
15288
  runDisplayCmd("xdotool", [
14621
15289
  "mousemove",
14622
15290
  "--sync",
@@ -19043,13 +19711,18 @@ if (isAgentMode()) {
19043
19711
  };
19044
19712
  computer.command("info").description("Print the live noVNC viewer URL for the workspace desktop. The preview is registered automatically at engine startup, so this just looks it up \u2014 share the URL with the user to let them watch the desktop.").action(wrap(() => computerInfoCommand()));
19045
19713
  computer.command("status").description("Show which desktop services are running and the active preview URL").action(wrap(() => computerStatusCommand()));
19046
- computer.command("screenshot <path>").description("Capture the current desktop to a PNG file. Use the path with `replicas media upload` to share it.").action(wrap((path6) => computerScreenshotCommand(path6)));
19047
- computer.command("click <x> <y>").description("Move to (x, y) and click. Coordinates are in pixels on the workspace display.").option("-b, --button <n>", "Mouse button (1=left, 2=middle, 3=right). Default 1.").option("--double", "Double-click instead of single-click").option("--modifiers <mods>", "Hold modifier keys during the click, e.g. ctrl or ctrl+shift").action(wrap((x, y, options) => computerClickCommand(x, y, options)));
19048
- computer.command("move <x> <y>").description("Move the mouse to (x, y) without clicking").action(wrap((x, y) => computerMoveCommand(x, y)));
19714
+ computer.command("screenshot <path>").description("Capture the current desktop to a PNG file. Use --raw or --grid for agent click planning; omit both for a branded shareable image.").option("--raw", "Save a 1:1 desktop capture with no branding, padding, or rounded corners").option("--grid [px]", "Overlay a coordinate grid on a 1:1 desktop capture. Default 100px.").action(wrap((path6, options) => computerScreenshotCommand(path6, options)));
19715
+ computer.command("observe <path>").description("Wait briefly for the desktop to settle, save a 1:1 screenshot, and print JSON screen context for agents.").option("--raw", "Save a 1:1 desktop capture with no coordinate grid").option("--grid [px]", "Overlay a coordinate grid on the 1:1 desktop capture. Default 100px.").option("--timeout <ms>", "Maximum time to wait for visual stability. Default 3000.").option("--stable-ms <ms>", "Required unchanged time before the screen is considered stable. Default 600.").option("--poll-ms <ms>", "Screenshot polling interval while waiting. Default 200.").action(wrap((path6, options) => computerObserveCommand(path6, options)));
19716
+ computer.command("browser").description("Print JSON for Chrome tabs launched through Replicas, including page titles and URLs.").option("--snapshot", "Include visible page text and interactive controls with bounding boxes").option("--limit <chars>", "Maximum body text characters per page snapshot. Default 4000.").option("--element-limit <n>", "Maximum controls per page snapshot. Default 80.").action(wrap((options) => computerBrowserCommand(options)));
19717
+ computer.command("browser-click <text>").description("Click the first visible Chrome control whose text, label, placeholder, or href matches <text>.").option("--exact", "Require an exact text match instead of substring matching").option("--index <n>", "When multiple controls match, click the nth match. Default 0.").option("--id <id>", "Only target the Chrome page with this target id").option("--page <n>", "When multiple pages match, target the nth page. Default 0.").option("--title <text>", "Only target pages whose title contains text").option("--url <text>", "Only target pages whose URL contains text").action(wrap((text, options) => computerBrowserClickCommand(text, options)));
19718
+ computer.command("browser-fill <field> <value>").description("Fill the first visible Chrome field whose label, placeholder, name, or text matches <field>.").option("--exact", "Require an exact field match instead of substring matching").option("--index <n>", "When multiple fields match, fill the nth match. Default 0.").option("--id <id>", "Only target the Chrome page with this target id").option("--page <n>", "When multiple pages match, target the nth page. Default 0.").option("--title <text>", "Only target pages whose title contains text").option("--url <text>", "Only target pages whose URL contains text").action(wrap((field, value, options) => computerBrowserFillCommand(field, value, options)));
19719
+ computer.command("browser-wait <text>").description("Wait until Chrome page title, URL, body text, or controls match <text>.").option("--mode <mode>", "Where to match: any, text, title, url, or control. Default any.").option("--exact", "Require an exact match instead of substring matching").option("--timeout <ms>", "Maximum time to wait. Default 10000.").option("--poll-ms <ms>", "Polling interval. Default 250.").option("--id <id>", "Only target the Chrome page with this target id").option("--page <n>", "When multiple pages match, target the nth page. Default 0.").option("--title <text>", "Only target pages whose title contains text").option("--url <text>", "Only target pages whose URL contains text").action(wrap((text, options) => computerBrowserWaitCommand(text, options)));
19720
+ computer.command("click <x> <y>").description("Move to (x, y) and click. Coordinates are pixels or percentages like 50% 50%.").option("-b, --button <n>", "Mouse button (1=left, 2=middle, 3=right). Default 1.").option("--double", "Double-click instead of single-click").option("--modifiers <mods>", "Hold modifier keys during the click, e.g. ctrl or ctrl+shift").action(wrap((x, y, options) => computerClickCommand(x, y, options)));
19721
+ computer.command("move <x> <y>").description("Move the mouse to (x, y) without clicking. Coordinates are pixels or percentages like 50% 50%.").action(wrap((x, y) => computerMoveCommand(x, y)));
19049
19722
  computer.command("type <text>").description("Type a literal string into the focused field. Use `key` for key combos like ctrl+l.").option("--delay <ms>", "Per-character delay in ms (default 12 \u2248 80 wpm)").action(wrap((text, options) => computerTypeCommand(text, options)));
19050
19723
  computer.command("key <combo>").description("Press a key combo, e.g. Return, Escape, ctrl+l, ctrl+shift+t. Same syntax as xdotool key.").action(wrap((combo) => computerKeyCommand(combo)));
19051
- computer.command("scroll <direction>").description("Scroll up | down | left | right. Optionally provide --x / --y to hover before scrolling.").option("--amount <n>", "Wheel ticks (default 3)").option("--x <x>", "Hover x before scrolling").option("--y <y>", "Hover y before scrolling").action(wrap((direction, options) => computerScrollCommand(direction, options)));
19052
- computer.command("drag <fromX> <fromY> <toX> <toY>").description("Press the left mouse button at (fromX, fromY), drag to (toX, toY), release.").action(wrap((fx, fy, tx, ty) => computerDragCommand(fx, fy, tx, ty)));
19724
+ computer.command("scroll <direction>").description("Scroll up | down | left | right. Optionally provide --x / --y to hover before scrolling.").option("--amount <n>", "Wheel ticks (default 3)").option("--x <x>", "Hover x before scrolling. Pixels or percentage.").option("--y <y>", "Hover y before scrolling. Pixels or percentage.").action(wrap((direction, options) => computerScrollCommand(direction, options)));
19725
+ computer.command("drag <fromX> <fromY> <toX> <toY>").description("Press the left mouse button at (fromX, fromY), drag to (toX, toY), release. Coordinates are pixels or percentages.").action(wrap((fx, fy, tx, ty) => computerDragCommand(fx, fy, tx, ty)));
19053
19726
  computer.command("launch <app> [args...]").description("Launch an app on the workspace display. Aliases: chrome, chromium, firefox, terminal.").action(wrap((app, args) => computerLaunchCommand(app, args)));
19054
19727
  const record = computer.command("record").description("Screen-record the workspace display to an MP4 (60fps libx264).");
19055
19728
  record.command("start <path>").description("Start recording to <path>. Output is fragmented MP4 (safe if the workspace dies mid-record).").option("--fps <n>", "Frame rate (default 60)").action(wrap((path6, options) => computerRecordStartCommand(path6, options)));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "replicas-cli",
3
- "version": "0.2.321",
3
+ "version": "0.2.322",
4
4
  "description": "CLI for managing Replicas workspaces - SSH into cloud dev environments with automatic port forwarding",
5
5
  "main": "dist/index.mjs",
6
6
  "bin": {