usecomputer 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/CHANGELOG.md +13 -0
  2. package/README.md +338 -0
  3. package/build.zig +1 -0
  4. package/dist/bridge-contract.test.js +124 -63
  5. package/dist/bridge.d.ts.map +1 -1
  6. package/dist/bridge.js +241 -46
  7. package/dist/cli-parsing.test.js +34 -11
  8. package/dist/cli.d.ts.map +1 -1
  9. package/dist/cli.js +328 -22
  10. package/dist/coord-map.d.ts +14 -0
  11. package/dist/coord-map.d.ts.map +1 -0
  12. package/dist/coord-map.js +75 -0
  13. package/dist/coord-map.test.d.ts +2 -0
  14. package/dist/coord-map.test.d.ts.map +1 -0
  15. package/dist/coord-map.test.js +157 -0
  16. package/dist/darwin-arm64/usecomputer.node +0 -0
  17. package/dist/darwin-x64/usecomputer.node +0 -0
  18. package/dist/debug-point-image.d.ts +8 -0
  19. package/dist/debug-point-image.d.ts.map +1 -0
  20. package/dist/debug-point-image.js +43 -0
  21. package/dist/debug-point-image.test.d.ts +2 -0
  22. package/dist/debug-point-image.test.d.ts.map +1 -0
  23. package/dist/debug-point-image.test.js +44 -0
  24. package/dist/index.d.ts +2 -0
  25. package/dist/index.d.ts.map +1 -1
  26. package/dist/index.js +3 -1
  27. package/dist/lib.d.ts +26 -0
  28. package/dist/lib.d.ts.map +1 -0
  29. package/dist/lib.js +88 -0
  30. package/dist/native-click-smoke.test.js +69 -29
  31. package/dist/native-lib.d.ts +59 -1
  32. package/dist/native-lib.d.ts.map +1 -1
  33. package/dist/terminal-table.d.ts +10 -0
  34. package/dist/terminal-table.d.ts.map +1 -0
  35. package/dist/terminal-table.js +55 -0
  36. package/dist/terminal-table.test.d.ts +2 -0
  37. package/dist/terminal-table.test.d.ts.map +1 -0
  38. package/dist/terminal-table.test.js +41 -0
  39. package/dist/types.d.ts +45 -0
  40. package/dist/types.d.ts.map +1 -1
  41. package/package.json +16 -4
  42. package/src/bridge-contract.test.ts +140 -69
  43. package/src/bridge.ts +293 -53
  44. package/src/cli-parsing.test.ts +61 -0
  45. package/src/cli.ts +401 -25
  46. package/src/coord-map.test.ts +178 -0
  47. package/src/coord-map.ts +105 -0
  48. package/src/debug-point-image.test.ts +50 -0
  49. package/src/debug-point-image.ts +69 -0
  50. package/src/index.ts +3 -1
  51. package/src/lib.ts +125 -0
  52. package/src/native-click-smoke.test.ts +81 -63
  53. package/src/native-lib.ts +39 -1
  54. package/src/terminal-table.test.ts +44 -0
  55. package/src/terminal-table.ts +88 -0
  56. package/src/types.ts +50 -0
  57. package/zig/src/lib.zig +1280 -163
  58. package/zig/src/scroll.zig +213 -0
  59. package/zig/src/window.zig +123 -0
package/src/cli.ts CHANGED
@@ -1,13 +1,24 @@
1
1
  // usecomputer CLI entrypoint and command wiring for desktop automation actions.
2
2
 
3
3
  import { goke } from 'goke'
4
+ import pc from 'picocolors'
4
5
  import { z } from 'zod'
5
6
  import dedent from 'string-dedent'
6
7
  import { createRequire } from 'node:module'
8
+ import fs from 'node:fs'
9
+ import pathModule from 'node:path'
7
10
  import url from 'node:url'
8
11
  import { createBridge } from './bridge.js'
12
+ import {
13
+ getRegionFromCoordMap,
14
+ mapPointFromCoordMap,
15
+ mapPointToCoordMap,
16
+ parseCoordMapOrThrow,
17
+ } from './coord-map.js'
9
18
  import { parseDirection, parseModifiers, parsePoint, parseRegion } from './command-parsers.js'
10
- import type { MouseButton, Point, UseComputerBridge } from './types.js'
19
+ import { drawDebugPointOnImage } from './debug-point-image.js'
20
+ import { renderAlignedTable } from './terminal-table.js'
21
+ import type { DisplayInfo, MouseButton, Point, UseComputerBridge, WindowInfo } from './types.js'
11
22
 
12
23
  const require = createRequire(import.meta.url)
13
24
  const packageJson = require('../package.json') as { version: string }
@@ -20,6 +31,58 @@ function printLine(value: string): void {
20
31
  process.stdout.write(`${value}\n`)
21
32
  }
22
33
 
34
+ function readTextFromStdin(): string {
35
+ return fs.readFileSync(0, 'utf8')
36
+ }
37
+
38
+ function parsePositiveInteger({
39
+ value,
40
+ option,
41
+ }: {
42
+ value?: number
43
+ option: string
44
+ }): number | undefined {
45
+ if (typeof value !== 'number') {
46
+ return undefined
47
+ }
48
+ if (!Number.isFinite(value) || value <= 0) {
49
+ throw new Error(`Option ${option} must be a positive number`)
50
+ }
51
+ return Math.round(value)
52
+ }
53
+
54
+ function splitIntoChunks({
55
+ text,
56
+ chunkSize,
57
+ }: {
58
+ text: string
59
+ chunkSize?: number
60
+ }): string[] {
61
+ if (!chunkSize || text.length <= chunkSize) {
62
+ return [text]
63
+ }
64
+ const chunkCount = Math.ceil(text.length / chunkSize)
65
+ return Array.from({ length: chunkCount }, (_, index) => {
66
+ const start = index * chunkSize
67
+ const end = start + chunkSize
68
+ return text.slice(start, end)
69
+ }).filter((chunk) => {
70
+ return chunk.length > 0
71
+ })
72
+ }
73
+
74
+ function sleep({
75
+ ms,
76
+ }: {
77
+ ms: number
78
+ }): Promise<void> {
79
+ return new Promise((resolve) => {
80
+ setTimeout(() => {
81
+ resolve()
82
+ }, ms)
83
+ })
84
+ }
85
+
23
86
  function parsePointOrThrow(input: string): Point {
24
87
  const parsed = parsePoint(input)
25
88
  if (parsed instanceof Error) {
@@ -28,6 +91,26 @@ function parsePointOrThrow(input: string): Point {
28
91
  return parsed
29
92
  }
30
93
 
94
+
95
+ function resolveOutputPath({ path }: { path?: string }): string | undefined {
96
+ if (!path) {
97
+ return undefined
98
+ }
99
+
100
+ return path.startsWith('/')
101
+ ? path
102
+ : `${process.cwd()}/${path}`
103
+ }
104
+
105
+ function ensureParentDirectory({ filePath }: { filePath?: string }): void {
106
+ if (!filePath) {
107
+ return
108
+ }
109
+
110
+ const parentDirectory = pathModule.dirname(filePath)
111
+ fs.mkdirSync(parentDirectory, { recursive: true })
112
+ }
113
+
31
114
  function resolvePointInput({
32
115
  x,
33
116
  y,
@@ -58,8 +141,106 @@ function parseButton(input?: string): MouseButton {
58
141
  return 'left'
59
142
  }
60
143
 
144
+ function printDesktopList({ displays }: { displays: DisplayInfo[] }) {
145
+ const rows = displays.map((display) => {
146
+ return {
147
+ desktop: `#${display.index}`,
148
+ primary: display.isPrimary ? pc.green('yes') : 'no',
149
+ size: `${display.width}x${display.height}`,
150
+ position: `${display.x},${display.y}`,
151
+ id: String(display.id),
152
+ scale: String(display.scale),
153
+ name: display.name,
154
+ }
155
+ })
156
+
157
+ const lines = renderAlignedTable({
158
+ rows,
159
+ columns: [
160
+ { header: pc.bold('desktop'), value: (row) => { return row.desktop } },
161
+ { header: pc.bold('primary'), value: (row) => { return row.primary } },
162
+ { header: pc.bold('size'), value: (row) => { return row.size }, align: 'right' },
163
+ { header: pc.bold('position'), value: (row) => { return row.position }, align: 'right' },
164
+ { header: pc.bold('id'), value: (row) => { return row.id }, align: 'right' },
165
+ { header: pc.bold('scale'), value: (row) => { return row.scale }, align: 'right' },
166
+ { header: pc.bold('name'), value: (row) => { return row.name } },
167
+ ],
168
+ })
169
+ lines.forEach((line) => {
170
+ printLine(line)
171
+ })
172
+ }
173
+
174
+ function mapWindowsByDesktopIndex({
175
+ windows,
176
+ }: {
177
+ windows: WindowInfo[]
178
+ }): Map<number, WindowInfo[]> {
179
+ return windows.reduce((acc, window) => {
180
+ const list = acc.get(window.desktopIndex) ?? []
181
+ list.push(window)
182
+ acc.set(window.desktopIndex, list)
183
+ return acc
184
+ }, new Map<number, WindowInfo[]>())
185
+ }
186
+
187
+ function printDesktopListWithWindows({
188
+ displays,
189
+ windows,
190
+ }: {
191
+ displays: DisplayInfo[]
192
+ windows: WindowInfo[]
193
+ }) {
194
+ const windowsByDesktop = mapWindowsByDesktopIndex({ windows })
195
+ printDesktopList({ displays })
196
+
197
+ displays.forEach((display) => {
198
+ printLine('')
199
+ printLine(pc.bold(pc.cyan(`desktop #${display.index} windows`)))
200
+
201
+ const desktopWindows = windowsByDesktop.get(display.index) ?? []
202
+ if (desktopWindows.length === 0) {
203
+ printLine(pc.dim('none'))
204
+ return
205
+ }
206
+
207
+ const lines = renderAlignedTable({
208
+ rows: desktopWindows,
209
+ columns: [
210
+ { header: pc.bold('id'), value: (row) => { return String(row.id) }, align: 'right' },
211
+ { header: pc.bold('app'), value: (row) => { return row.ownerName } },
212
+ { header: pc.bold('pid'), value: (row) => { return String(row.ownerPid) }, align: 'right' },
213
+ { header: pc.bold('size'), value: (row) => { return `${row.width}x${row.height}` }, align: 'right' },
214
+ { header: pc.bold('position'), value: (row) => { return `${row.x},${row.y}` }, align: 'right' },
215
+ { header: pc.bold('title'), value: (row) => { return row.title } },
216
+ ],
217
+ })
218
+ lines.forEach((line) => {
219
+ printLine(line)
220
+ })
221
+ })
222
+ }
223
+
224
+ function printWindowList({ windows }: { windows: WindowInfo[] }) {
225
+ const lines = renderAlignedTable({
226
+ rows: windows,
227
+ columns: [
228
+ { header: pc.bold('id'), value: (row) => { return String(row.id) }, align: 'right' },
229
+ { header: pc.bold('desktop'), value: (row) => { return `#${row.desktopIndex}` }, align: 'right' },
230
+ { header: pc.bold('app'), value: (row) => { return row.ownerName } },
231
+ { header: pc.bold('pid'), value: (row) => { return String(row.ownerPid) }, align: 'right' },
232
+ { header: pc.bold('size'), value: (row) => { return `${row.width}x${row.height}` }, align: 'right' },
233
+ { header: pc.bold('position'), value: (row) => { return `${row.x},${row.y}` }, align: 'right' },
234
+ { header: pc.bold('title'), value: (row) => { return row.title } },
235
+ ],
236
+ })
237
+ lines.forEach((line) => {
238
+ printLine(line)
239
+ })
240
+ }
241
+
61
242
  function notImplemented({ command }: { command: string }): never {
62
- throw new Error(`Command \"${command}\" is not implemented yet`)
243
+ throw new Error(`TODO not implemented: ${command}`)
63
244
  }
64
245
 
65
246
  export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBridge } = {}) {
@@ -75,18 +256,31 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
75
256
  `,
76
257
  )
77
258
  .option('-r, --region [region]', z.string().describe('Capture region as x,y,width,height'))
78
- .option('--display [display]', z.number().describe('Display index for multi-monitor setups'))
259
+ .option(
260
+ '--display [display]',
261
+ z.number().describe('Display index for multi-monitor setups (0-based: first display is index 0)'),
262
+ )
263
+ .option('--window [window]', z.number().describe('Capture a specific window by window id'))
79
264
  .option('--annotate', 'Annotate screenshot with labels')
80
265
  .option('--json', 'Output as JSON')
81
266
  .action(async (path, options) => {
267
+ const outputPath = resolveOutputPath({ path })
268
+ ensureParentDirectory({ filePath: outputPath })
82
269
  const region = options.region ? parseRegion(options.region) : undefined
83
270
  if (region instanceof Error) {
84
271
  throw region
85
272
  }
273
+ if (typeof options.window === 'number' && region) {
274
+ throw new Error('Cannot use --window and --region together')
275
+ }
276
+ if (typeof options.window === 'number' && typeof options.display === 'number') {
277
+ throw new Error('Cannot use --window and --display together')
278
+ }
86
279
  const result = await bridge.screenshot({
87
- path,
280
+ path: outputPath,
88
281
  region,
89
282
  display: options.display,
283
+ window: options.window,
90
284
  annotate: options.annotate,
91
285
  })
92
286
  if (options.json) {
@@ -94,15 +288,31 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
94
288
  return
95
289
  }
96
290
  printLine(result.path)
291
+ printLine(result.hint)
292
+ printLine(`desktop-index=${String(result.desktopIndex)}`)
97
293
  })
98
294
 
99
295
  cli
100
- .command('click [target]', 'Click at coordinates')
101
- .option('-x [x]', z.number().describe('X coordinate'))
102
- .option('-y [y]', z.number().describe('Y coordinate'))
296
+ .command(
297
+ 'click [target]',
298
+ dedent`
299
+ Click at coordinates.
300
+
301
+ When you are clicking from a screenshot, use the exact pixel coordinates
302
+ of the target in that screenshot image and always pass the exact
303
+ --coord-map value printed by usecomputer screenshot. The coord map
304
+ scales screenshot-space pixels back into the real captured desktop or
305
+ window rectangle before sending the native click.
306
+ `,
307
+ )
308
+ .option('-x [x]', z.number().describe('X coordinate. When using --coord-map, this must be the exact pixel from the screenshot image'))
309
+ .option('-y [y]', z.number().describe('Y coordinate. When using --coord-map, this must be the exact pixel from the screenshot image'))
103
310
  .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
104
311
  .option('--count [count]', z.number().default(1).describe('Number of clicks'))
105
312
  .option('--modifiers [modifiers]', z.string().describe('Modifiers as ctrl,shift,alt,meta'))
313
+ .option('--coord-map [coordMap]', z.string().describe('Map exact screenshot-space pixels back into the real captured desktop or window rectangle'))
314
+ .example('# Click the exact pixel you saw in a screenshot')
315
+ .example('usecomputer click -x 155 -y 446 --coord-map "0,0,1720,1440,1568,1313"')
106
316
  .action(async (target, options) => {
107
317
  const point = resolvePointInput({
108
318
  x: options.x,
@@ -110,8 +320,9 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
110
320
  target,
111
321
  command: 'click',
112
322
  })
323
+ const coordMap = parseCoordMapOrThrow(options.coordMap)
113
324
  await bridge.click({
114
- point,
325
+ point: mapPointFromCoordMap({ point, coordMap }),
115
326
  button: options.button,
116
327
  count: options.count,
117
328
  modifiers: parseModifiers(options.modifiers),
@@ -119,16 +330,153 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
119
330
  })
120
331
 
121
332
  cli
122
- .command('type <text>', 'Type text in the focused element')
123
- .option('--delay [delay]', z.number().describe('Delay in milliseconds between keystrokes'))
333
+ .command(
334
+ 'debug-point [target]',
335
+ dedent`
336
+ Capture a screenshot and draw a red marker where a click would land.
337
+
338
+ Pass the same --coord-map you plan to use for click. This validates
339
+ screenshot-space coordinates before you send a real click. When
340
+ --coord-map is present, debug-point captures that same region so the
341
+ overlay matches the screenshot you are targeting.
342
+ `,
343
+ )
344
+ .option('-x [x]', z.number().describe('X coordinate'))
345
+ .option('-y [y]', z.number().describe('Y coordinate'))
346
+ .option('--coord-map [coordMap]', z.string().describe('Map input coordinates from screenshot space'))
347
+ .option('--output [path]', z.string().describe('Write the annotated screenshot to this path'))
348
+ .option('--json', 'Output as JSON')
349
+ .example('# Validate the same coordinates you plan to click')
350
+ .example('usecomputer debug-point -x 210 -y 560 --coord-map "0,0,1720,1440,1568,1313"')
351
+ .action(async (target, options) => {
352
+ const point = resolvePointInput({
353
+ x: options.x,
354
+ y: options.y,
355
+ target,
356
+ command: 'debug-point',
357
+ })
358
+ const inputCoordMap = parseCoordMapOrThrow(options.coordMap)
359
+ const desktopPoint = mapPointFromCoordMap({ point, coordMap: inputCoordMap })
360
+ const outputPath = resolveOutputPath({ path: options.output ?? './tmp/debug-point.png' })
361
+ ensureParentDirectory({ filePath: outputPath })
362
+ const screenshotRegion = getRegionFromCoordMap({ coordMap: inputCoordMap })
363
+
364
+ const screenshot = await bridge.screenshot({
365
+ path: outputPath,
366
+ region: screenshotRegion,
367
+ })
368
+ const screenshotCoordMap = parseCoordMapOrThrow(screenshot.coordMap)
369
+ const screenshotPoint = mapPointToCoordMap({ point: desktopPoint, coordMap: screenshotCoordMap })
370
+
371
+ await drawDebugPointOnImage({
372
+ imagePath: screenshot.path,
373
+ point: screenshotPoint,
374
+ imageWidth: screenshot.imageWidth,
375
+ imageHeight: screenshot.imageHeight,
376
+ })
377
+
378
+ if (options.json) {
379
+ printJson({
380
+ path: screenshot.path,
381
+ inputPoint: point,
382
+ desktopPoint,
383
+ screenshotPoint,
384
+ inputCoordMap: options.coordMap ?? null,
385
+ screenshotCoordMap: screenshot.coordMap,
386
+ hint: screenshot.hint,
387
+ })
388
+ return
389
+ }
390
+
391
+ printLine(screenshot.path)
392
+ printLine(`input-point=${point.x},${point.y}`)
393
+ printLine(`desktop-point=${desktopPoint.x},${desktopPoint.y}`)
394
+ printLine(`screenshot-point=${screenshotPoint.x},${screenshotPoint.y}`)
395
+ printLine(screenshot.hint)
396
+ })
397
+
398
+ cli
399
+ .command(
400
+ 'type [text]',
401
+ dedent`
402
+ Type text in the currently focused input.
403
+
404
+ Supports direct text arguments or --stdin for long/multiline content.
405
+ For very long text, use --chunk-size to split input into multiple native
406
+ type calls so shells and apps are less likely to drop input.
407
+ `,
408
+ )
409
+ .option('--stdin', 'Read text from stdin instead of [text] argument')
410
+ .option('--delay [delay]', z.number().describe('Delay in milliseconds between typed characters'))
411
+ .option('--chunk-size [size]', z.number().describe('Split text into fixed-size chunks before typing'))
412
+ .option('--chunk-delay [delay]', z.number().describe('Delay in milliseconds between chunks'))
413
+ .option('--max-length [length]', z.number().describe('Fail when input text exceeds this maximum length'))
414
+ .example('# Type a short string')
415
+ .example('usecomputer type "hello"')
416
+ .example('# Type multiline text from a file')
417
+ .example('cat ./notes.txt | usecomputer type --stdin --chunk-size 4000 --chunk-delay 15')
124
418
  .action(async (text, options) => {
125
- await bridge.typeText({ text, delayMs: options.delay })
419
+ const fromStdin = Boolean(options.stdin)
420
+ if (fromStdin && text) {
421
+ throw new Error('Use either [text] or --stdin, not both')
422
+ }
423
+ if (!fromStdin && !text) {
424
+ throw new Error('Command "type" requires [text] or --stdin')
425
+ }
426
+
427
+ const sourceText = fromStdin ? readTextFromStdin() : text ?? ''
428
+ const chunkSize = parsePositiveInteger({
429
+ value: options.chunkSize,
430
+ option: '--chunk-size',
431
+ })
432
+ const maxLength = parsePositiveInteger({
433
+ value: options.maxLength,
434
+ option: '--max-length',
435
+ })
436
+ const chunkDelay = parsePositiveInteger({
437
+ value: options.chunkDelay,
438
+ option: '--chunk-delay',
439
+ })
440
+
441
+ if (typeof maxLength === 'number' && sourceText.length > maxLength) {
442
+ throw new Error(`Input text length ${String(sourceText.length)} exceeds --max-length ${String(maxLength)}`)
443
+ }
444
+
445
+ const chunks = splitIntoChunks({
446
+ text: sourceText,
447
+ chunkSize,
448
+ })
449
+ await chunks.reduce(async (previousChunk, chunk, index) => {
450
+ await previousChunk
451
+ await bridge.typeText({
452
+ text: chunk,
453
+ delayMs: options.delay,
454
+ })
455
+ if (typeof chunkDelay === 'number' && index < chunks.length - 1) {
456
+ await sleep({ ms: chunkDelay })
457
+ }
458
+ }, Promise.resolve())
126
459
  })
127
460
 
128
461
  cli
129
- .command('press <key>', 'Press a key or key combo')
462
+ .command(
463
+ 'press <key>',
464
+ dedent`
465
+ Press a key or key combo in the focused app.
466
+
467
+ Key combos use plus syntax such as cmd+s or ctrl+shift+p.
468
+ Platform behavior: cmd maps to Command on macOS, Win/Super on
469
+ Windows/Linux. For cross-platform app shortcuts, prefer ctrl+... .
470
+ `,
471
+ )
130
472
  .option('--count [count]', z.number().default(1).describe('How many times to press'))
131
473
  .option('--delay [delay]', z.number().describe('Delay between presses in milliseconds'))
474
+ .example('# Save in the current app on macOS')
475
+ .example('usecomputer press "cmd+s"')
476
+ .example('# Portable save shortcut across most apps')
477
+ .example('usecomputer press "ctrl+s"')
478
+ .example('# Open command palette in many editors')
479
+ .example('usecomputer press "cmd+shift+p"')
132
480
  .action(async (key, options) => {
133
481
  await bridge.press({ key, count: options.count, delayMs: options.delay })
134
482
  })
@@ -157,10 +505,12 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
157
505
  .command('drag <from> <to>', 'Drag from one coordinate to another')
158
506
  .option('--duration [duration]', z.number().describe('Duration in milliseconds'))
159
507
  .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
508
+ .option('--coord-map [coordMap]', z.string().describe('Map input coordinates from screenshot space'))
160
509
  .action(async (from, to, options) => {
510
+ const coordMap = parseCoordMapOrThrow(options.coordMap)
161
511
  await bridge.drag({
162
- from: parsePointOrThrow(from),
163
- to: parsePointOrThrow(to),
512
+ from: mapPointFromCoordMap({ point: parsePointOrThrow(from), coordMap }),
513
+ to: mapPointFromCoordMap({ point: parsePointOrThrow(to), coordMap }),
164
514
  durationMs: options.duration,
165
515
  button: options.button,
166
516
  })
@@ -170,6 +520,7 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
170
520
  .command('hover [target]', 'Move mouse cursor to coordinates without clicking')
171
521
  .option('-x [x]', z.number().describe('X coordinate'))
172
522
  .option('-y [y]', z.number().describe('Y coordinate'))
523
+ .option('--coord-map [coordMap]', z.string().describe('Map input coordinates from screenshot space'))
173
524
  .action(async (target, options) => {
174
525
  const point = resolvePointInput({
175
526
  x: options.x,
@@ -177,13 +528,15 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
177
528
  target,
178
529
  command: 'hover',
179
530
  })
180
- await bridge.hover(point)
531
+ const coordMap = parseCoordMapOrThrow(options.coordMap)
532
+ await bridge.hover(mapPointFromCoordMap({ point, coordMap }))
181
533
  })
182
534
 
183
535
  cli
184
- .command('mouse move [x] [y]', 'Move mouse cursor to absolute coordinates')
536
+ .command('mouse move [x] [y]', 'Move mouse cursor to absolute coordinates (optional before click; click can target coordinates directly)')
185
537
  .option('-x [x]', z.number().describe('X coordinate'))
186
538
  .option('-y [y]', z.number().describe('Y coordinate'))
539
+ .option('--coord-map [coordMap]', z.string().describe('Map input coordinates from screenshot space'))
187
540
  .action(async (x, y, options) => {
188
541
  const point = resolvePointInput({
189
542
  x: options.x,
@@ -191,7 +544,8 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
191
544
  target: x && y ? `${x},${y}` : undefined,
192
545
  command: 'mouse move',
193
546
  })
194
- await bridge.mouseMove(point)
547
+ const coordMap = parseCoordMapOrThrow(options.coordMap)
548
+ await bridge.mouseMove(mapPointFromCoordMap({ point, coordMap }))
195
549
  })
196
550
 
197
551
  cli
@@ -229,12 +583,29 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
229
583
  printJson(displays)
230
584
  return
231
585
  }
232
- displays.forEach((display) => {
233
- const primary = display.isPrimary ? ' (primary)' : ''
234
- printLine(
235
- `#${display.id} ${display.name}${primary} ${display.width}x${display.height} @ (${display.x},${display.y}) scale=${display.scale}`,
236
- )
237
- })
586
+ printDesktopList({ displays })
587
+ })
588
+
589
+ cli
590
+ .command('desktop list', 'List desktops as display indexes and sizes (#0 is the primary display)')
591
+ .option('--windows', 'Include available windows grouped by desktop index')
592
+ .option('--json', 'Output as JSON')
593
+ .action(async (options) => {
594
+ const displays = await bridge.displayList()
595
+ const windows = options.windows ? await bridge.windowList() : []
596
+ if (options.json) {
597
+ if (options.windows) {
598
+ printJson({ displays, windows })
599
+ return
600
+ }
601
+ printJson(displays)
602
+ return
603
+ }
604
+ if (options.windows) {
605
+ printDesktopListWithWindows({ displays, windows })
606
+ return
607
+ }
608
+ printDesktopList({ displays })
238
609
  })
239
610
 
240
611
  cli
@@ -268,8 +639,13 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
268
639
  cli.command('get focused').action(() => {
269
640
  notImplemented({ command: 'get focused' })
270
641
  })
271
- cli.command('window list').action(() => {
272
- notImplemented({ command: 'window list' })
642
+ cli.command('window list').option('--json', 'Output as JSON').action(async (options) => {
643
+ const windows = await bridge.windowList()
644
+ if (options.json) {
645
+ printJson(windows)
646
+ return
647
+ }
648
+ printWindowList({ windows })
273
649
  })
274
650
  cli.command('window focus <target>').action(() => {
275
651
  notImplemented({ command: 'window focus' })