usecomputer 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/CHANGELOG.md +67 -0
  2. package/README.md +324 -0
  3. package/build.zig +95 -11
  4. package/build.zig.zon +5 -0
  5. package/dist/bridge-contract.test.js +61 -67
  6. package/dist/bridge.d.ts.map +1 -1
  7. package/dist/bridge.js +241 -46
  8. package/dist/cli-parsing.test.js +34 -11
  9. package/dist/cli.d.ts.map +1 -1
  10. package/dist/cli.js +323 -28
  11. package/dist/coord-map.d.ts +14 -0
  12. package/dist/coord-map.d.ts.map +1 -0
  13. package/dist/coord-map.js +75 -0
  14. package/dist/coord-map.test.d.ts +2 -0
  15. package/dist/coord-map.test.d.ts.map +1 -0
  16. package/dist/coord-map.test.js +157 -0
  17. package/dist/darwin-arm64/usecomputer.node +0 -0
  18. package/dist/darwin-x64/usecomputer.node +0 -0
  19. package/dist/debug-point-image.d.ts +8 -0
  20. package/dist/debug-point-image.d.ts.map +1 -0
  21. package/dist/debug-point-image.js +43 -0
  22. package/dist/debug-point-image.test.d.ts +2 -0
  23. package/dist/debug-point-image.test.d.ts.map +1 -0
  24. package/dist/debug-point-image.test.js +44 -0
  25. package/dist/index.d.ts +2 -0
  26. package/dist/index.d.ts.map +1 -1
  27. package/dist/index.js +3 -1
  28. package/dist/lib.d.ts +26 -0
  29. package/dist/lib.d.ts.map +1 -0
  30. package/dist/lib.js +88 -0
  31. package/dist/native-click-smoke.test.js +69 -29
  32. package/dist/native-lib.d.ts +59 -1
  33. package/dist/native-lib.d.ts.map +1 -1
  34. package/dist/terminal-table.d.ts +10 -0
  35. package/dist/terminal-table.d.ts.map +1 -0
  36. package/dist/terminal-table.js +55 -0
  37. package/dist/terminal-table.test.d.ts +2 -0
  38. package/dist/terminal-table.test.d.ts.map +1 -0
  39. package/dist/terminal-table.test.js +41 -0
  40. package/dist/types.d.ts +45 -0
  41. package/dist/types.d.ts.map +1 -1
  42. package/package.json +19 -5
  43. package/src/bridge-contract.test.ts +68 -73
  44. package/src/bridge.ts +293 -53
  45. package/src/cli-parsing.test.ts +61 -0
  46. package/src/cli.ts +393 -32
  47. package/src/coord-map.test.ts +178 -0
  48. package/src/coord-map.ts +105 -0
  49. package/src/debug-point-image.test.ts +50 -0
  50. package/src/debug-point-image.ts +69 -0
  51. package/src/index.ts +3 -1
  52. package/src/lib.ts +125 -0
  53. package/src/native-click-smoke.test.ts +81 -63
  54. package/src/native-lib.ts +39 -1
  55. package/src/terminal-table.test.ts +44 -0
  56. package/src/terminal-table.ts +88 -0
  57. package/src/types.ts +50 -0
  58. package/zig/src/lib.zig +1966 -270
  59. package/zig/src/main.zig +382 -0
  60. package/zig/src/scroll.zig +213 -0
  61. package/zig/src/window.zig +123 -0
package/src/cli.ts CHANGED
@@ -1,6 +1,7 @@
1
1
  // usecomputer CLI entrypoint and command wiring for desktop automation actions.
2
2
 
3
3
  import { goke } from 'goke'
4
+ import pc from 'picocolors'
4
5
  import { z } from 'zod'
5
6
  import dedent from 'string-dedent'
6
7
  import { createRequire } from 'node:module'
@@ -8,8 +9,16 @@ import fs from 'node:fs'
8
9
  import pathModule from 'node:path'
9
10
  import url from 'node:url'
10
11
  import { createBridge } from './bridge.js'
12
+ import {
13
+ getRegionFromCoordMap,
14
+ mapPointFromCoordMap,
15
+ mapPointToCoordMap,
16
+ parseCoordMapOrThrow,
17
+ } from './coord-map.js'
11
18
  import { parseDirection, parseModifiers, parsePoint, parseRegion } from './command-parsers.js'
12
- import type { MouseButton, Point, UseComputerBridge } from './types.js'
19
+ import { drawDebugPointOnImage } from './debug-point-image.js'
20
+ import { renderAlignedTable } from './terminal-table.js'
21
+ import type { DisplayInfo, MouseButton, Point, UseComputerBridge, WindowInfo } from './types.js'
13
22
 
14
23
  const require = createRequire(import.meta.url)
15
24
  const packageJson = require('../package.json') as { version: string }
@@ -22,6 +31,58 @@ function printLine(value: string): void {
22
31
  process.stdout.write(`${value}\n`)
23
32
  }
24
33
 
34
/**
 * Reads standard input to the end and returns it as a single UTF-8 string.
 *
 * The read is synchronous (`fs.readFileSync` on file descriptor 0), so the
 * call does not return until the input is exhausted.
 */
function readTextFromStdin(): string {
  const stdinDescriptor = 0
  return fs.readFileSync(stdinDescriptor, 'utf8')
}
37
+
38
/**
 * Validates an optional numeric CLI option and normalizes it to a whole number.
 *
 * @param value - Raw option value; anything that is not a number means "not provided".
 * @param option - Flag name interpolated into the error message (e.g. `--chunk-size`).
 * @returns `undefined` when the option was not provided, otherwise the value
 *   rounded to the nearest integer, which is always at least 1.
 * @throws Error when the value is NaN, infinite, or does not round to at least 1.
 */
function parsePositiveInteger({
  value,
  option,
}: {
  value?: number
  option: string
}): number | undefined {
  if (typeof value !== 'number') {
    return undefined
  }
  const rounded = Math.round(value)
  // Check the rounded result rather than the raw input: 0.4 is positive but
  // rounds to 0, and a falsy 0 reads as "option not set" in truthiness checks.
  if (!Number.isFinite(value) || rounded < 1) {
    throw new Error(`Option ${option} must be a positive number`)
  }
  return rounded
}
53
+
54
/**
 * Splits text into consecutive chunks of roughly `chunkSize` UTF-16 code units.
 *
 * A chunk boundary never falls between the two halves of a surrogate pair:
 * when it would, the chunk is extended by one code unit so the character
 * (for example an emoji) stays whole. Such a chunk is `chunkSize + 1` long.
 * Concatenating the returned chunks always reproduces `text` exactly.
 *
 * @param text - Text to split.
 * @param chunkSize - Target chunk length in UTF-16 code units. Missing, NaN,
 *   or smaller than 1 (after flooring) means "do not split".
 * @returns `[text]` when no splitting applies (including empty text),
 *   otherwise the non-empty chunks in order.
 */
function splitIntoChunks({
  text,
  chunkSize,
}: {
  text: string
  chunkSize?: number
}): string[] {
  const size = typeof chunkSize === 'number' ? Math.floor(chunkSize) : 0
  // `!(size >= 1)` also catches NaN, which a plain `size < 1` would let through.
  if (!(size >= 1) || text.length <= size) {
    return [text]
  }

  const chunks: string[] = []
  let start = 0
  while (start < text.length) {
    let end = Math.min(start + size, text.length)
    if (end < text.length) {
      const last = text.charCodeAt(end - 1)
      const next = text.charCodeAt(end)
      const endsOnHighSurrogate = last >= 0xd800 && last <= 0xdbff
      const nextIsLowSurrogate = next >= 0xdc00 && next <= 0xdfff
      // Pull the trailing low surrogate into this chunk instead of orphaning it.
      if (endsOnHighSurrogate && nextIsLowSurrogate) {
        end += 1
      }
    }
    chunks.push(text.slice(start, end))
    start = end
  }
  return chunks
}
73
+
74
/**
 * Returns a promise that resolves (with no value) once a `setTimeout` of
 * `ms` milliseconds has fired.
 */
function sleep({ ms }: { ms: number }): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms)
  })
}
85
+
25
86
  function parsePointOrThrow(input: string): Point {
26
87
  const parsed = parsePoint(input)
27
88
  if (parsed instanceof Error) {
@@ -30,6 +91,26 @@ function parsePointOrThrow(input: string): Point {
30
91
  return parsed
31
92
  }
32
93
 
94
+
95
/**
 * Turns a user-supplied output path into an absolute path.
 *
 * Uses the platform path rules (`pathModule.isAbsolute` / `pathModule.resolve`)
 * rather than a leading-slash check, so Windows drive and UNC paths are
 * recognised as absolute and relative paths are joined with the native
 * separator.
 *
 * @param path - Optional path as typed by the user.
 * @returns `undefined` when no path (or an empty string) was given; the path
 *   unchanged when it is already absolute; otherwise the path resolved
 *   against the current working directory.
 */
function resolveOutputPath({ path }: { path?: string }): string | undefined {
  if (!path) {
    return undefined
  }

  if (pathModule.isAbsolute(path)) {
    return path
  }
  return pathModule.resolve(process.cwd(), path)
}
104
+
105
/**
 * Creates the directory that will contain `filePath`, including any missing
 * ancestors. Does nothing when `filePath` is missing or empty.
 */
function ensureParentDirectory({ filePath }: { filePath?: string }): void {
  if (filePath) {
    fs.mkdirSync(pathModule.dirname(filePath), { recursive: true })
  }
}
113
+
33
114
  function resolvePointInput({
34
115
  x,
35
116
  y,
@@ -60,6 +141,104 @@ function parseButton(input?: string): MouseButton {
60
141
  return 'left'
61
142
  }
62
143
 
144
/**
 * Prints the displays as an aligned table with the columns desktop, primary,
 * size, position, id, scale, and name, one row per display.
 *
 * Cell text is precomputed per display; the primary flag is rendered as a
 * green "yes" or a plain "no".
 */
function printDesktopList({ displays }: { displays: DisplayInfo[] }) {
  const tableLines = renderAlignedTable({
    rows: displays.map((display) => ({
      desktop: `#${display.index}`,
      primary: display.isPrimary ? pc.green('yes') : 'no',
      size: `${display.width}x${display.height}`,
      position: `${display.x},${display.y}`,
      id: String(display.id),
      scale: String(display.scale),
      name: display.name,
    })),
    columns: [
      { header: pc.bold('desktop'), value: (row) => row.desktop },
      { header: pc.bold('primary'), value: (row) => row.primary },
      { header: pc.bold('size'), value: (row) => row.size, align: 'right' },
      { header: pc.bold('position'), value: (row) => row.position, align: 'right' },
      { header: pc.bold('id'), value: (row) => row.id, align: 'right' },
      { header: pc.bold('scale'), value: (row) => row.scale, align: 'right' },
      { header: pc.bold('name'), value: (row) => row.name },
    ],
  })

  for (const tableLine of tableLines) {
    printLine(tableLine)
  }
}
173
+
174
/**
 * Groups windows by their `desktopIndex`.
 *
 * @returns A map from desktop index to the windows on that desktop. Keys
 *   appear in first-seen order and each list keeps the input order.
 */
function mapWindowsByDesktopIndex({
  windows,
}: {
  windows: WindowInfo[]
}): Map<number, WindowInfo[]> {
  const grouped = new Map<number, WindowInfo[]>()
  for (const entry of windows) {
    const bucket = grouped.get(entry.desktopIndex)
    if (bucket) {
      bucket.push(entry)
    } else {
      grouped.set(entry.desktopIndex, [entry])
    }
  }
  return grouped
}
186
+
187
/**
 * Prints the desktop table, then one section per display listing the windows
 * whose `desktopIndex` matches that display's index.
 *
 * Each section starts with a blank line and a bold cyan heading. A display
 * with no windows gets a dimmed "none" instead of a table.
 */
function printDesktopListWithWindows({
  displays,
  windows,
}: {
  displays: DisplayInfo[]
  windows: WindowInfo[]
}) {
  const windowsByDesktop = mapWindowsByDesktopIndex({ windows })
  printDesktopList({ displays })

  for (const display of displays) {
    printLine('')
    printLine(pc.bold(pc.cyan(`desktop #${display.index} windows`)))

    const desktopWindows = windowsByDesktop.get(display.index) ?? []
    if (desktopWindows.length === 0) {
      printLine(pc.dim('none'))
      continue
    }

    const tableLines = renderAlignedTable({
      rows: desktopWindows,
      columns: [
        { header: pc.bold('id'), value: (row) => String(row.id), align: 'right' },
        { header: pc.bold('app'), value: (row) => row.ownerName },
        { header: pc.bold('pid'), value: (row) => String(row.ownerPid), align: 'right' },
        { header: pc.bold('size'), value: (row) => `${row.width}x${row.height}`, align: 'right' },
        { header: pc.bold('position'), value: (row) => `${row.x},${row.y}`, align: 'right' },
        { header: pc.bold('title'), value: (row) => row.title },
      ],
    })
    for (const tableLine of tableLines) {
      printLine(tableLine)
    }
  }
}
223
+
224
/**
 * Prints all windows as one aligned table with the columns id, desktop, app,
 * pid, size, position, and title.
 */
function printWindowList({ windows }: { windows: WindowInfo[] }) {
  const tableLines = renderAlignedTable({
    rows: windows,
    columns: [
      { header: pc.bold('id'), value: (row) => String(row.id), align: 'right' },
      { header: pc.bold('desktop'), value: (row) => `#${row.desktopIndex}`, align: 'right' },
      { header: pc.bold('app'), value: (row) => row.ownerName },
      { header: pc.bold('pid'), value: (row) => String(row.ownerPid), align: 'right' },
      { header: pc.bold('size'), value: (row) => `${row.width}x${row.height}`, align: 'right' },
      { header: pc.bold('position'), value: (row) => `${row.x},${row.y}`, align: 'right' },
      { header: pc.bold('title'), value: (row) => row.title },
    ],
  })

  for (const tableLine of tableLines) {
    printLine(tableLine)
  }
}
241
+
63
242
  function notImplemented({ command }: { command: string }): never {
64
243
  throw new Error(`TODO not implemented: ${command}`)
65
244
  }
@@ -81,27 +260,27 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
81
260
  '--display [display]',
82
261
  z.number().describe('Display index for multi-monitor setups (0-based: first display is index 0)'),
83
262
  )
263
+ .option('--window [window]', z.number().describe('Capture a specific window by window id'))
84
264
  .option('--annotate', 'Annotate screenshot with labels')
85
265
  .option('--json', 'Output as JSON')
86
266
  .action(async (path, options) => {
87
- const outputPath = path
88
- ? path.startsWith('/')
89
- ? path
90
- : `${process.cwd()}/${path}`
91
- : undefined
92
-
93
- if (path) {
94
- const parentDirectory = pathModule.dirname(outputPath)
95
- fs.mkdirSync(parentDirectory, { recursive: true })
96
- }
267
+ const outputPath = resolveOutputPath({ path })
268
+ ensureParentDirectory({ filePath: outputPath })
97
269
  const region = options.region ? parseRegion(options.region) : undefined
98
270
  if (region instanceof Error) {
99
271
  throw region
100
272
  }
273
+ if (typeof options.window === 'number' && region) {
274
+ throw new Error('Cannot use --window and --region together')
275
+ }
276
+ if (typeof options.window === 'number' && typeof options.display === 'number') {
277
+ throw new Error('Cannot use --window and --display together')
278
+ }
101
279
  const result = await bridge.screenshot({
102
280
  path: outputPath,
103
281
  region,
104
282
  display: options.display,
283
+ window: options.window,
105
284
  annotate: options.annotate,
106
285
  })
107
286
  if (options.json) {
@@ -109,15 +288,31 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
109
288
  return
110
289
  }
111
290
  printLine(result.path)
291
+ printLine(result.hint)
292
+ printLine(`desktop-index=${String(result.desktopIndex)}`)
112
293
  })
113
294
 
114
295
  cli
115
- .command('click [target]', 'Click at coordinates')
116
- .option('-x [x]', z.number().describe('X coordinate'))
117
- .option('-y [y]', z.number().describe('Y coordinate'))
296
+ .command(
297
+ 'click [target]',
298
+ dedent`
299
+ Click at coordinates.
300
+
301
+ When you are clicking from a screenshot, use the exact pixel coordinates
302
+ of the target in that screenshot image and always pass the exact
303
+ --coord-map value printed by usecomputer screenshot. The coord map
304
+ scales screenshot-space pixels back into the real captured desktop or
305
+ window rectangle before sending the native click.
306
+ `,
307
+ )
308
+ .option('-x [x]', z.number().describe('X coordinate. When using --coord-map, this must be the exact pixel from the screenshot image'))
309
+ .option('-y [y]', z.number().describe('Y coordinate. When using --coord-map, this must be the exact pixel from the screenshot image'))
118
310
  .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
119
311
  .option('--count [count]', z.number().default(1).describe('Number of clicks'))
120
312
  .option('--modifiers [modifiers]', z.string().describe('Modifiers as ctrl,shift,alt,meta'))
313
+ .option('--coord-map [coordMap]', z.string().describe('Map exact screenshot-space pixels back into the real captured desktop or window rectangle'))
314
+ .example('# Click the exact pixel you saw in a screenshot')
315
+ .example('usecomputer click -x 155 -y 446 --coord-map "0,0,1720,1440,1568,1313"')
121
316
  .action(async (target, options) => {
122
317
  const point = resolvePointInput({
123
318
  x: options.x,
@@ -125,8 +320,9 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
125
320
  target,
126
321
  command: 'click',
127
322
  })
323
+ const coordMap = parseCoordMapOrThrow(options.coordMap)
128
324
  await bridge.click({
129
- point,
325
+ point: mapPointFromCoordMap({ point, coordMap }),
130
326
  button: options.button,
131
327
  count: options.count,
132
328
  modifiers: parseModifiers(options.modifiers),
@@ -134,16 +330,153 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
134
330
  })
135
331
 
136
332
  cli
137
- .command('type <text>', 'Type text in the focused element')
138
- .option('--delay [delay]', z.number().describe('Delay in milliseconds between keystrokes'))
333
+ .command(
334
+ 'debug-point [target]',
335
+ dedent`
336
+ Capture a screenshot and draw a red marker where a click would land.
337
+
338
+ Pass the same --coord-map you plan to use for click. This validates
339
+ screenshot-space coordinates before you send a real click. When
340
+ --coord-map is present, debug-point captures that same region so the
341
+ overlay matches the screenshot you are targeting.
342
+ `,
343
+ )
344
+ .option('-x [x]', z.number().describe('X coordinate'))
345
+ .option('-y [y]', z.number().describe('Y coordinate'))
346
+ .option('--coord-map [coordMap]', z.string().describe('Map input coordinates from screenshot space'))
347
+ .option('--output [path]', z.string().describe('Write the annotated screenshot to this path'))
348
+ .option('--json', 'Output as JSON')
349
+ .example('# Validate the same coordinates you plan to click')
350
+ .example('usecomputer debug-point -x 210 -y 560 --coord-map "0,0,1720,1440,1568,1313"')
351
+ .action(async (target, options) => {
352
+ const point = resolvePointInput({
353
+ x: options.x,
354
+ y: options.y,
355
+ target,
356
+ command: 'debug-point',
357
+ })
358
+ const inputCoordMap = parseCoordMapOrThrow(options.coordMap)
359
+ const desktopPoint = mapPointFromCoordMap({ point, coordMap: inputCoordMap })
360
+ const outputPath = resolveOutputPath({ path: options.output ?? './tmp/debug-point.png' })
361
+ ensureParentDirectory({ filePath: outputPath })
362
+ const screenshotRegion = getRegionFromCoordMap({ coordMap: inputCoordMap })
363
+
364
+ const screenshot = await bridge.screenshot({
365
+ path: outputPath,
366
+ region: screenshotRegion,
367
+ })
368
+ const screenshotCoordMap = parseCoordMapOrThrow(screenshot.coordMap)
369
+ const screenshotPoint = mapPointToCoordMap({ point: desktopPoint, coordMap: screenshotCoordMap })
370
+
371
+ await drawDebugPointOnImage({
372
+ imagePath: screenshot.path,
373
+ point: screenshotPoint,
374
+ imageWidth: screenshot.imageWidth,
375
+ imageHeight: screenshot.imageHeight,
376
+ })
377
+
378
+ if (options.json) {
379
+ printJson({
380
+ path: screenshot.path,
381
+ inputPoint: point,
382
+ desktopPoint,
383
+ screenshotPoint,
384
+ inputCoordMap: options.coordMap ?? null,
385
+ screenshotCoordMap: screenshot.coordMap,
386
+ hint: screenshot.hint,
387
+ })
388
+ return
389
+ }
390
+
391
+ printLine(screenshot.path)
392
+ printLine(`input-point=${point.x},${point.y}`)
393
+ printLine(`desktop-point=${desktopPoint.x},${desktopPoint.y}`)
394
+ printLine(`screenshot-point=${screenshotPoint.x},${screenshotPoint.y}`)
395
+ printLine(screenshot.hint)
396
+ })
397
+
398
+ cli
399
+ .command(
400
+ 'type [text]',
401
+ dedent`
402
+ Type text in the currently focused input.
403
+
404
+ Supports direct text arguments or --stdin for long/multiline content.
405
+ For very long text, use --chunk-size to split input into multiple native
406
+ type calls so shells and apps are less likely to drop input.
407
+ `,
408
+ )
409
+ .option('--stdin', 'Read text from stdin instead of [text] argument')
410
+ .option('--delay [delay]', z.number().describe('Delay in milliseconds between typed characters'))
411
+ .option('--chunk-size [size]', z.number().describe('Split text into fixed-size chunks before typing'))
412
+ .option('--chunk-delay [delay]', z.number().describe('Delay in milliseconds between chunks'))
413
+ .option('--max-length [length]', z.number().describe('Fail when input text exceeds this maximum length'))
414
+ .example('# Type a short string')
415
+ .example('usecomputer type "hello"')
416
+ .example('# Type multiline text from a file')
417
+ .example('cat ./notes.txt | usecomputer type --stdin --chunk-size 4000 --chunk-delay 15')
139
418
  .action(async (text, options) => {
140
- await bridge.typeText({ text, delayMs: options.delay })
419
+ const fromStdin = Boolean(options.stdin)
420
+ if (fromStdin && text) {
421
+ throw new Error('Use either [text] or --stdin, not both')
422
+ }
423
+ if (!fromStdin && !text) {
424
+ throw new Error('Command "type" requires [text] or --stdin')
425
+ }
426
+
427
+ const sourceText = fromStdin ? readTextFromStdin() : text ?? ''
428
+ const chunkSize = parsePositiveInteger({
429
+ value: options.chunkSize,
430
+ option: '--chunk-size',
431
+ })
432
+ const maxLength = parsePositiveInteger({
433
+ value: options.maxLength,
434
+ option: '--max-length',
435
+ })
436
+ const chunkDelay = parsePositiveInteger({
437
+ value: options.chunkDelay,
438
+ option: '--chunk-delay',
439
+ })
440
+
441
+ if (typeof maxLength === 'number' && sourceText.length > maxLength) {
442
+ throw new Error(`Input text length ${String(sourceText.length)} exceeds --max-length ${String(maxLength)}`)
443
+ }
444
+
445
+ const chunks = splitIntoChunks({
446
+ text: sourceText,
447
+ chunkSize,
448
+ })
449
+ await chunks.reduce(async (previousChunk, chunk, index) => {
450
+ await previousChunk
451
+ await bridge.typeText({
452
+ text: chunk,
453
+ delayMs: options.delay,
454
+ })
455
+ if (typeof chunkDelay === 'number' && index < chunks.length - 1) {
456
+ await sleep({ ms: chunkDelay })
457
+ }
458
+ }, Promise.resolve())
141
459
  })
142
460
 
143
461
  cli
144
- .command('press <key>', 'Press a key or key combo')
462
+ .command(
463
+ 'press <key>',
464
+ dedent`
465
+ Press a key or key combo in the focused app.
466
+
467
+ Key combos use plus syntax such as cmd+s or ctrl+shift+p.
468
+ Platform behavior: cmd maps to Command on macOS, Win/Super on
469
+ Windows/Linux. For cross-platform app shortcuts, prefer ctrl+... .
470
+ `,
471
+ )
145
472
  .option('--count [count]', z.number().default(1).describe('How many times to press'))
146
473
  .option('--delay [delay]', z.number().describe('Delay between presses in milliseconds'))
474
+ .example('# Save in the current app on macOS')
475
+ .example('usecomputer press "cmd+s"')
476
+ .example('# Portable save shortcut across most apps')
477
+ .example('usecomputer press "ctrl+s"')
478
+ .example('# Open command palette in many editors')
479
+ .example('usecomputer press "cmd+shift+p"')
147
480
  .action(async (key, options) => {
148
481
  await bridge.press({ key, count: options.count, delayMs: options.delay })
149
482
  })
@@ -172,10 +505,12 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
172
505
  .command('drag <from> <to>', 'Drag from one coordinate to another')
173
506
  .option('--duration [duration]', z.number().describe('Duration in milliseconds'))
174
507
  .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
508
+ .option('--coord-map [coordMap]', z.string().describe('Map input coordinates from screenshot space'))
175
509
  .action(async (from, to, options) => {
510
+ const coordMap = parseCoordMapOrThrow(options.coordMap)
176
511
  await bridge.drag({
177
- from: parsePointOrThrow(from),
178
- to: parsePointOrThrow(to),
512
+ from: mapPointFromCoordMap({ point: parsePointOrThrow(from), coordMap }),
513
+ to: mapPointFromCoordMap({ point: parsePointOrThrow(to), coordMap }),
179
514
  durationMs: options.duration,
180
515
  button: options.button,
181
516
  })
@@ -185,6 +520,7 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
185
520
  .command('hover [target]', 'Move mouse cursor to coordinates without clicking')
186
521
  .option('-x [x]', z.number().describe('X coordinate'))
187
522
  .option('-y [y]', z.number().describe('Y coordinate'))
523
+ .option('--coord-map [coordMap]', z.string().describe('Map input coordinates from screenshot space'))
188
524
  .action(async (target, options) => {
189
525
  const point = resolvePointInput({
190
526
  x: options.x,
@@ -192,13 +528,15 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
192
528
  target,
193
529
  command: 'hover',
194
530
  })
195
- await bridge.hover(point)
531
+ const coordMap = parseCoordMapOrThrow(options.coordMap)
532
+ await bridge.hover(mapPointFromCoordMap({ point, coordMap }))
196
533
  })
197
534
 
198
535
  cli
199
- .command('mouse move [x] [y]', 'Move mouse cursor to absolute coordinates')
536
+ .command('mouse move [x] [y]', 'Move mouse cursor to absolute coordinates (optional before click; click can target coordinates directly)')
200
537
  .option('-x [x]', z.number().describe('X coordinate'))
201
538
  .option('-y [y]', z.number().describe('Y coordinate'))
539
+ .option('--coord-map [coordMap]', z.string().describe('Map input coordinates from screenshot space'))
202
540
  .action(async (x, y, options) => {
203
541
  const point = resolvePointInput({
204
542
  x: options.x,
@@ -206,7 +544,8 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
206
544
  target: x && y ? `${x},${y}` : undefined,
207
545
  command: 'mouse move',
208
546
  })
209
- await bridge.mouseMove(point)
547
+ const coordMap = parseCoordMapOrThrow(options.coordMap)
548
+ await bridge.mouseMove(mapPointFromCoordMap({ point, coordMap }))
210
549
  })
211
550
 
212
551
  cli
@@ -244,12 +583,29 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
244
583
  printJson(displays)
245
584
  return
246
585
  }
247
- displays.forEach((display) => {
248
- const primary = display.isPrimary ? ' (primary)' : ''
249
- printLine(
250
- `#${display.id} ${display.name}${primary} ${display.width}x${display.height} @ (${display.x},${display.y}) scale=${display.scale}`,
251
- )
252
- })
586
+ printDesktopList({ displays })
587
+ })
588
+
589
+ cli
590
+ .command('desktop list', 'List desktops as display indexes and sizes (#0 is the primary display)')
591
+ .option('--windows', 'Include available windows grouped by desktop index')
592
+ .option('--json', 'Output as JSON')
593
+ .action(async (options) => {
594
+ const displays = await bridge.displayList()
595
+ const windows = options.windows ? await bridge.windowList() : []
596
+ if (options.json) {
597
+ if (options.windows) {
598
+ printJson({ displays, windows })
599
+ return
600
+ }
601
+ printJson(displays)
602
+ return
603
+ }
604
+ if (options.windows) {
605
+ printDesktopListWithWindows({ displays, windows })
606
+ return
607
+ }
608
+ printDesktopList({ displays })
253
609
  })
254
610
 
255
611
  cli
@@ -283,8 +639,13 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
283
639
  cli.command('get focused').action(() => {
284
640
  notImplemented({ command: 'get focused' })
285
641
  })
286
- cli.command('window list').action(() => {
287
- notImplemented({ command: 'window list' })
642
+ cli.command('window list').option('--json', 'Output as JSON').action(async (options) => {
643
+ const windows = await bridge.windowList()
644
+ if (options.json) {
645
+ printJson(windows)
646
+ return
647
+ }
648
+ printWindowList({ windows })
288
649
  })
289
650
  cli.command('window focus <target>').action(() => {
290
651
  notImplemented({ command: 'window focus' })