usecomputer 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/CHANGELOG.md +18 -0
  2. package/README.md +37 -0
  3. package/bin.js +3 -1
  4. package/build.zig +52 -0
  5. package/build.zig.zon +21 -0
  6. package/dist/bridge-contract.test.d.ts +2 -0
  7. package/dist/bridge-contract.test.d.ts.map +1 -0
  8. package/dist/bridge-contract.test.js +74 -0
  9. package/dist/bridge.d.ts +7 -0
  10. package/dist/bridge.d.ts.map +1 -0
  11. package/dist/bridge.js +130 -0
  12. package/dist/cli-parsing.test.d.ts +2 -0
  13. package/dist/cli-parsing.test.d.ts.map +1 -0
  14. package/dist/cli-parsing.test.js +30 -0
  15. package/dist/cli.d.ts +5 -1
  16. package/dist/cli.d.ts.map +1 -1
  17. package/dist/cli.js +286 -335
  18. package/dist/command-parsers.d.ts +6 -0
  19. package/dist/command-parsers.d.ts.map +1 -0
  20. package/dist/command-parsers.js +54 -0
  21. package/dist/command-parsers.test.d.ts +2 -0
  22. package/dist/command-parsers.test.d.ts.map +1 -0
  23. package/dist/command-parsers.test.js +44 -0
  24. package/dist/darwin-arm64/usecomputer.node +0 -0
  25. package/dist/darwin-x64/usecomputer.node +0 -0
  26. package/dist/index.d.ts +4 -1
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +5 -4
  29. package/dist/native-click-smoke.test.d.ts +2 -0
  30. package/dist/native-click-smoke.test.d.ts.map +1 -0
  31. package/dist/native-click-smoke.test.js +93 -0
  32. package/dist/native-lib.cjs +33 -0
  33. package/dist/native-lib.d.cts +7 -0
  34. package/dist/native-lib.d.ts +5 -0
  35. package/dist/native-lib.d.ts.map +1 -0
  36. package/dist/native-lib.js +27 -0
  37. package/dist/types.d.ts +80 -0
  38. package/dist/types.d.ts.map +1 -0
  39. package/dist/types.js +2 -0
  40. package/package.json +23 -12
  41. package/src/bridge-contract.test.ts +85 -0
  42. package/src/bridge.ts +159 -0
  43. package/src/cli.ts +329 -473
  44. package/src/command-parsers.test.ts +50 -0
  45. package/src/command-parsers.ts +60 -0
  46. package/src/index.ts +5 -4
  47. package/src/native-click-smoke.test.ts +131 -0
  48. package/src/native-lib.ts +38 -0
  49. package/src/types.ts +87 -0
  50. package/zig/src/lib.zig +367 -0
package/src/cli.ts CHANGED
@@ -1,478 +1,334 @@
1
- // usecomputer CLI computer automation for AI agents.
2
- // Outline only. Commands print "not implemented" placeholders.
1
+ // usecomputer CLI entrypoint and command wiring for desktop automation actions.
3
2
 
4
3
  import { goke } from 'goke'
5
4
  import { z } from 'zod'
6
5
  import dedent from 'string-dedent'
7
- import pkg from '../package.json' with { type: 'json' }
6
+ import { createRequire } from 'node:module'
7
+ import url from 'node:url'
8
+ import { createBridge } from './bridge.js'
9
+ import { parseDirection, parseModifiers, parsePoint, parseRegion } from './command-parsers.js'
10
+ import type { MouseButton, Point, UseComputerBridge } from './types.js'
11
+
12
+ const require = createRequire(import.meta.url)
13
+ const packageJson = require('../package.json') as { version: string }
14
+
15
/**
 * Writes `value` to standard output as pretty-printed JSON (two-space indent)
 * followed by a single trailing newline.
 */
function printJson(value: unknown): void {
  const serialized = JSON.stringify(value, null, 2)
  process.stdout.write(serialized + '\n')
}
18
+
19
/** Writes `value` to standard output and terminates it with a newline. */
function printLine(value: string): void {
  const line = value + '\n'
  process.stdout.write(line)
}
22
+
23
/**
 * Adapts `parsePoint`, which reports failure by returning an `Error`, to an
 * exception-based contract.
 *
 * @param input Raw point text handed to `parsePoint`.
 * @returns The parsed point when `parsePoint` succeeds.
 * @throws The `Error` instance returned by `parsePoint`, unchanged.
 */
function parsePointOrThrow(input: string): Point {
  const result = parsePoint(input)
  if (!(result instanceof Error)) {
    return result
  }
  throw result
}
30
+
31
/**
 * Resolves the point a command should act on from either coordinate flags or a
 * positional target string.
 *
 * Resolution order:
 * 1. If either `x` or `y` is a number, both must be finite numbers; the pair is
 *    returned directly and `target` is ignored.
 * 2. Otherwise a non-empty `target` is parsed with `parsePointOrThrow`.
 * 3. Otherwise the command has no coordinates and an error is thrown.
 *
 * @param x Value of the `-x` flag, if given.
 * @param y Value of the `-y` flag, if given.
 * @param target Positional coordinate text, if given.
 * @param command Command name, used only in error messages.
 * @returns The resolved point.
 * @throws Error when only one of `-x`/`-y` is given, when either is not a
 *   finite number, when `target` cannot be parsed, or when no coordinates are
 *   supplied at all.
 */
function resolvePointInput({
  x,
  y,
  target,
  command,
}: {
  x?: number
  y?: number
  target?: string
  command: string
}): Point {
  if (typeof x === 'number' || typeof y === 'number') {
    if (typeof x !== 'number' || typeof y !== 'number') {
      throw new Error(`Command "${command}" requires both -x and -y when using coordinate flags`)
    }
    // `typeof NaN === 'number'`, so a failed numeric parse would otherwise be
    // forwarded as a coordinate; reject NaN and ±Infinity explicitly.
    if (!Number.isFinite(x) || !Number.isFinite(y)) {
      throw new Error(`Command "${command}" requires finite numbers for -x and -y`)
    }
    return { x, y }
  }
  if (target) {
    return parsePointOrThrow(target)
  }
  throw new Error(`Command "${command}" requires coordinates. Use -x <n> -y <n>`)
}
53
+
54
/**
 * Normalises an optional button name to a `MouseButton`.
 *
 * `'right'` and `'middle'` are returned as-is; every other value, including
 * `undefined`, resolves to `'left'`.
 */
function parseButton(input?: string): MouseButton {
  switch (input) {
    case 'right':
    case 'middle':
      return input
    default:
      return 'left'
  }
}
60
+
61
/**
 * Always throws, signalling that `command` is registered but has no
 * implementation yet.
 *
 * @param command Command name to include in the error message.
 * @throws Error on every call.
 */
function notImplemented({ command }: { command: string }): never {
  const message = `Command "${command}" is not implemented yet`
  throw new Error(message)
}
64
+
65
/**
 * Builds the `usecomputer` CLI and registers every command, without parsing
 * any arguments (callers invoke `.parse()` on the returned instance).
 *
 * Implemented commands delegate to `bridge`; the remaining commands are
 * registered as placeholders whose action throws a "not implemented" error.
 *
 * @param bridge Backend that performs the desktop actions. Defaults to
 *   `createBridge()`; pass a stub to exercise the CLI wiring in isolation.
 * @returns The configured goke CLI instance with help and version enabled.
 */
export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBridge } = {}) {
  const cli = goke('usecomputer')

  // Positional arguments are checked for presence rather than truthiness so a
  // numeric 0 is never mistaken for "argument omitted". Empty strings still
  // count as omitted.
  const isProvided = (value: unknown): boolean =>
    value !== undefined && value !== null && value !== ''

  cli
    .command(
      'screenshot [path]',
      dedent`
        Take a screenshot of the entire screen or a region.

        This command uses a native Zig backend over macOS APIs.
      `,
    )
    .option('-r, --region [region]', z.string().describe('Capture region as x,y,width,height'))
    .option('--display [display]', z.number().describe('Display index for multi-monitor setups'))
    .option('--annotate', 'Annotate screenshot with labels')
    .option('--json', 'Output as JSON')
    .action(async (path, options) => {
      const region = options.region ? parseRegion(options.region) : undefined
      if (region instanceof Error) {
        throw region
      }
      const result = await bridge.screenshot({
        path,
        region,
        display: options.display,
        annotate: options.annotate,
      })
      if (options.json) {
        printJson(result)
        return
      }
      printLine(result.path)
    })

  cli
    .command('click [target]', 'Click at coordinates')
    .option('-x [x]', z.number().describe('X coordinate'))
    .option('-y [y]', z.number().describe('Y coordinate'))
    .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
    .option('--count [count]', z.number().default(1).describe('Number of clicks'))
    .option('--modifiers [modifiers]', z.string().describe('Modifiers as ctrl,shift,alt,meta'))
    .action(async (target, options) => {
      const point = resolvePointInput({
        x: options.x,
        y: options.y,
        target,
        command: 'click',
      })
      await bridge.click({
        point,
        // Normalised the same way as `mouse down` / `mouse up`.
        button: parseButton(options.button),
        count: options.count,
        modifiers: parseModifiers(options.modifiers),
      })
    })

  cli
    .command('type <text>', 'Type text in the focused element')
    .option('--delay [delay]', z.number().describe('Delay in milliseconds between keystrokes'))
    .action(async (text, options) => {
      await bridge.typeText({ text, delayMs: options.delay })
    })

  cli
    .command('press <key>', 'Press a key or key combo')
    .option('--count [count]', z.number().default(1).describe('How many times to press'))
    .option('--delay [delay]', z.number().describe('Delay between presses in milliseconds'))
    .action(async (key, options) => {
      await bridge.press({ key, count: options.count, delayMs: options.delay })
    })

  cli
    .command('scroll <direction> [amount]', 'Scroll in a direction')
    .option('--at [at]', z.string().describe('Coordinates x,y where scroll happens'))
    .action(async (direction, amount, options) => {
      const parsedDirection = parseDirection(direction)
      if (parsedDirection instanceof Error) {
        throw parsedDirection
      }
      const at = options.at ? parsePointOrThrow(options.at) : undefined
      // 300 is the default amount when the positional is omitted.
      const scrollAmount = isProvided(amount) ? Number(amount) : 300
      if (!Number.isFinite(scrollAmount)) {
        throw new Error(`Invalid amount "${amount}"`)
      }
      await bridge.scroll({
        direction: parsedDirection,
        amount: scrollAmount,
        at,
      })
    })

  cli
    .command('drag <from> <to>', 'Drag from one coordinate to another')
    .option('--duration [duration]', z.number().describe('Duration in milliseconds'))
    .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
    .action(async (from, to, options) => {
      await bridge.drag({
        from: parsePointOrThrow(from),
        to: parsePointOrThrow(to),
        durationMs: options.duration,
        button: parseButton(options.button),
      })
    })

  cli
    .command('hover [target]', 'Move mouse cursor to coordinates without clicking')
    .option('-x [x]', z.number().describe('X coordinate'))
    .option('-y [y]', z.number().describe('Y coordinate'))
    .action(async (target, options) => {
      const point = resolvePointInput({
        x: options.x,
        y: options.y,
        target,
        command: 'hover',
      })
      await bridge.hover(point)
    })

  cli
    .command('mouse move [x] [y]', 'Move mouse cursor to absolute coordinates')
    .option('-x [x]', z.number().describe('X coordinate'))
    .option('-y [y]', z.number().describe('Y coordinate'))
    .action(async (x, y, options) => {
      const point = resolvePointInput({
        x: options.x,
        y: options.y,
        // Both positionals are needed to form a target; otherwise fall back
        // to the -x/-y flags (or the "requires coordinates" error).
        target: isProvided(x) && isProvided(y) ? `${x},${y}` : undefined,
        command: 'mouse move',
      })
      await bridge.mouseMove(point)
    })

  cli
    .command('mouse down', 'Press and hold mouse button')
    .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
    .action(async (options) => {
      await bridge.mouseDown({ button: parseButton(options.button) })
    })

  cli
    .command('mouse up', 'Release mouse button')
    .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
    .action(async (options) => {
      await bridge.mouseUp({ button: parseButton(options.button) })
    })

  cli
    .command('mouse position', 'Print current mouse position as x,y')
    .option('--json', 'Output as JSON')
    .action(async (options) => {
      const position = await bridge.mousePosition()
      if (options.json) {
        printJson(position)
        return
      }
      printLine(`${position.x},${position.y}`)
    })

  cli
    .command('display list', 'List connected displays')
    .option('--json', 'Output as JSON')
    .action(async (options) => {
      const displays = await bridge.displayList()
      if (options.json) {
        printJson(displays)
        return
      }
      for (const display of displays) {
        const primary = display.isPrimary ? ' (primary)' : ''
        printLine(
          `#${display.id} ${display.name}${primary} ${display.width}x${display.height} @ (${display.x},${display.y}) scale=${display.scale}`,
        )
      }
    })

  cli
    .command('clipboard get', 'Print clipboard text')
    .action(async () => {
      const text = await bridge.clipboardGet()
      printLine(text)
    })

  cli
    .command('clipboard set <text>', 'Set clipboard text')
    .action(async (text) => {
      await bridge.clipboardSet({ text })
    })

  // Commands that are part of the CLI surface but have no backend yet. Each is
  // registered (in this order) with an action that throws via notImplemented.
  const placeholderCommands: ReadonlyArray<{ signature: string; command: string }> = [
    { signature: 'snapshot', command: 'snapshot' },
    { signature: 'get text <target>', command: 'get text' },
    { signature: 'get title <target>', command: 'get title' },
    { signature: 'get value <target>', command: 'get value' },
    { signature: 'get bounds <target>', command: 'get bounds' },
    { signature: 'get focused', command: 'get focused' },
    { signature: 'window list', command: 'window list' },
    { signature: 'window focus <target>', command: 'window focus' },
    { signature: 'window resize <target> <width> <height>', command: 'window resize' },
    { signature: 'window move <target> <x> <y>', command: 'window move' },
    { signature: 'window minimize <target>', command: 'window minimize' },
    { signature: 'window maximize <target>', command: 'window maximize' },
    { signature: 'window close <target>', command: 'window close' },
    { signature: 'app list', command: 'app list' },
    { signature: 'app launch <name>', command: 'app launch' },
    { signature: 'app quit <name>', command: 'app quit' },
    { signature: 'wait <target>', command: 'wait' },
    { signature: 'find <query>', command: 'find' },
    { signature: 'diff snapshot', command: 'diff snapshot' },
    { signature: 'diff screenshot', command: 'diff screenshot' },
  ]
  for (const { signature, command } of placeholderCommands) {
    cli.command(signature).action(() => {
      notImplemented({ command })
    })
  }

  cli.help()
  cli.version(packageJson.version)
  return cli
}
318
+
319
/**
 * Builds the CLI with its default bridge and parses the current process
 * arguments, dispatching to the matching command.
 */
export function runCli(): void {
  createCli().parse()
}
323
+
324
// True when this module is the script Node was started with, i.e. the file URL
// of process.argv[1] equals this module's own URL. False when no script path
// is present.
// NOTE(review): argv[1] is compared without resolving symlinks — confirm this
// is acceptable if the file is ever launched through a symlinked path.
const isDirectEntrypoint = ((): boolean => {
  const [, argvPath] = process.argv
  return argvPath ? import.meta.url === url.pathToFileURL(argvPath).href : false
})()
8
331
 
9
- const cli = goke('usecomputer')
10
-
11
- // ─── Core Commands ──────────────────────────────────────────────────────
12
-
13
- cli
14
- .command(
15
- 'snapshot',
16
- dedent`
17
- Capture the accessibility tree of the desktop or a window.
18
-
19
- Uses native accessibility APIs (macOS AX, AT-SPI on Linux, UIA on Windows)
20
- to produce a structured tree of UI elements with roles, names, and ref IDs.
21
- Refs like @e1, @e2 can be used in click, type, and other commands.
22
- `,
23
- )
24
- .option('-w, --window [window]', z.string().describe('Target a specific window by title or ID'))
25
- .option('-a, --app [app]', z.string().describe('Target a specific application by name or bundle ID'))
26
- .option('-i, --interactive', 'Only show interactive elements (buttons, inputs, links)')
27
- .option('-c, --compact', 'Remove empty structural elements')
28
- .option('-d, --depth [depth]', z.number().describe('Limit tree depth'))
29
- .example('# Full desktop accessibility snapshot')
30
- .example('usecomputer snapshot')
31
- .example('# Interactive elements in a specific app')
32
- .example('usecomputer snapshot --app "Visual Studio Code" -i')
33
- .action((options) => {
34
- console.log('not implemented')
35
- })
36
-
37
- cli
38
- .command(
39
- 'screenshot [path]',
40
- dedent`
41
- Take a screenshot of the entire screen, a window, or a region.
42
-
43
- Saves as PNG. If no path is given, prints the file path of a temp file.
44
- Use --window or --app to capture a specific window.
45
- Use --region to capture a rectangular area (x,y,width,height).
46
- `,
47
- )
48
- .option('-w, --window [window]', z.string().describe('Capture a specific window by title or ID'))
49
- .option('-a, --app [app]', z.string().describe('Capture a specific application by name or bundle ID'))
50
- .option('-r, --region [region]', z.string().describe('Capture region as x,y,width,height'))
51
- .option('--display [display]', z.number().describe('Display/monitor index for multi-monitor setups'))
52
- .option('--annotate', 'Annotate screenshot with numbered labels on interactive elements')
53
- .example('# Screenshot entire screen')
54
- .example('usecomputer screenshot')
55
- .example('# Screenshot a specific app window')
56
- .example('usecomputer screenshot --app Finder ~/Desktop/finder.png')
57
- .example('# Screenshot a region')
58
- .example('usecomputer screenshot --region 100,200,800,600')
59
- .action((path, options) => {
60
- console.log('not implemented')
61
- })
62
-
63
- cli
64
- .command(
65
- 'click <target>',
66
- dedent`
67
- Click at a target. Target can be:
68
- - Coordinates: "500,300" (x,y pixels)
69
- - Accessibility ref: "@e2" (from a snapshot)
70
- - Text match: "Submit" (finds element by accessible name)
71
- `,
72
- )
73
- .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
74
- .option('--count [count]', z.number().default(1).describe('Number of clicks (2 for double-click)'))
75
- .option('--modifiers [modifiers]', z.string().describe('Modifier keys held during click (ctrl,shift,alt,meta)'))
76
- .example('# Click at coordinates')
77
- .example('usecomputer click 500,300')
78
- .example('# Click an element from snapshot')
79
- .example('usecomputer click @e2')
80
- .example('# Right-click')
81
- .example('usecomputer click 500,300 --button right')
82
- .example('# Double-click')
83
- .example('usecomputer click @e5 --count 2')
84
- .action((target, options) => {
85
- console.log('not implemented')
86
- })
87
-
88
- cli
89
- .command(
90
- 'type <text>',
91
- dedent`
92
- Type text using keyboard input, as if the user is typing.
93
-
94
- Types each character sequentially with realistic key events.
95
- Works with the currently focused element. Use "click" first to focus.
96
- `,
97
- )
98
- .option('--delay [delay]', z.number().describe('Delay between keystrokes in milliseconds'))
99
- .example('# Type into the currently focused field')
100
- .example('usecomputer type "Hello, world!"')
101
- .action((text, options) => {
102
- console.log('not implemented')
103
- })
104
-
105
- cli
106
- .command(
107
- 'press <key>',
108
- dedent`
109
- Press a key or key combination.
110
-
111
- Supports modifier combos like "ctrl+c", "cmd+shift+s", "alt+tab".
112
- Key names: enter, tab, escape, space, backspace, delete, up, down,
113
- left, right, home, end, pageup, pagedown, f1-f12.
114
- `,
115
- )
116
- .option('--count [count]', z.number().default(1).describe('Number of times to press'))
117
- .option('--delay [delay]', z.number().describe('Delay between repeated presses in milliseconds'))
118
- .example('# Press Enter')
119
- .example('usecomputer press enter')
120
- .example('# Copy to clipboard')
121
- .example('usecomputer press cmd+c')
122
- .example('# Switch apps on macOS')
123
- .example('usecomputer press cmd+tab')
124
- .example('# Press Escape 3 times')
125
- .example('usecomputer press escape --count 3')
126
- .action((key, options) => {
127
- console.log('not implemented')
128
- })
129
-
130
- cli
131
- .command(
132
- 'scroll <direction> [amount]',
133
- dedent`
134
- Scroll in a direction. Amount is in pixels (default: 300).
135
-
136
- Directions: up, down, left, right.
137
- Scrolls at the current mouse position unless --at is specified.
138
- `,
139
- )
140
- .option('--at [at]', z.string().describe('Scroll at specific coordinates (x,y)'))
141
- .example('# Scroll down')
142
- .example('usecomputer scroll down')
143
- .example('# Scroll up 500px at a specific position')
144
- .example('usecomputer scroll up 500 --at 400,300')
145
- .action((direction, amount, options) => {
146
- console.log('not implemented')
147
- })
148
-
149
- cli
150
- .command(
151
- 'drag <from> <to>',
152
- dedent`
153
- Drag from one position to another.
154
-
155
- Positions are x,y coordinates or accessibility refs (@e1).
156
- Performs mouse-down at "from", moves to "to", then mouse-up.
157
- `,
158
- )
159
- .option('--duration [duration]', z.number().describe('Duration of the drag in milliseconds'))
160
- .example('# Drag from one position to another')
161
- .example('usecomputer drag 100,200 500,200')
162
- .example('# Drag an accessibility element')
163
- .example('usecomputer drag @e3 400,600')
164
- .action((from, to, options) => {
165
- console.log('not implemented')
166
- })
167
-
168
- cli
169
- .command(
170
- 'hover <target>',
171
- dedent`
172
- Move the mouse to a target without clicking.
173
-
174
- Target can be coordinates (x,y) or an accessibility ref (@e1).
175
- `,
176
- )
177
- .example('usecomputer hover 500,300')
178
- .example('usecomputer hover @e4')
179
- .action((target) => {
180
- console.log('not implemented')
181
- })
182
-
183
- // ─── Mouse Commands ─────────────────────────────────────────────────────
184
-
185
- cli
186
- .command('mouse move <x> <y>', 'Move mouse cursor to absolute screen coordinates.')
187
- .action((x, y) => {
188
- console.log('not implemented')
189
- })
190
-
191
- cli
192
- .command('mouse down', 'Press and hold mouse button.')
193
- .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
194
- .action((options) => {
195
- console.log('not implemented')
196
- })
197
-
198
- cli
199
- .command('mouse up', 'Release mouse button.')
200
- .option('--button [button]', z.enum(['left', 'right', 'middle']).default('left').describe('Mouse button'))
201
- .action((options) => {
202
- console.log('not implemented')
203
- })
204
-
205
- cli
206
- .command('mouse position', 'Print the current mouse cursor position as x,y.')
207
- .action(() => {
208
- console.log('not implemented')
209
- })
210
-
211
- // ─── Get Info Commands ──────────────────────────────────────────────────
212
-
213
- cli
214
- .command(
215
- 'get text <target>',
216
- 'Get the accessible text content of an element. Target is an accessibility ref (@e1) or coordinates.',
217
- )
218
- .action((target) => {
219
- console.log('not implemented')
220
- })
221
-
222
- cli
223
- .command(
224
- 'get title <target>',
225
- 'Get the title/name of a window or element. Target is a ref, coordinates, or window ID.',
226
- )
227
- .action((target) => {
228
- console.log('not implemented')
229
- })
230
-
231
- cli
232
- .command(
233
- 'get value <target>',
234
- 'Get the current value of an input element (text fields, sliders, checkboxes).',
235
- )
236
- .action((target) => {
237
- console.log('not implemented')
238
- })
239
-
240
- cli
241
- .command(
242
- 'get bounds <target>',
243
- 'Get the bounding rectangle (x, y, width, height) of an element or window.',
244
- )
245
- .action((target) => {
246
- console.log('not implemented')
247
- })
248
-
249
- cli
250
- .command(
251
- 'get focused',
252
- 'Get the currently focused element and its accessibility info.',
253
- )
254
- .action(() => {
255
- console.log('not implemented')
256
- })
257
-
258
- // ─── Window Management ─────────────────────────────────────────────────
259
-
260
- cli
261
- .command(
262
- 'window list',
263
- 'List all open windows with their titles, apps, positions, and sizes.',
264
- )
265
- .option('--app [app]', z.string().describe('Filter by application name'))
266
- .option('--json', 'Output as JSON')
267
- .action((options) => {
268
- console.log('not implemented')
269
- })
270
-
271
- cli
272
- .command(
273
- 'window focus <target>',
274
- 'Bring a window to the foreground. Target is a window title, ID, or app name.',
275
- )
276
- .action((target) => {
277
- console.log('not implemented')
278
- })
279
-
280
- cli
281
- .command(
282
- 'window resize <target> <width> <height>',
283
- 'Resize a window. Target is a window title, ID, or app name.',
284
- )
285
- .action((target, width, height) => {
286
- console.log('not implemented')
287
- })
288
-
289
- cli
290
- .command(
291
- 'window move <target> <x> <y>',
292
- 'Move a window to absolute screen coordinates.',
293
- )
294
- .action((target, x, y) => {
295
- console.log('not implemented')
296
- })
297
-
298
- cli
299
- .command(
300
- 'window minimize <target>',
301
- 'Minimize a window.',
302
- )
303
- .action((target) => {
304
- console.log('not implemented')
305
- })
306
-
307
- cli
308
- .command(
309
- 'window maximize <target>',
310
- 'Maximize/fullscreen a window.',
311
- )
312
- .action((target) => {
313
- console.log('not implemented')
314
- })
315
-
316
- cli
317
- .command(
318
- 'window close <target>',
319
- 'Close a window.',
320
- )
321
- .action((target) => {
322
- console.log('not implemented')
323
- })
324
-
325
- // ─── App Management ────────────────────────────────────────────────────
326
-
327
- cli
328
- .command(
329
- 'app list',
330
- 'List all running applications with their process IDs and window counts.',
331
- )
332
- .option('--json', 'Output as JSON')
333
- .action((options) => {
334
- console.log('not implemented')
335
- })
336
-
337
- cli
338
- .command(
339
- 'app launch <name>',
340
- dedent`
341
- Launch an application by name or path.
342
-
343
- On macOS: app name ("Safari"), bundle ID ("com.apple.Safari"), or path.
344
- On Linux: executable name or .desktop file.
345
- On Windows: executable name or Start Menu shortcut.
346
- `,
347
- )
348
- .option('--wait', 'Wait for the application window to appear before returning')
349
- .action((name, options) => {
350
- console.log('not implemented')
351
- })
352
-
353
- cli
354
- .command(
355
- 'app quit <name>',
356
- 'Quit an application gracefully by name or process ID.',
357
- )
358
- .option('--force', 'Force-kill the application if it does not quit gracefully')
359
- .action((name, options) => {
360
- console.log('not implemented')
361
- })
362
-
363
- // ─── Clipboard ──────────────────────────────────────────────────────────
364
-
365
- cli
366
- .command(
367
- 'clipboard get',
368
- 'Print the current clipboard text content.',
369
- )
370
- .action(() => {
371
- console.log('not implemented')
372
- })
373
-
374
- cli
375
- .command(
376
- 'clipboard set <text>',
377
- 'Set the clipboard content to the given text.',
378
- )
379
- .action((text) => {
380
- console.log('not implemented')
381
- })
382
-
383
- // ─── Wait ───────────────────────────────────────────────────────────────
384
-
385
- cli
386
- .command(
387
- 'wait <target>',
388
- dedent`
389
- Wait for a condition before continuing.
390
-
391
- Target can be:
392
- - Milliseconds: "2000" (wait 2 seconds)
393
- - Accessibility ref: "@e5" (wait for element to appear)
394
- - Window title: "--window Untitled" (wait for window to appear)
395
- `,
396
- )
397
- .option('-w, --window [window]', z.string().describe('Wait for a window with this title to appear'))
398
- .option('--timeout [timeout]', z.number().default(30000).describe('Maximum wait time in milliseconds'))
399
- .example('# Wait 2 seconds')
400
- .example('usecomputer wait 2000')
401
- .example('# Wait for an element to appear')
402
- .example('usecomputer wait @e5')
403
- .example('# Wait for a window to appear')
404
- .example('usecomputer wait --window "Save As"')
405
- .action((target, options) => {
406
- console.log('not implemented')
407
- })
408
-
409
- // ─── Display ────────────────────────────────────────────────────────────
410
-
411
- cli
412
- .command(
413
- 'display list',
414
- 'List connected displays with their resolutions, positions, and scale factors.',
415
- )
416
- .option('--json', 'Output as JSON')
417
- .action((options) => {
418
- console.log('not implemented')
419
- })
420
-
421
- // ─── Find Elements ──────────────────────────────────────────────────────
422
-
423
- cli
424
- .command(
425
- 'find <query>',
426
- dedent`
427
- Search for UI elements matching a text query across the accessibility tree.
428
-
429
- Returns matching elements with their refs, roles, and positions.
430
- Useful for locating elements before clicking or typing.
431
- `,
432
- )
433
- .option('-w, --window [window]', z.string().describe('Scope search to a specific window'))
434
- .option('-a, --app [app]', z.string().describe('Scope search to a specific application'))
435
- .option('--role [role]', z.string().describe('Filter by accessibility role (button, textField, link, etc.)'))
436
- .option('--limit [limit]', z.number().default(20).describe('Maximum number of results'))
437
- .example('# Find all buttons with "Save" in the name')
438
- .example('usecomputer find "Save" --role button')
439
- .example('# Find elements in a specific app')
440
- .example('usecomputer find "File" --app "Visual Studio Code"')
441
- .action((query, options) => {
442
- console.log('not implemented')
443
- })
444
-
445
- // ─── Diff ───────────────────────────────────────────────────────────────
446
-
447
- cli
448
- .command(
449
- 'diff snapshot',
450
- 'Compare the current accessibility snapshot against the previous one. Shows added, removed, and changed elements.',
451
- )
452
- .option('-w, --window [window]', z.string().describe('Scope to a specific window'))
453
- .option('-a, --app [app]', z.string().describe('Scope to a specific application'))
454
- .action((options) => {
455
- console.log('not implemented')
456
- })
457
-
458
- cli
459
- .command(
460
- 'diff screenshot',
461
- 'Compare the current screenshot against a baseline image. Highlights visual differences.',
462
- )
463
- .option('--baseline <baseline>', z.string().describe('Path to the baseline screenshot'))
464
- .option('--threshold [threshold]', z.number().default(0.1).describe('Pixel difference threshold (0-1)'))
465
- .action((options) => {
466
- console.log('not implemented')
467
- })
468
-
469
- // ─── Global Options ─────────────────────────────────────────────────────
470
-
471
- cli.option('--json', 'Output as JSON')
472
- cli.option('--display [display]', z.number().describe('Target display/monitor index for multi-monitor setups'))
473
- cli.option('--timeout [timeout]', z.number().default(25000).describe('Default timeout for operations in milliseconds'))
474
- cli.option('--debug', 'Enable debug output')
475
-
476
- cli.help()
477
- cli.version(pkg.version)
478
- cli.parse()
332
// Start the CLI only when this file is executed directly; importing it (for
// example to call createCli from another module) must not parse argv.
if (isDirectEntrypoint) runCli()