usecomputer 0.0.2 → 0.0.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,19 @@
4
4
 
5
5
  All notable changes to `usecomputer` will be documented in this file.
6
6
 
7
+ ## 0.0.3
8
+
9
+ - Implement real screenshot capture + PNG file writing on macOS.
10
+ - Screenshot path handling now uses the requested output path reliably.
11
+ - Unimplemented commands now return explicit `TODO not implemented: ...` errors.
12
+ - Clarify `--display` index behavior as 0-based in help/docs.
13
+
14
+ ## 0.0.2
15
+
16
+ - Publish macOS native binaries for both `darwin-arm64` and `darwin-x64`.
17
+ - Add package metadata/docs for npm distribution.
18
+ - Improve CLI coordinate input with `-x` / `-y` flags.
19
+
7
20
  ## 0.0.1
8
21
 
9
22
  - Initial npm package release for macOS.
package/README.md CHANGED
@@ -35,3 +35,17 @@ Commands that target coordinates accept `-x` and `-y` flags:
35
35
  - `usecomputer mouse move -x <n> -y <n>`
36
36
 
37
37
  Legacy coordinate forms are also accepted where available.
38
+
39
+ ## Display index options
40
+
41
+ For commands that accept `--display`, the index is 0-based:
42
+
43
+ - `0` = first display
44
+ - `1` = second display
45
+ - `2` = third display
46
+
47
+ Example:
48
+
49
+ ```bash
50
+ usecomputer screenshot ./shot.png --display 0 --json
51
+ ```
package/build.zig CHANGED
@@ -29,6 +29,7 @@ pub fn build(b: *std.Build) void {
29
29
  if (target.result.os.tag == .macos) {
30
30
  lib.root_module.linkFramework("CoreGraphics", .{});
31
31
  lib.root_module.linkFramework("CoreFoundation", .{});
32
+ lib.root_module.linkFramework("ImageIO", .{});
32
33
  }
33
34
 
34
35
  napigen.setup(lib);
package/dist/cli.d.ts.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":"AASA,OAAO,KAAK,EAAsB,iBAAiB,EAAE,MAAM,YAAY,CAAA;AAuDvE,wBAAgB,SAAS,CAAC,EAAE,MAAuB,EAAE,GAAE;IAAE,MAAM,CAAC,EAAE,iBAAiB,CAAA;CAAO,2BA4PzF;AAED,wBAAgB,MAAM,IAAI,IAAI,CAG7B"}
1
+ {"version":3,"file":"cli.d.ts","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAsB,iBAAiB,EAAE,MAAM,YAAY,CAAA;AAuDvE,wBAAgB,SAAS,CAAC,EAAE,MAAuB,EAAE,GAAE;IAAE,MAAM,CAAC,EAAE,iBAAiB,CAAA;CAAO,2BAyQzF;AAED,wBAAgB,MAAM,IAAI,IAAI,CAG7B"}
package/dist/cli.js CHANGED
@@ -3,6 +3,8 @@ import { goke } from 'goke';
3
3
  import { z } from 'zod';
4
4
  import dedent from 'string-dedent';
5
5
  import { createRequire } from 'node:module';
6
+ import fs from 'node:fs';
7
+ import pathModule from 'node:path';
6
8
  import url from 'node:url';
7
9
  import { createBridge } from './bridge.js';
8
10
  import { parseDirection, parseModifiers, parsePoint, parseRegion } from './command-parsers.js';
@@ -40,7 +42,7 @@ function parseButton(input) {
40
42
  return 'left';
41
43
  }
42
44
  function notImplemented({ command }) {
43
- throw new Error(`Command \"${command}\" is not implemented yet`);
45
+ throw new Error(`TODO not implemented: ${command}`);
44
46
  }
45
47
  export function createCli({ bridge = createBridge() } = {}) {
46
48
  const cli = goke('usecomputer');
@@ -51,16 +53,25 @@ export function createCli({ bridge = createBridge() } = {}) {
51
53
  This command uses a native Zig backend over macOS APIs.
52
54
  `)
53
55
  .option('-r, --region [region]', z.string().describe('Capture region as x,y,width,height'))
54
- .option('--display [display]', z.number().describe('Display index for multi-monitor setups'))
56
+ .option('--display [display]', z.number().describe('Display index for multi-monitor setups (0-based: first display is index 0)'))
55
57
  .option('--annotate', 'Annotate screenshot with labels')
56
58
  .option('--json', 'Output as JSON')
57
59
  .action(async (path, options) => {
60
+ const outputPath = path
61
+ ? path.startsWith('/')
62
+ ? path
63
+ : `${process.cwd()}/${path}`
64
+ : undefined;
65
+ if (path) {
66
+ const parentDirectory = pathModule.dirname(outputPath);
67
+ fs.mkdirSync(parentDirectory, { recursive: true });
68
+ }
58
69
  const region = options.region ? parseRegion(options.region) : undefined;
59
70
  if (region instanceof Error) {
60
71
  throw region;
61
72
  }
62
73
  const result = await bridge.screenshot({
63
- path,
74
+ path: outputPath,
64
75
  region,
65
76
  display: options.display,
66
77
  annotate: options.annotate,
Binary file
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "usecomputer",
3
- "version": "0.0.2",
3
+ "version": "0.0.3",
4
4
  "type": "module",
5
5
  "description": "Fast computer automation CLI for AI agents. Control any desktop with accessibility snapshots, clicks, typing, scrolling, and more.",
6
6
  "bin": "./bin.js",
package/src/cli.ts CHANGED
@@ -4,6 +4,8 @@ import { goke } from 'goke'
4
4
  import { z } from 'zod'
5
5
  import dedent from 'string-dedent'
6
6
  import { createRequire } from 'node:module'
7
+ import fs from 'node:fs'
8
+ import pathModule from 'node:path'
7
9
  import url from 'node:url'
8
10
  import { createBridge } from './bridge.js'
9
11
  import { parseDirection, parseModifiers, parsePoint, parseRegion } from './command-parsers.js'
@@ -59,7 +61,7 @@ function parseButton(input?: string): MouseButton {
59
61
  }
60
62
 
61
63
  function notImplemented({ command }: { command: string }): never {
62
- throw new Error(`Command \"${command}\" is not implemented yet`)
64
+ throw new Error(`TODO not implemented: ${command}`)
63
65
  }
64
66
 
65
67
  export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBridge } = {}) {
@@ -75,16 +77,29 @@ export function createCli({ bridge = createBridge() }: { bridge?: UseComputerBri
75
77
  `,
76
78
  )
77
79
  .option('-r, --region [region]', z.string().describe('Capture region as x,y,width,height'))
78
- .option('--display [display]', z.number().describe('Display index for multi-monitor setups'))
80
+ .option(
81
+ '--display [display]',
82
+ z.number().describe('Display index for multi-monitor setups (0-based: first display is index 0)'),
83
+ )
79
84
  .option('--annotate', 'Annotate screenshot with labels')
80
85
  .option('--json', 'Output as JSON')
81
86
  .action(async (path, options) => {
87
+ const outputPath = path
88
+ ? path.startsWith('/')
89
+ ? path
90
+ : `${process.cwd()}/${path}`
91
+ : undefined
92
+
93
+ if (path) {
94
+ const parentDirectory = pathModule.dirname(outputPath)
95
+ fs.mkdirSync(parentDirectory, { recursive: true })
96
+ }
82
97
  const region = options.region ? parseRegion(options.region) : undefined
83
98
  if (region instanceof Error) {
84
99
  throw region
85
100
  }
86
101
  const result = await bridge.screenshot({
87
- path,
102
+ path: outputPath,
88
103
  region,
89
104
  display: options.display,
90
105
  annotate: options.annotate,
package/zig/src/lib.zig CHANGED
@@ -8,6 +8,7 @@ const napigen = if (builtin.is_test) undefined else @import("napigen");
8
8
  const c = if (builtin.target.os.tag == .macos) @cImport({
9
9
  @cInclude("CoreGraphics/CoreGraphics.h");
10
10
  @cInclude("CoreFoundation/CoreFoundation.h");
11
+ @cInclude("ImageIO/ImageIO.h");
11
12
  }) else struct {};
12
13
 
13
14
  pub const std_options: std.Options = .{
@@ -54,7 +55,7 @@ fn execute(command: []const u8, payload_json: []const u8) ![]const u8 {
54
55
  return makeOkJson(allocator, "{\"text\":\"\"}");
55
56
  }
56
57
  if (std.mem.eql(u8, command, "screenshot")) {
57
- return makeOkJson(allocator, "{\"path\":\"./screenshot.png\"}");
58
+ return executeScreenshotCommand(allocator, payload_json);
58
59
  }
59
60
 
60
61
  if (
@@ -63,7 +64,8 @@ fn execute(command: []const u8, payload_json: []const u8) ![]const u8 {
63
64
  std.mem.eql(u8, command, "scroll") or
64
65
  std.mem.eql(u8, command, "clipboard-set")
65
66
  ) {
66
- return makeOkJson(allocator, "null");
67
+ const message = try std.fmt.allocPrint(allocator, "TODO not implemented: {s}", .{command});
68
+ return makeErrorJson(allocator, message);
67
69
  }
68
70
 
69
71
  return makeErrorJson(allocator, "unknown command");
@@ -140,6 +142,130 @@ const DragPayload = struct {
140
142
  button: ?[]const u8 = null,
141
143
  };
142
144
 
145
+ const ScreenshotRegion = struct {
146
+ x: f64,
147
+ y: f64,
148
+ width: f64,
149
+ height: f64,
150
+ };
151
+
152
+ const ScreenshotPayload = struct {
153
+ path: ?[]const u8 = null,
154
+ display: ?usize = null,
155
+ region: ?ScreenshotRegion = null,
156
+ };
157
+
158
+ fn executeScreenshotCommand(allocator: std.mem.Allocator, payload_json: []const u8) ![]const u8 {
159
+ if (builtin.target.os.tag != .macos) {
160
+ return makeErrorJson(allocator, "screenshot is only supported on macOS");
161
+ }
162
+
163
+ var parsed = std.json.parseFromSlice(ScreenshotPayload, allocator, payload_json, .{
164
+ .ignore_unknown_fields = true,
165
+ }) catch {
166
+ return makeErrorJson(allocator, "invalid screenshot payload json");
167
+ };
168
+ defer parsed.deinit();
169
+
170
+ const screenshot_payload = parsed.value;
171
+ const output_path = screenshot_payload.path orelse "./screenshot.png";
172
+
173
+ const image = createScreenshotImage(.{
174
+ .display_index = screenshot_payload.display,
175
+ .region = screenshot_payload.region,
176
+ }) catch {
177
+ return makeErrorJson(allocator, "failed to capture screenshot image");
178
+ };
179
+ defer c.CFRelease(image);
180
+
181
+ writeScreenshotPng(.{
182
+ .image = image,
183
+ .output_path = output_path,
184
+ }) catch {
185
+ return makeErrorJson(allocator, "failed to write screenshot file");
186
+ };
187
+
188
+ const path_json = try std.fmt.allocPrint(allocator, "\"{s}\"", .{output_path});
189
+ const payload_json_response = try std.fmt.allocPrint(allocator, "{{\"path\":{s}}}", .{path_json});
190
+ return makeOkJson(allocator, payload_json_response);
191
+ }
192
+
193
+ fn createScreenshotImage(input: struct {
194
+ display_index: ?usize,
195
+ region: ?ScreenshotRegion,
196
+ }) !c.CGImageRef {
197
+ const display_id = resolveDisplayId(input.display_index) catch {
198
+ return error.DisplayResolutionFailed;
199
+ };
200
+
201
+ if (input.region) |region| {
202
+ const rect: c.CGRect = .{
203
+ .origin = .{ .x = region.x, .y = region.y },
204
+ .size = .{ .width = region.width, .height = region.height },
205
+ };
206
+ const region_image = c.CGDisplayCreateImageForRect(display_id, rect);
207
+ if (region_image == null) {
208
+ return error.CaptureFailed;
209
+ }
210
+ return region_image;
211
+ }
212
+
213
+ const full_image = c.CGDisplayCreateImage(display_id);
214
+ if (full_image == null) {
215
+ return error.CaptureFailed;
216
+ }
217
+ return full_image;
218
+ }
219
+
220
+ fn resolveDisplayId(display_index: ?usize) !c.CGDirectDisplayID {
221
+ const selected_index = display_index orelse 0;
222
+ var display_ids: [16]c.CGDirectDisplayID = undefined;
223
+ var display_count: u32 = 0;
224
+ const list_result = c.CGGetActiveDisplayList(display_ids.len, &display_ids, &display_count);
225
+ if (list_result != c.kCGErrorSuccess) {
226
+ return error.DisplayQueryFailed;
227
+ }
228
+ if (selected_index >= display_count) {
229
+ return error.InvalidDisplayIndex;
230
+ }
231
+ return display_ids[selected_index];
232
+ }
233
+
234
+ fn writeScreenshotPng(input: struct {
235
+ image: c.CGImageRef,
236
+ output_path: []const u8,
237
+ }) !void {
238
+ const path_as_u8: [*]const u8 = @ptrCast(input.output_path.ptr);
239
+ const file_url = c.CFURLCreateFromFileSystemRepresentation(
240
+ null,
241
+ path_as_u8,
242
+ @as(c_long, @intCast(input.output_path.len)),
243
+ 0,
244
+ );
245
+ if (file_url == null) {
246
+ return error.FileUrlCreateFailed;
247
+ }
248
+ defer c.CFRelease(file_url);
249
+
250
+ const png_type = c.CFStringCreateWithCString(null, "public.png", c.kCFStringEncodingUTF8);
251
+ if (png_type == null) {
252
+ return error.PngTypeCreateFailed;
253
+ }
254
+ defer c.CFRelease(png_type);
255
+
256
+ const destination = c.CGImageDestinationCreateWithURL(file_url, png_type, 1, null);
257
+ if (destination == null) {
258
+ return error.ImageDestinationCreateFailed;
259
+ }
260
+ defer c.CFRelease(destination);
261
+
262
+ c.CGImageDestinationAddImage(destination, input.image, null);
263
+ const did_finalize = c.CGImageDestinationFinalize(destination);
264
+ if (!did_finalize) {
265
+ return error.ImageDestinationFinalizeFailed;
266
+ }
267
+ }
268
+
143
269
  fn executeMouseMoveCommand(allocator: std.mem.Allocator, payload_json: []const u8) ![]const u8 {
144
270
  if (builtin.target.os.tag != .macos) {
145
271
  return makeErrorJson(allocator, "mouse-move is only supported on macOS");