usecomputer 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/CHANGELOG.md +67 -0
  2. package/README.md +324 -0
  3. package/build.zig +95 -11
  4. package/build.zig.zon +5 -0
  5. package/dist/bridge-contract.test.js +61 -67
  6. package/dist/bridge.d.ts.map +1 -1
  7. package/dist/bridge.js +241 -46
  8. package/dist/cli-parsing.test.js +34 -11
  9. package/dist/cli.d.ts.map +1 -1
  10. package/dist/cli.js +323 -28
  11. package/dist/coord-map.d.ts +14 -0
  12. package/dist/coord-map.d.ts.map +1 -0
  13. package/dist/coord-map.js +75 -0
  14. package/dist/coord-map.test.d.ts +2 -0
  15. package/dist/coord-map.test.d.ts.map +1 -0
  16. package/dist/coord-map.test.js +157 -0
  17. package/dist/darwin-arm64/usecomputer.node +0 -0
  18. package/dist/darwin-x64/usecomputer.node +0 -0
  19. package/dist/debug-point-image.d.ts +8 -0
  20. package/dist/debug-point-image.d.ts.map +1 -0
  21. package/dist/debug-point-image.js +43 -0
  22. package/dist/debug-point-image.test.d.ts +2 -0
  23. package/dist/debug-point-image.test.d.ts.map +1 -0
  24. package/dist/debug-point-image.test.js +44 -0
  25. package/dist/index.d.ts +2 -0
  26. package/dist/index.d.ts.map +1 -1
  27. package/dist/index.js +3 -1
  28. package/dist/lib.d.ts +26 -0
  29. package/dist/lib.d.ts.map +1 -0
  30. package/dist/lib.js +88 -0
  31. package/dist/native-click-smoke.test.js +69 -29
  32. package/dist/native-lib.d.ts +59 -1
  33. package/dist/native-lib.d.ts.map +1 -1
  34. package/dist/terminal-table.d.ts +10 -0
  35. package/dist/terminal-table.d.ts.map +1 -0
  36. package/dist/terminal-table.js +55 -0
  37. package/dist/terminal-table.test.d.ts +2 -0
  38. package/dist/terminal-table.test.d.ts.map +1 -0
  39. package/dist/terminal-table.test.js +41 -0
  40. package/dist/types.d.ts +45 -0
  41. package/dist/types.d.ts.map +1 -1
  42. package/package.json +19 -5
  43. package/src/bridge-contract.test.ts +68 -73
  44. package/src/bridge.ts +293 -53
  45. package/src/cli-parsing.test.ts +61 -0
  46. package/src/cli.ts +393 -32
  47. package/src/coord-map.test.ts +178 -0
  48. package/src/coord-map.ts +105 -0
  49. package/src/debug-point-image.test.ts +50 -0
  50. package/src/debug-point-image.ts +69 -0
  51. package/src/index.ts +3 -1
  52. package/src/lib.ts +125 -0
  53. package/src/native-click-smoke.test.ts +81 -63
  54. package/src/native-lib.ts +39 -1
  55. package/src/terminal-table.test.ts +44 -0
  56. package/src/terminal-table.ts +88 -0
  57. package/src/types.ts +50 -0
  58. package/zig/src/lib.zig +1966 -270
  59. package/zig/src/main.zig +382 -0
  60. package/zig/src/scroll.zig +213 -0
  61. package/zig/src/window.zig +123 -0
package/zig/src/lib.zig CHANGED
@@ -1,421 +1,2028 @@
1
- // Native N-API module for usecomputer commands on macOS using Zig.
2
- // First implementation step translates CUA macOS click semantics to Quartz
3
- // events: post mouse down/up pairs at absolute coordinates with click state.
1
+ // Native N-API module for usecomputer desktop automation commands.
2
+ // Exports direct typed methods (no string command dispatcher) so TS can call
3
+ // high-level native functions and receive structured error objects.
4
4
 
5
5
  const std = @import("std");
6
6
  const builtin = @import("builtin");
7
- const napigen = if (builtin.is_test) undefined else @import("napigen");
8
- const c = if (builtin.target.os.tag == .macos) @cImport({
7
+ const scroll_impl = @import("scroll.zig");
8
+ const window = @import("window.zig");
9
+ // napigen is only available when building as N-API library.
10
+ // The build system provides a "napigen" module for the library target but not
11
+ // for the standalone exe or test targets. We detect availability at comptime
12
+ // via the build options module.
13
+ const build_options = @import("build_options");
14
+ const napigen = if (build_options.enable_napigen) @import("napigen") else undefined;
15
+ const c_macos = if (builtin.target.os.tag == .macos) @cImport({
9
16
  @cInclude("CoreGraphics/CoreGraphics.h");
10
17
  @cInclude("CoreFoundation/CoreFoundation.h");
11
18
  @cInclude("ImageIO/ImageIO.h");
12
19
  }) else struct {};
13
20
 
14
- pub const std_options: std.Options = .{
15
- .log_level = .err,
16
- };
17
-
18
- fn makeOkJson(allocator: std.mem.Allocator, data_json: []const u8) ![]const u8 {
19
- return std.fmt.allocPrint(allocator, "{{\"ok\":true,\"data\":{s}}}", .{data_json});
20
- }
21
-
22
- fn makeErrorJson(allocator: std.mem.Allocator, message: []const u8) ![]const u8 {
23
- return std.fmt.allocPrint(allocator, "{{\"ok\":false,\"error\":\"{s}\"}}", .{message});
24
- }
25
-
26
- fn execute(command: []const u8, payload_json: []const u8) ![]const u8 {
27
- const allocator = std.heap.c_allocator;
21
+ const c_windows = if (builtin.target.os.tag == .windows) @cImport({
22
+ @cInclude("windows.h");
23
+ }) else struct {};
28
24
 
29
- if (std.mem.eql(u8, command, "click")) {
30
- return executeClickCommand(allocator, payload_json);
31
- }
32
- if (std.mem.eql(u8, command, "mouse-move")) {
33
- return executeMouseMoveCommand(allocator, payload_json);
34
- }
35
- if (std.mem.eql(u8, command, "mouse-down")) {
36
- return executeMouseDownCommand(allocator, payload_json);
37
- }
38
- if (std.mem.eql(u8, command, "mouse-up")) {
39
- return executeMouseUpCommand(allocator, payload_json);
40
- }
41
- if (std.mem.eql(u8, command, "mouse-position")) {
42
- return executeMousePositionCommand(allocator);
43
- }
44
- if (std.mem.eql(u8, command, "hover")) {
45
- return executeHoverCommand(allocator, payload_json);
46
- }
47
- if (std.mem.eql(u8, command, "drag")) {
48
- return executeDragCommand(allocator, payload_json);
49
- }
25
+ const c_x11 = if (builtin.target.os.tag == .linux) @cImport({
26
+ @cInclude("X11/Xlib.h");
27
+ @cInclude("X11/Xutil.h");
28
+ @cInclude("X11/keysym.h");
29
+ @cInclude("X11/extensions/XShm.h");
30
+ @cInclude("X11/extensions/XTest.h");
31
+ @cInclude("sys/ipc.h");
32
+ @cInclude("sys/shm.h");
33
+ @cInclude("png.h");
34
+ }) else struct {};
50
35
 
51
- if (std.mem.eql(u8, command, "display-list")) {
52
- return makeOkJson(allocator, "[]");
53
- }
54
- if (std.mem.eql(u8, command, "clipboard-get")) {
55
- return makeOkJson(allocator, "{\"text\":\"\"}");
56
- }
57
- if (std.mem.eql(u8, command, "screenshot")) {
58
- return executeScreenshotCommand(allocator, payload_json);
59
- }
36
+ const c = c_macos;
37
+ const screenshot_max_long_edge_px: f64 = 1568;
38
+
39
+ const mac_keycode = struct {
40
+ const a = 0x00;
41
+ const s = 0x01;
42
+ const d = 0x02;
43
+ const f = 0x03;
44
+ const h = 0x04;
45
+ const g = 0x05;
46
+ const z = 0x06;
47
+ const x = 0x07;
48
+ const c = 0x08;
49
+ const v = 0x09;
50
+ const b = 0x0B;
51
+ const q = 0x0C;
52
+ const w = 0x0D;
53
+ const e = 0x0E;
54
+ const r = 0x0F;
55
+ const y = 0x10;
56
+ const t = 0x11;
57
+ const one = 0x12;
58
+ const two = 0x13;
59
+ const three = 0x14;
60
+ const four = 0x15;
61
+ const six = 0x16;
62
+ const five = 0x17;
63
+ const equal = 0x18;
64
+ const nine = 0x19;
65
+ const seven = 0x1A;
66
+ const minus = 0x1B;
67
+ const eight = 0x1C;
68
+ const zero = 0x1D;
69
+ const right_bracket = 0x1E;
70
+ const o = 0x1F;
71
+ const u = 0x20;
72
+ const left_bracket = 0x21;
73
+ const i = 0x22;
74
+ const p = 0x23;
75
+ const l = 0x25;
76
+ const j = 0x26;
77
+ const quote = 0x27;
78
+ const k = 0x28;
79
+ const semicolon = 0x29;
80
+ const backslash = 0x2A;
81
+ const comma = 0x2B;
82
+ const slash = 0x2C;
83
+ const n = 0x2D;
84
+ const m = 0x2E;
85
+ const period = 0x2F;
86
+ const tab = 0x30;
87
+ const space = 0x31;
88
+ const grave = 0x32;
89
+ const delete = 0x33;
90
+ const enter = 0x24;
91
+ const escape = 0x35;
92
+ const command = 0x37;
93
+ const shift = 0x38;
94
+ const option = 0x3A;
95
+ const control = 0x3B;
96
+ const fn_key = 0x3F;
97
+ const f1 = 0x7A;
98
+ const f2 = 0x78;
99
+ const f3 = 0x63;
100
+ const f4 = 0x76;
101
+ const f5 = 0x60;
102
+ const f6 = 0x61;
103
+ const f7 = 0x62;
104
+ const f8 = 0x64;
105
+ const f9 = 0x65;
106
+ const f10 = 0x6D;
107
+ const f11 = 0x67;
108
+ const f12 = 0x6F;
109
+ const home = 0x73;
110
+ const page_up = 0x74;
111
+ const forward_delete = 0x75;
112
+ const end = 0x77;
113
+ const page_down = 0x79;
114
+ const left_arrow = 0x7B;
115
+ const right_arrow = 0x7C;
116
+ const down_arrow = 0x7D;
117
+ const up_arrow = 0x7E;
118
+ };
60
119
 
61
- if (
62
- std.mem.eql(u8, command, "type-text") or
63
- std.mem.eql(u8, command, "press") or
64
- std.mem.eql(u8, command, "scroll") or
65
- std.mem.eql(u8, command, "clipboard-set")
66
- ) {
67
- const message = try std.fmt.allocPrint(allocator, "TODO not implemented: {s}", .{command});
68
- return makeErrorJson(allocator, message);
69
- }
120
+ pub const std_options: std.Options = .{
121
+ .log_level = .err,
122
+ };
70
123
 
71
- return makeErrorJson(allocator, "unknown command");
72
- }
124
+ const DisplayInfoOutput = struct {
125
+ id: u32,
126
+ index: u32,
127
+ name: []const u8,
128
+ x: f64,
129
+ y: f64,
130
+ width: f64,
131
+ height: f64,
132
+ scale: f64,
133
+ isPrimary: bool,
134
+ };
73
135
 
74
- const ClickPoint = struct {
136
+ const WindowInfoOutput = struct {
137
+ id: u32,
138
+ ownerPid: i32,
139
+ ownerName: []const u8,
140
+ title: []const u8,
75
141
  x: f64,
76
142
  y: f64,
143
+ width: f64,
144
+ height: f64,
145
+ desktopIndex: u32,
77
146
  };
78
147
 
79
- const ClickPayload = struct {
80
- point: ClickPoint,
81
- button: ?[]const u8 = null,
82
- count: ?u32 = null,
148
+ const NativeErrorObject = struct {
149
+ code: []const u8,
150
+ message: []const u8,
151
+ command: []const u8,
83
152
  };
84
153
 
85
- fn executeClickCommand(allocator: std.mem.Allocator, payload_json: []const u8) ![]const u8 {
86
- if (builtin.target.os.tag != .macos) {
87
- return makeErrorJson(allocator, "click is only supported on macOS");
88
- }
154
+ const CommandResult = struct {
155
+ ok: bool,
156
+ @"error": ?NativeErrorObject = null,
157
+ };
89
158
 
90
- var parsed = std.json.parseFromSlice(ClickPayload, allocator, payload_json, .{
91
- .ignore_unknown_fields = true,
92
- }) catch {
93
- return makeErrorJson(allocator, "invalid click payload json");
159
+ fn DataResult(comptime T: type) type {
160
+ return struct {
161
+ ok: bool,
162
+ data: ?T = null,
163
+ @"error": ?NativeErrorObject = null,
94
164
  };
95
- defer parsed.deinit();
165
+ }
96
166
 
97
- const click_payload = parsed.value;
98
- const click_count: u32 = if (click_payload.count) |count| blk: {
99
- if (count == 0) {
100
- break :blk 1;
101
- }
102
- break :blk count;
103
- } else 1;
167
+ fn okCommand() CommandResult {
168
+ return .{ .ok = true };
169
+ }
104
170
 
105
- const button_kind = resolveMouseButton(click_payload.button orelse "left") catch {
106
- return makeErrorJson(allocator, "invalid click button");
171
+ fn failCommand(command: []const u8, code: []const u8, message: []const u8) CommandResult {
172
+ return .{
173
+ .ok = false,
174
+ .@"error" = .{
175
+ .code = code,
176
+ .message = message,
177
+ .command = command,
178
+ },
107
179
  };
180
+ }
108
181
 
109
- const point: c.CGPoint = .{
110
- .x = click_payload.point.x,
111
- .y = click_payload.point.y,
182
+ fn okData(comptime T: type, value: T) DataResult(T) {
183
+ return .{
184
+ .ok = true,
185
+ .data = value,
112
186
  };
187
+ }
113
188
 
114
- var index: u32 = 0;
115
- while (index < click_count) : (index += 1) {
116
- const click_state = @as(i64, @intCast(index + 1));
117
- postClickPair(point, button_kind, click_state) catch {
118
- return makeErrorJson(allocator, "failed to post click event");
119
- };
120
-
121
- if (index + 1 < click_count) {
122
- std.Thread.sleep(80 * std.time.ns_per_ms);
123
- }
124
- }
189
+ fn failData(comptime T: type, command: []const u8, code: []const u8, message: []const u8) DataResult(T) {
190
+ return .{
191
+ .ok = false,
192
+ .@"error" = .{
193
+ .code = code,
194
+ .message = message,
195
+ .command = command,
196
+ },
197
+ };
198
+ }
125
199
 
126
- return makeOkJson(allocator, "null");
200
+ fn todoNotImplemented(command: []const u8) CommandResult {
201
+ return failCommand(command, "TODO_NOT_IMPLEMENTED", "TODO not implemented");
127
202
  }
128
203
 
129
- const MouseMovePayload = struct {
204
+ pub const Point = struct {
130
205
  x: f64,
131
206
  y: f64,
132
207
  };
133
208
 
134
- const MouseButtonPayload = struct {
209
+ const MouseButtonKind = enum {
210
+ left,
211
+ right,
212
+ middle,
213
+ };
214
+
215
+ const ClickInput = struct {
216
+ point: Point,
135
217
  button: ?[]const u8 = null,
218
+ count: ?f64 = null,
136
219
  };
137
220
 
138
- const DragPayload = struct {
139
- from: ClickPoint,
140
- to: ClickPoint,
141
- durationMs: ?u64 = null,
221
+ const MouseMoveInput = Point;
222
+
223
+ const MouseButtonInput = struct {
142
224
  button: ?[]const u8 = null,
143
225
  };
144
226
 
145
- const ScreenshotRegion = struct {
227
+ const DragInput = struct {
228
+ from: Point,
229
+ to: Point,
230
+ durationMs: ?f64 = null,
231
+ button: ?[]const u8 = null,
232
+ };
233
+
234
+ pub const ScreenshotRegion = struct {
146
235
  x: f64,
147
236
  y: f64,
148
237
  width: f64,
149
238
  height: f64,
150
239
  };
151
240
 
152
- const ScreenshotPayload = struct {
241
+ const ScreenshotInput = struct {
153
242
  path: ?[]const u8 = null,
154
- display: ?usize = null,
243
+ display: ?f64 = null,
244
+ window: ?f64 = null,
155
245
  region: ?ScreenshotRegion = null,
246
+ annotate: ?bool = null,
247
+ };
248
+
249
+ pub const ScreenshotOutput = struct {
250
+ path: []const u8,
251
+ desktopIndex: f64,
252
+ captureX: f64,
253
+ captureY: f64,
254
+ captureWidth: f64,
255
+ captureHeight: f64,
256
+ imageWidth: f64,
257
+ imageHeight: f64,
258
+ };
259
+
260
+ const SelectedDisplay = if (builtin.target.os.tag == .macos) struct {
261
+ id: c.CGDirectDisplayID,
262
+ index: usize,
263
+ bounds: c.CGRect,
264
+ } else struct {
265
+ id: u32,
266
+ index: usize,
267
+ bounds: struct {
268
+ x: f64,
269
+ y: f64,
270
+ width: f64,
271
+ height: f64,
272
+ },
273
+ };
274
+
275
+ const ScreenshotCapture = if (builtin.target.os.tag == .macos) struct {
276
+ image: c.CGImageRef,
277
+ capture_x: f64,
278
+ capture_y: f64,
279
+ capture_width: f64,
280
+ capture_height: f64,
281
+ desktop_index: usize,
282
+ } else struct {
283
+ image: RawRgbaImage,
284
+ capture_x: f64,
285
+ capture_y: f64,
286
+ capture_width: f64,
287
+ capture_height: f64,
288
+ desktop_index: usize,
289
+ };
290
+
291
+ const ScaledScreenshotImage = if (builtin.target.os.tag == .macos) struct {
292
+ image: c.CGImageRef,
293
+ width: f64,
294
+ height: f64,
295
+ } else struct {
296
+ image: RawRgbaImage,
297
+ width: f64,
298
+ height: f64,
156
299
  };
157
300
 
158
- fn executeScreenshotCommand(allocator: std.mem.Allocator, payload_json: []const u8) ![]const u8 {
301
+ const RawRgbaImage = struct {
302
+ pixels: []u8,
303
+ width: usize,
304
+ height: usize,
305
+ };
306
+
307
+ const TypeTextInput = struct {
308
+ text: []const u8,
309
+ delayMs: ?f64 = null,
310
+ };
311
+
312
+ const PressInput = struct {
313
+ key: []const u8,
314
+ count: ?f64 = null,
315
+ delayMs: ?f64 = null,
316
+ };
317
+
318
+ const ScrollInput = struct {
319
+ direction: []const u8,
320
+ amount: f64,
321
+ at: ?Point = null,
322
+ };
323
+
324
+ const ClipboardSetInput = struct {
325
+ text: []const u8,
326
+ };
327
+
328
+ pub fn screenshot(input: ScreenshotInput) DataResult(ScreenshotOutput) {
329
+ _ = input.annotate;
330
+ const output_path = input.path orelse "./screenshot.png";
331
+
332
+ if (builtin.target.os.tag == .linux) {
333
+ if (input.window != null) {
334
+ return failData(ScreenshotOutput, "screenshot", "UNSUPPORTED_INPUT", "window screenshots are not supported on Linux yet");
335
+ }
336
+
337
+ const capture = createLinuxScreenshotImage(.{
338
+ .display_index = input.display,
339
+ .region = input.region,
340
+ }) catch |err| {
341
+ return failData(ScreenshotOutput, "screenshot", linuxScreenshotErrorCode(err), linuxScreenshotErrorMessage(err));
342
+ };
343
+ defer std.heap.c_allocator.free(capture.image.pixels);
344
+
345
+ const scaled_image = scaleLinuxScreenshotImageIfNeeded(capture.image) catch {
346
+ return failData(ScreenshotOutput, "screenshot", "SCALE_FAILED", "failed to scale screenshot image");
347
+ };
348
+ defer std.heap.c_allocator.free(scaled_image.image.pixels);
349
+
350
+ writeLinuxScreenshotPng(.{
351
+ .image = scaled_image.image,
352
+ .output_path = output_path,
353
+ }) catch {
354
+ return failData(ScreenshotOutput, "screenshot", "WRITE_FAILED", "failed to write screenshot file");
355
+ };
356
+
357
+ return okData(ScreenshotOutput, .{
358
+ .path = output_path,
359
+ .desktopIndex = @floatFromInt(capture.desktop_index),
360
+ .captureX = capture.capture_x,
361
+ .captureY = capture.capture_y,
362
+ .captureWidth = capture.capture_width,
363
+ .captureHeight = capture.capture_height,
364
+ .imageWidth = scaled_image.width,
365
+ .imageHeight = scaled_image.height,
366
+ });
367
+ }
368
+
159
369
  if (builtin.target.os.tag != .macos) {
160
- return makeErrorJson(allocator, "screenshot is only supported on macOS");
370
+ return failData(ScreenshotOutput, "screenshot", "UNSUPPORTED_PLATFORM", "screenshot is only supported on macOS and Linux X11");
161
371
  }
162
372
 
163
- var parsed = std.json.parseFromSlice(ScreenshotPayload, allocator, payload_json, .{
164
- .ignore_unknown_fields = true,
373
+ const capture = createScreenshotImage(.{
374
+ .display_index = input.display,
375
+ .window_id = input.window,
376
+ .region = input.region,
165
377
  }) catch {
166
- return makeErrorJson(allocator, "invalid screenshot payload json");
378
+ return failData(ScreenshotOutput, "screenshot", "CAPTURE_FAILED", "failed to capture screenshot image");
167
379
  };
168
- defer parsed.deinit();
380
+ defer c.CFRelease(capture.image);
169
381
 
170
- const screenshot_payload = parsed.value;
171
- const output_path = screenshot_payload.path orelse "./screenshot.png";
172
-
173
- const image = createScreenshotImage(.{
174
- .display_index = screenshot_payload.display,
175
- .region = screenshot_payload.region,
176
- }) catch {
177
- return makeErrorJson(allocator, "failed to capture screenshot image");
382
+ const scaled_image = scaleScreenshotImageIfNeeded(capture.image) catch {
383
+ return failData(ScreenshotOutput, "screenshot", "SCALE_FAILED", "failed to scale screenshot image");
178
384
  };
179
- defer c.CFRelease(image);
385
+ defer c.CFRelease(scaled_image.image);
180
386
 
181
387
  writeScreenshotPng(.{
182
- .image = image,
388
+ .image = scaled_image.image,
183
389
  .output_path = output_path,
184
390
  }) catch {
185
- return makeErrorJson(allocator, "failed to write screenshot file");
391
+ return failData(ScreenshotOutput, "screenshot", "WRITE_FAILED", "failed to write screenshot file");
186
392
  };
187
393
 
188
- const path_json = try std.fmt.allocPrint(allocator, "\"{s}\"", .{output_path});
189
- const payload_json_response = try std.fmt.allocPrint(allocator, "{{\"path\":{s}}}", .{path_json});
190
- return makeOkJson(allocator, payload_json_response);
394
+ return okData(ScreenshotOutput, .{
395
+ .path = output_path,
396
+ .desktopIndex = @as(f64, @floatFromInt(capture.desktop_index)),
397
+ .captureX = capture.capture_x,
398
+ .captureY = capture.capture_y,
399
+ .captureWidth = capture.capture_width,
400
+ .captureHeight = capture.capture_height,
401
+ .imageWidth = scaled_image.width,
402
+ .imageHeight = scaled_image.height,
403
+ });
191
404
  }
192
405
 
193
- fn createScreenshotImage(input: struct {
194
- display_index: ?usize,
406
+ fn linuxScreenshotErrorCode(err: anyerror) []const u8 {
407
+ return switch (err) {
408
+ error.InvalidDisplayIndex, error.InvalidRegion, error.RegionOutOfBounds => "INVALID_INPUT",
409
+ error.DisplayOpenFailed, error.MissingDisplayEnv, error.NoScreens, error.XShmUnavailable => "X11_UNAVAILABLE",
410
+ error.CaptureFailed, error.ImageCreateFailed, error.ShmGetFailed, error.ShmAttachFailed, error.ShmAllocFailed => "CAPTURE_FAILED",
411
+ else => "CAPTURE_FAILED",
412
+ };
413
+ }
414
+
415
+ fn linuxScreenshotErrorMessage(err: anyerror) []const u8 {
416
+ return switch (err) {
417
+ error.InvalidDisplayIndex => "Linux screenshots currently support only display 0",
418
+ error.InvalidRegion => "invalid screenshot region",
419
+ error.RegionOutOfBounds => "screenshot region is outside the X11 root window bounds",
420
+ error.MissingDisplayEnv => "DISPLAY is not set; Linux screenshots require an X11 session",
421
+ error.DisplayOpenFailed => "failed to open X11 display",
422
+ error.NoScreens => "X11 display has no screens",
423
+ error.XShmUnavailable => "X11 shared memory extension is unavailable",
424
+ error.ImageCreateFailed, error.ShmAllocFailed, error.ShmAttachFailed, error.ShmGetFailed, error.CaptureFailed => "failed to capture screenshot image",
425
+ else => "failed to capture screenshot image",
426
+ };
427
+ }
428
+
429
+ fn createLinuxScreenshotImage(input: struct {
430
+ display_index: ?f64,
195
431
  region: ?ScreenshotRegion,
196
- }) !c.CGImageRef {
197
- const display_id = resolveDisplayId(input.display_index) catch {
198
- return error.DisplayResolutionFailed;
432
+ }) !ScreenshotCapture {
433
+ if (builtin.target.os.tag != .linux) {
434
+ return error.UnsupportedPlatform;
435
+ }
436
+ if (input.display_index) |value| {
437
+ const normalized = @as(i64, @intFromFloat(std.math.round(value)));
438
+ if (normalized != 0) {
439
+ return error.InvalidDisplayIndex;
440
+ }
441
+ }
442
+ if (std.posix.getenv("DISPLAY") == null) {
443
+ return error.MissingDisplayEnv;
444
+ }
445
+
446
+ const display = c_x11.XOpenDisplay(null) orelse return error.DisplayOpenFailed;
447
+ defer _ = c_x11.XCloseDisplay(display);
448
+
449
+ const screen_index = c_x11.XDefaultScreen(display);
450
+ if (screen_index < 0) {
451
+ return error.NoScreens;
452
+ }
453
+ const root = c_x11.XRootWindow(display, screen_index);
454
+ const screen_width_i = c_x11.XDisplayWidth(display, screen_index);
455
+ const screen_height_i = c_x11.XDisplayHeight(display, screen_index);
456
+ if (screen_width_i <= 0 or screen_height_i <= 0) {
457
+ return error.CaptureFailed;
458
+ }
459
+
460
+ const screen_width = @as(usize, @intCast(screen_width_i));
461
+ const screen_height = @as(usize, @intCast(screen_height_i));
462
+ const capture_rect = try resolveLinuxCaptureRect(.{
463
+ .screen_width = screen_width,
464
+ .screen_height = screen_height,
465
+ .region = input.region,
466
+ });
467
+
468
+ // Try XShm first (fast), fall back to XGetImage (slow but always works).
469
+ // XShm fails on XWayland when processes don't share SHM namespaces.
470
+ const image = captureWithXShm(display, screen_index, root, capture_rect) orelse
471
+ captureWithXGetImage(display, root, capture_rect) orelse
472
+ return error.CaptureFailed;
473
+ // XDestroyImage is a C macro: ((*((ximage)->f.destroy_image))((ximage)))
474
+ // Zig's @cImport can't translate it, so call the function pointer directly.
475
+ defer _ = image.*.f.destroy_image.?(image);
476
+
477
+ const rgba = try convertX11ImageToRgba(image, capture_rect.width, capture_rect.height);
478
+ return .{
479
+ .image = rgba,
480
+ .capture_x = @floatFromInt(capture_rect.x),
481
+ .capture_y = @floatFromInt(capture_rect.y),
482
+ .capture_width = @floatFromInt(capture_rect.width),
483
+ .capture_height = @floatFromInt(capture_rect.height),
484
+ .desktop_index = 0,
199
485
  };
486
+ }
487
+
488
+ const LinuxCaptureRect = struct {
489
+ x: usize,
490
+ y: usize,
491
+ width: usize,
492
+ height: usize,
493
+ };
200
494
 
495
+ // X error handler state for detecting X errors during screenshot capture.
496
+ // XSetErrorHandler is process-global, so this is necessarily a global.
497
+ var x_capture_error_occurred: bool = false;
498
+
499
+ fn captureErrorHandler(_: ?*c_x11.Display, _: ?*c_x11.XErrorEvent) callconv(.c) c_int {
500
+ x_capture_error_occurred = true;
501
+ return 0;
502
+ }
503
+
504
+ /// Fast screenshot path using XShm (shared memory). Returns null if XShm is
505
+ /// unavailable or fails (common on XWayland with different SHM namespaces).
506
+ fn captureWithXShm(
507
+ display: *c_x11.Display,
508
+ screen_index: c_int,
509
+ root: c_x11.Window,
510
+ capture_rect: LinuxCaptureRect,
511
+ ) ?*c_x11.XImage {
512
+ if (c_x11.XShmQueryExtension(display) == 0) {
513
+ return null;
514
+ }
515
+
516
+ const visual = c_x11.XDefaultVisual(display, screen_index);
517
+ const depth = @as(c_uint, @intCast(c_x11.XDefaultDepth(display, screen_index)));
518
+ var shm_info: c_x11.XShmSegmentInfo = undefined;
519
+ shm_info.shmid = -1;
520
+ shm_info.shmaddr = null;
521
+ shm_info.readOnly = 0;
522
+
523
+ const image = c_x11.XShmCreateImage(
524
+ display,
525
+ visual,
526
+ depth,
527
+ c_x11.ZPixmap,
528
+ null,
529
+ &shm_info,
530
+ @as(c_uint, @intCast(capture_rect.width)),
531
+ @as(c_uint, @intCast(capture_rect.height)),
532
+ ) orelse return null;
533
+
534
+ const bytes_per_image = @as(usize, @intCast(image.*.bytes_per_line)) * capture_rect.height;
535
+ const shmget_result = c_x11.shmget(c_x11.IPC_PRIVATE, bytes_per_image, c_x11.IPC_CREAT | 0o600);
536
+ if (shmget_result < 0) {
537
+ image.*.data = null;
538
+ _ = image.*.f.destroy_image.?(image);
539
+ return null;
540
+ }
541
+ shm_info.shmid = shmget_result;
542
+
543
+ const shmaddr = c_x11.shmat(shm_info.shmid, null, 0);
544
+ if (@intFromPtr(shmaddr) == std.math.maxInt(usize)) {
545
+ _ = c_x11.shmctl(shm_info.shmid, c_x11.IPC_RMID, null);
546
+ image.*.data = null;
547
+ _ = image.*.f.destroy_image.?(image);
548
+ return null;
549
+ }
550
+ shm_info.shmaddr = @ptrCast(shmaddr);
551
+ image.*.data = shm_info.shmaddr;
552
+
553
+ // Install custom error handler to catch BadAccess from XShmAttach
554
+ // (happens on XWayland when SHM namespaces don't match).
555
+ x_capture_error_occurred = false;
556
+ const old_handler = c_x11.XSetErrorHandler(captureErrorHandler);
557
+
558
+ _ = c_x11.XShmAttach(display, &shm_info);
559
+ _ = c_x11.XSync(display, 0);
560
+
561
+ if (x_capture_error_occurred) {
562
+ // Restore original handler and clean up
563
+ _ = c_x11.XSetErrorHandler(old_handler);
564
+ _ = c_x11.shmdt(shmaddr);
565
+ _ = c_x11.shmctl(shm_info.shmid, c_x11.IPC_RMID, null);
566
+ image.*.data = null;
567
+ _ = image.*.f.destroy_image.?(image);
568
+ return null;
569
+ }
570
+
571
+ if (c_x11.XShmGetImage(
572
+ display,
573
+ root,
574
+ image,
575
+ @as(c_int, @intCast(capture_rect.x)),
576
+ @as(c_int, @intCast(capture_rect.y)),
577
+ c_x11.AllPlanes,
578
+ ) == 0) {
579
+ _ = c_x11.XSetErrorHandler(old_handler);
580
+ _ = c_x11.XShmDetach(display, &shm_info);
581
+ _ = c_x11.shmdt(shmaddr);
582
+ _ = c_x11.shmctl(shm_info.shmid, c_x11.IPC_RMID, null);
583
+ image.*.data = null;
584
+ _ = image.*.f.destroy_image.?(image);
585
+ return null;
586
+ }
587
+
588
+ // Copy image data to a separate allocation so we can detach SHM.
589
+ // The caller owns the XImage and will free it via destroy_image.
590
+ const data_copy = std.heap.c_allocator.alloc(u8, bytes_per_image) catch {
591
+ _ = c_x11.XSetErrorHandler(old_handler);
592
+ _ = c_x11.XShmDetach(display, &shm_info);
593
+ _ = c_x11.shmdt(shmaddr);
594
+ _ = c_x11.shmctl(shm_info.shmid, c_x11.IPC_RMID, null);
595
+ image.*.data = null;
596
+ _ = image.*.f.destroy_image.?(image);
597
+ return null;
598
+ };
599
+ @memcpy(data_copy, @as([*]const u8, @ptrCast(shmaddr))[0..bytes_per_image]);
600
+ image.*.data = @ptrCast(data_copy.ptr);
601
+
602
+ _ = c_x11.XSetErrorHandler(old_handler);
603
+ _ = c_x11.XShmDetach(display, &shm_info);
604
+ _ = c_x11.shmdt(shmaddr);
605
+ _ = c_x11.shmctl(shm_info.shmid, c_x11.IPC_RMID, null);
606
+
607
+ return image;
608
+ }
609
+
610
+ /// Slow but reliable fallback: XGetImage copies pixels over the X connection.
611
+ /// Works everywhere including XWayland regardless of SHM namespace.
612
+ /// Installs a temporary X error handler to catch BadMatch errors (common
613
+ /// on XWayland when the capture region doesn't match the root drawable).
614
+ fn captureWithXGetImage(
615
+ display: *c_x11.Display,
616
+ root: c_x11.Window,
617
+ capture_rect: LinuxCaptureRect,
618
+ ) ?*c_x11.XImage {
619
+ x_capture_error_occurred = false;
620
+ const old_handler = c_x11.XSetErrorHandler(captureErrorHandler);
621
+ defer _ = c_x11.XSetErrorHandler(old_handler);
622
+
623
+ const image = c_x11.XGetImage(
624
+ display,
625
+ root,
626
+ @as(c_int, @intCast(capture_rect.x)),
627
+ @as(c_int, @intCast(capture_rect.y)),
628
+ @as(c_uint, @intCast(capture_rect.width)),
629
+ @as(c_uint, @intCast(capture_rect.height)),
630
+ c_x11.AllPlanes,
631
+ c_x11.ZPixmap,
632
+ );
633
+ _ = c_x11.XSync(display, 0);
634
+
635
+ if (x_capture_error_occurred) {
636
+ if (image) |img| {
637
+ _ = img.*.f.destroy_image.?(img);
638
+ }
639
+ return null;
640
+ }
641
+ return image;
642
+ }
643
+
644
+ fn resolveLinuxCaptureRect(input: struct {
645
+ screen_width: usize,
646
+ screen_height: usize,
647
+ region: ?ScreenshotRegion,
648
+ }) !LinuxCaptureRect {
201
649
  if (input.region) |region| {
202
- const rect: c.CGRect = .{
203
- .origin = .{ .x = region.x, .y = region.y },
204
- .size = .{ .width = region.width, .height = region.height },
650
+ const x = @as(i64, @intFromFloat(std.math.round(region.x)));
651
+ const y = @as(i64, @intFromFloat(std.math.round(region.y)));
652
+ const width = @as(i64, @intFromFloat(std.math.round(region.width)));
653
+ const height = @as(i64, @intFromFloat(std.math.round(region.height)));
654
+ if (x < 0 or y < 0 or width <= 0 or height <= 0) {
655
+ return error.InvalidRegion;
656
+ }
657
+ const max_x = x + width;
658
+ const max_y = y + height;
659
+ if (max_x > input.screen_width or max_y > input.screen_height) {
660
+ return error.RegionOutOfBounds;
661
+ }
662
+ return .{
663
+ .x = @as(usize, @intCast(x)),
664
+ .y = @as(usize, @intCast(y)),
665
+ .width = @as(usize, @intCast(width)),
666
+ .height = @as(usize, @intCast(height)),
205
667
  };
206
- const region_image = c.CGDisplayCreateImageForRect(display_id, rect);
207
- if (region_image == null) {
208
- return error.CaptureFailed;
668
+ }
669
+
670
+ return .{
671
+ .x = 0,
672
+ .y = 0,
673
+ .width = input.screen_width,
674
+ .height = input.screen_height,
675
+ };
676
+ }
677
+
678
+ fn convertX11ImageToRgba(image: *c_x11.XImage, width: usize, height: usize) !RawRgbaImage {
679
+ const pixels = try std.heap.c_allocator.alloc(u8, width * height * 4);
680
+ errdefer std.heap.c_allocator.free(pixels);
681
+
682
+ var y: usize = 0;
683
+ while (y < height) : (y += 1) {
684
+ var x: usize = 0;
685
+ while (x < width) : (x += 1) {
686
+ // XGetPixel is a C macro: ((*((ximage)->f.get_pixel))((ximage), (x), (y)))
687
+ const pixel = image.*.f.get_pixel.?(image, @as(c_int, @intCast(x)), @as(c_int, @intCast(y)));
688
+ const red = normalizeX11Channel(.{ .pixel = pixel, .mask = image.*.red_mask });
689
+ const green = normalizeX11Channel(.{ .pixel = pixel, .mask = image.*.green_mask });
690
+ const blue = normalizeX11Channel(.{ .pixel = pixel, .mask = image.*.blue_mask });
691
+ const offset = (y * width + x) * 4;
692
+ pixels[offset] = red;
693
+ pixels[offset + 1] = green;
694
+ pixels[offset + 2] = blue;
695
+ pixels[offset + 3] = 255;
209
696
  }
210
- return region_image;
211
697
  }
212
698
 
213
- const full_image = c.CGDisplayCreateImage(display_id);
214
- if (full_image == null) {
215
- return error.CaptureFailed;
699
+ return .{ .pixels = pixels, .width = width, .height = height };
700
+ }
701
+
702
+ fn normalizeX11Channel(input: struct {
703
+ pixel: c_ulong,
704
+ mask: c_ulong,
705
+ }) u8 {
706
+ if (input.mask == 0) {
707
+ return 0;
708
+ }
709
+ // @ctz returns u7 on 64-bit c_ulong (aarch64-linux), but >> needs u6.
710
+ // The shift can't exceed 63 since mask != 0 and is at most 64 bits.
711
+ const shift: std.math.Log2Int(c_ulong) = @intCast(@ctz(input.mask));
712
+ const bits: std.math.Log2Int(c_ulong) = @intCast(@min(@popCount(input.mask), @bitSizeOf(c_ulong) - 1));
713
+ const raw = (input.pixel & input.mask) >> shift;
714
+ const max_value = (@as(u64, 1) << @intCast(bits)) - 1;
715
+ if (max_value == 0) {
716
+ return 0;
216
717
  }
217
- return full_image;
718
+ return @as(u8, @intCast((raw * 255) / max_value));
218
719
  }
219
720
 
220
- fn resolveDisplayId(display_index: ?usize) !c.CGDirectDisplayID {
221
- const selected_index = display_index orelse 0;
222
- var display_ids: [16]c.CGDirectDisplayID = undefined;
223
- var display_count: u32 = 0;
224
- const list_result = c.CGGetActiveDisplayList(display_ids.len, &display_ids, &display_count);
225
- if (list_result != c.kCGErrorSuccess) {
226
- return error.DisplayQueryFailed;
721
+ fn scaleLinuxScreenshotImageIfNeeded(image: RawRgbaImage) !ScaledScreenshotImage {
722
+ const image_width = @as(f64, @floatFromInt(image.width));
723
+ const image_height = @as(f64, @floatFromInt(image.height));
724
+ const long_edge = @max(image_width, image_height);
725
+ if (long_edge <= screenshot_max_long_edge_px) {
726
+ const copy = try std.heap.c_allocator.dupe(u8, image.pixels);
727
+ return .{
728
+ .image = .{ .pixels = copy, .width = image.width, .height = image.height },
729
+ .width = image_width,
730
+ .height = image_height,
731
+ };
227
732
  }
228
- if (selected_index >= display_count) {
229
- return error.InvalidDisplayIndex;
733
+
734
+ const scale = screenshot_max_long_edge_px / long_edge;
735
+ const target_width = @max(1, @as(usize, @intFromFloat(std.math.round(image_width * scale))));
736
+ const target_height = @max(1, @as(usize, @intFromFloat(std.math.round(image_height * scale))));
737
+ const scaled_pixels = try std.heap.c_allocator.alloc(u8, target_width * target_height * 4);
738
+ errdefer std.heap.c_allocator.free(scaled_pixels);
739
+
740
+ var y: usize = 0;
741
+ while (y < target_height) : (y += 1) {
742
+ const source_y = @min(image.height - 1, @as(usize, @intFromFloat((@as(f64, @floatFromInt(y)) * image_height) / @as(f64, @floatFromInt(target_height)))));
743
+ var x: usize = 0;
744
+ while (x < target_width) : (x += 1) {
745
+ const source_x = @min(image.width - 1, @as(usize, @intFromFloat((@as(f64, @floatFromInt(x)) * image_width) / @as(f64, @floatFromInt(target_width)))));
746
+ const source_offset = (source_y * image.width + source_x) * 4;
747
+ const target_offset = (y * target_width + x) * 4;
748
+ @memcpy(scaled_pixels[target_offset .. target_offset + 4], image.pixels[source_offset .. source_offset + 4]);
749
+ }
230
750
  }
231
- return display_ids[selected_index];
751
+
752
+ return .{
753
+ .image = .{ .pixels = scaled_pixels, .width = target_width, .height = target_height },
754
+ .width = @floatFromInt(target_width),
755
+ .height = @floatFromInt(target_height),
756
+ };
232
757
  }
233
758
 
234
- fn writeScreenshotPng(input: struct {
235
- image: c.CGImageRef,
759
+ fn writeLinuxScreenshotPng(input: struct {
760
+ image: RawRgbaImage,
236
761
  output_path: []const u8,
237
762
  }) !void {
238
- const path_as_u8: [*]const u8 = @ptrCast(input.output_path.ptr);
239
- const file_url = c.CFURLCreateFromFileSystemRepresentation(
240
- null,
241
- path_as_u8,
242
- @as(c_long, @intCast(input.output_path.len)),
763
+ var png: c_x11.png_image = std.mem.zeroes(c_x11.png_image);
764
+ png.version = c_x11.PNG_IMAGE_VERSION;
765
+ png.width = @as(c_x11.png_uint_32, @intCast(input.image.width));
766
+ png.height = @as(c_x11.png_uint_32, @intCast(input.image.height));
767
+ png.format = c_x11.PNG_FORMAT_RGBA;
768
+
769
+ const output_path_z = try std.heap.c_allocator.dupeZ(u8, input.output_path);
770
+ defer std.heap.c_allocator.free(output_path_z);
771
+
772
+ const write_result = c_x11.png_image_write_to_file(
773
+ &png,
774
+ output_path_z.ptr,
243
775
  0,
776
+ input.image.pixels.ptr,
777
+ @as(c_int, @intCast(input.image.width * 4)),
778
+ null,
244
779
  );
245
- if (file_url == null) {
246
- return error.FileUrlCreateFailed;
780
+ if (write_result == 0) {
781
+ c_x11.png_image_free(&png);
782
+ return error.PngWriteFailed;
247
783
  }
248
- defer c.CFRelease(file_url);
784
+ c_x11.png_image_free(&png);
785
+ }
249
786
 
250
- const png_type = c.CFStringCreateWithCString(null, "public.png", c.kCFStringEncodingUTF8);
251
- if (png_type == null) {
252
- return error.PngTypeCreateFailed;
253
- }
254
- defer c.CFRelease(png_type);
787
+ pub fn click(input: ClickInput) CommandResult {
788
+ const click_count: u32 = if (input.count) |count| blk: {
789
+ const normalized = @as(i64, @intFromFloat(std.math.round(count)));
790
+ if (normalized <= 0) {
791
+ break :blk 1;
792
+ }
793
+ break :blk @as(u32, @intCast(normalized));
794
+ } else 1;
255
795
 
256
- const destination = c.CGImageDestinationCreateWithURL(file_url, png_type, 1, null);
257
- if (destination == null) {
258
- return error.ImageDestinationCreateFailed;
259
- }
260
- defer c.CFRelease(destination);
796
+ const button_kind = resolveMouseButton(input.button orelse "left") catch {
797
+ return failCommand("click", "INVALID_INPUT", "invalid click button");
798
+ };
261
799
 
262
- c.CGImageDestinationAddImage(destination, input.image, null);
263
- const did_finalize = c.CGImageDestinationFinalize(destination);
264
- if (!did_finalize) {
265
- return error.ImageDestinationFinalizeFailed;
800
+ switch (builtin.target.os.tag) {
801
+ .macos => {
802
+ const point: c.CGPoint = .{
803
+ .x = input.point.x,
804
+ .y = input.point.y,
805
+ };
806
+
807
+ var index: u32 = 0;
808
+ while (index < click_count) : (index += 1) {
809
+ const click_state = @as(i64, @intCast(index + 1));
810
+ postClickPair(point, button_kind, click_state) catch {
811
+ return failCommand("click", "EVENT_POST_FAILED", "failed to post click event");
812
+ };
813
+
814
+ if (index + 1 < click_count) {
815
+ std.Thread.sleep(80 * std.time.ns_per_ms);
816
+ }
817
+ }
818
+
819
+ return okCommand();
820
+ },
821
+ .linux => {
822
+ const display = openX11Display() catch {
823
+ return failCommand("click", "EVENT_POST_FAILED", "failed to open X11 display");
824
+ };
825
+ defer _ = c_x11.XCloseDisplay(display);
826
+
827
+ moveCursorToPointX11(.{ .x = input.point.x, .y = input.point.y }, display) catch {
828
+ return failCommand("click", "EVENT_POST_FAILED", "failed to move mouse cursor");
829
+ };
830
+
831
+ var index: u32 = 0;
832
+ while (index < click_count) : (index += 1) {
833
+ postClickPairX11(.{ .x = input.point.x, .y = input.point.y }, button_kind, display) catch {
834
+ return failCommand("click", "EVENT_POST_FAILED", "failed to post click event");
835
+ };
836
+
837
+ if (index + 1 < click_count) {
838
+ std.Thread.sleep(80 * std.time.ns_per_ms);
839
+ }
840
+ }
841
+
842
+ _ = c_x11.XFlush(display);
843
+ return okCommand();
844
+ },
845
+ else => {
846
+ return failCommand("click", "UNSUPPORTED_PLATFORM", "click is unsupported on this platform");
847
+ },
266
848
  }
267
849
  }
268
850
 
269
- fn executeMouseMoveCommand(allocator: std.mem.Allocator, payload_json: []const u8) ![]const u8 {
270
- if (builtin.target.os.tag != .macos) {
271
- return makeErrorJson(allocator, "mouse-move is only supported on macOS");
851
+ pub fn mouseMove(input: MouseMoveInput) CommandResult {
852
+ switch (builtin.target.os.tag) {
853
+ .macos => {
854
+ const point: c.CGPoint = .{
855
+ .x = input.x,
856
+ .y = input.y,
857
+ };
858
+ moveCursorToPoint(point) catch {
859
+ return failCommand("mouse-move", "EVENT_POST_FAILED", "failed to move mouse cursor");
860
+ };
861
+
862
+ return okCommand();
863
+ },
864
+ .linux => {
865
+ const display = openX11Display() catch {
866
+ return failCommand("mouse-move", "EVENT_POST_FAILED", "failed to open X11 display");
867
+ };
868
+ defer _ = c_x11.XCloseDisplay(display);
869
+
870
+ moveCursorToPointX11(.{ .x = input.x, .y = input.y }, display) catch {
871
+ return failCommand("mouse-move", "EVENT_POST_FAILED", "failed to move mouse cursor");
872
+ };
873
+ _ = c_x11.XFlush(display);
874
+ return okCommand();
875
+ },
876
+ else => {
877
+ return failCommand("mouse-move", "UNSUPPORTED_PLATFORM", "mouse-move is unsupported on this platform");
878
+ },
272
879
  }
880
+ }
273
881
 
274
- var parsed = std.json.parseFromSlice(MouseMovePayload, allocator, payload_json, .{}) catch {
275
- return makeErrorJson(allocator, "invalid mouse-move payload json");
276
- };
277
- defer parsed.deinit();
882
+ pub fn mouseDown(input: MouseButtonInput) CommandResult {
883
+ return handleMouseButtonInput(.{ .input = input, .is_down = true });
884
+ }
278
885
 
279
- const point: c.CGPoint = .{
280
- .x = parsed.value.x,
281
- .y = parsed.value.y,
282
- };
283
- moveCursorToPoint(point) catch {
284
- return makeErrorJson(allocator, "failed to move mouse cursor");
886
+ pub fn mouseUp(input: MouseButtonInput) CommandResult {
887
+ return handleMouseButtonInput(.{ .input = input, .is_down = false });
888
+ }
889
+
890
+ fn handleMouseButtonInput(args: struct {
891
+ input: MouseButtonInput,
892
+ is_down: bool,
893
+ }) CommandResult {
894
+ const button_kind = resolveMouseButton(args.input.button orelse "left") catch {
895
+ return failCommand("mouse-button", "INVALID_INPUT", "invalid mouse button");
285
896
  };
286
897
 
287
- return makeOkJson(allocator, "null");
898
+ switch (builtin.target.os.tag) {
899
+ .macos => {
900
+ const point = currentCursorPoint() catch {
901
+ return failCommand("mouse-button", "CURSOR_READ_FAILED", "failed to read cursor position");
902
+ };
903
+
904
+ postMouseButtonEvent(point, button_kind, args.is_down, 1) catch {
905
+ return failCommand("mouse-button", "EVENT_POST_FAILED", "failed to post mouse button event");
906
+ };
907
+
908
+ return okCommand();
909
+ },
910
+ .linux => {
911
+ const display = openX11Display() catch {
912
+ return failCommand("mouse-button", "EVENT_POST_FAILED", "failed to open X11 display");
913
+ };
914
+ defer _ = c_x11.XCloseDisplay(display);
915
+
916
+ postMouseButtonEventX11(button_kind, args.is_down, display) catch {
917
+ return failCommand("mouse-button", "EVENT_POST_FAILED", "failed to post mouse button event");
918
+ };
919
+ _ = c_x11.XFlush(display);
920
+
921
+ return okCommand();
922
+ },
923
+ else => {
924
+ return failCommand("mouse-button", "UNSUPPORTED_PLATFORM", "mouse button events are unsupported on this platform");
925
+ },
926
+ }
927
+ }
928
+
929
+ pub fn mousePosition() DataResult(Point) {
930
+ switch (builtin.target.os.tag) {
931
+ .macos => {
932
+ const point = currentCursorPoint() catch {
933
+ return failData(Point, "mouse-position", "CURSOR_READ_FAILED", "failed to read cursor position");
934
+ };
935
+
936
+ return okData(Point, .{ .x = std.math.round(point.x), .y = std.math.round(point.y) });
937
+ },
938
+ .linux => {
939
+ const display = openX11Display() catch {
940
+ return failData(Point, "mouse-position", "EVENT_POST_FAILED", "failed to open X11 display");
941
+ };
942
+ defer _ = c_x11.XCloseDisplay(display);
943
+
944
+ const point = currentCursorPointX11(display) catch {
945
+ return failData(Point, "mouse-position", "CURSOR_READ_FAILED", "failed to read cursor position");
946
+ };
947
+
948
+ return okData(Point, .{ .x = @floatFromInt(point.x), .y = @floatFromInt(point.y) });
949
+ },
950
+ else => {
951
+ return failData(Point, "mouse-position", "UNSUPPORTED_PLATFORM", "mouse-position is unsupported on this platform");
952
+ },
953
+ }
288
954
  }
289
955
 
290
- fn executeMouseDownCommand(allocator: std.mem.Allocator, payload_json: []const u8) ![]const u8 {
291
- return executeMouseButtonCommand(allocator, payload_json, true);
956
+ pub fn hover(input: Point) CommandResult {
957
+ return mouseMove(input);
292
958
  }
293
959
 
294
- fn executeMouseUpCommand(allocator: std.mem.Allocator, payload_json: []const u8) ![]const u8 {
295
- return executeMouseButtonCommand(allocator, payload_json, false);
960
+ pub fn drag(input: DragInput) CommandResult {
961
+ const button_kind = resolveMouseButton(input.button orelse "left") catch {
962
+ return failCommand("drag", "INVALID_INPUT", "invalid drag button");
963
+ };
964
+ const duration_ms = if (input.durationMs) |value| blk: {
965
+ const normalized = @as(i64, @intFromFloat(std.math.round(value)));
966
+ if (normalized <= 0) {
967
+ break :blk 400;
968
+ }
969
+ break :blk normalized;
970
+ } else 400;
971
+ const total_duration_ns = @as(u64, @intCast(duration_ms)) * std.time.ns_per_ms;
972
+ const step_count: u64 = 16;
973
+ const step_duration_ns = if (step_count == 0) 0 else total_duration_ns / step_count;
974
+
975
+ switch (builtin.target.os.tag) {
976
+ .macos => {
977
+ const from: c.CGPoint = .{ .x = input.from.x, .y = input.from.y };
978
+ const to: c.CGPoint = .{ .x = input.to.x, .y = input.to.y };
979
+
980
+ moveCursorToPoint(from) catch {
981
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to move cursor to drag origin");
982
+ };
983
+
984
+ postMouseButtonEvent(from, button_kind, true, 1) catch {
985
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to post drag mouse-down");
986
+ };
987
+
988
+ var index: u64 = 1;
989
+ while (index <= step_count) : (index += 1) {
990
+ const fraction = @as(f64, @floatFromInt(index)) / @as(f64, @floatFromInt(step_count));
991
+ const next_point: c.CGPoint = .{
992
+ .x = from.x + (to.x - from.x) * fraction,
993
+ .y = from.y + (to.y - from.y) * fraction,
994
+ };
995
+
996
+ moveCursorToPoint(next_point) catch {
997
+ return failCommand("drag", "EVENT_POST_FAILED", "failed during drag cursor movement");
998
+ };
999
+
1000
+ if (step_duration_ns > 0 and index < step_count) {
1001
+ std.Thread.sleep(step_duration_ns);
1002
+ }
1003
+ }
1004
+
1005
+ postMouseButtonEvent(to, button_kind, false, 1) catch {
1006
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to post drag mouse-up");
1007
+ };
1008
+
1009
+ return okCommand();
1010
+ },
1011
+ .linux => {
1012
+ const display = openX11Display() catch {
1013
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to open X11 display");
1014
+ };
1015
+ defer _ = c_x11.XCloseDisplay(display);
1016
+
1017
+ moveCursorToPointX11(.{ .x = input.from.x, .y = input.from.y }, display) catch {
1018
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to move cursor to drag origin");
1019
+ };
1020
+
1021
+ postMouseButtonEventX11(button_kind, true, display) catch {
1022
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to post drag mouse-down");
1023
+ };
1024
+
1025
+ var index: u64 = 1;
1026
+ while (index <= step_count) : (index += 1) {
1027
+ const fraction = @as(f64, @floatFromInt(index)) / @as(f64, @floatFromInt(step_count));
1028
+ const next_point = Point{
1029
+ .x = input.from.x + (input.to.x - input.from.x) * fraction,
1030
+ .y = input.from.y + (input.to.y - input.from.y) * fraction,
1031
+ };
1032
+
1033
+ moveCursorToPointX11(next_point, display) catch {
1034
+ return failCommand("drag", "EVENT_POST_FAILED", "failed during drag cursor movement");
1035
+ };
1036
+
1037
+ if (step_duration_ns > 0 and index < step_count) {
1038
+ std.Thread.sleep(step_duration_ns);
1039
+ }
1040
+ }
1041
+
1042
+ postMouseButtonEventX11(button_kind, false, display) catch {
1043
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to post drag mouse-up");
1044
+ };
1045
+ _ = c_x11.XFlush(display);
1046
+
1047
+ return okCommand();
1048
+ },
1049
+ else => {
1050
+ return failCommand("drag", "UNSUPPORTED_PLATFORM", "drag is unsupported on this platform");
1051
+ },
1052
+ }
296
1053
  }
297
1054
 
298
- fn executeMouseButtonCommand(allocator: std.mem.Allocator, payload_json: []const u8, is_down: bool) ![]const u8 {
1055
+ pub fn displayList() DataResult([]const u8) {
1056
+ if (builtin.target.os.tag == .linux) {
1057
+ const display = openX11Display() catch {
1058
+ return failData([]const u8, "display-list", "DISPLAY_QUERY_FAILED", "failed to open X11 display");
1059
+ };
1060
+ defer _ = c_x11.XCloseDisplay(display);
1061
+
1062
+ const screen_count: usize = @intCast(c_x11.XScreenCount(display));
1063
+ if (screen_count == 0) {
1064
+ return failData([]const u8, "display-list", "DISPLAY_QUERY_FAILED", "failed to query active displays");
1065
+ }
1066
+
1067
+ const primary_screen = c_x11.XDefaultScreen(display);
1068
+
1069
+ var write_buffer: [32 * 1024]u8 = undefined;
1070
+ var stream = std.io.fixedBufferStream(&write_buffer);
1071
+ const writer = stream.writer();
1072
+
1073
+ writer.writeByte('[') catch {
1074
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
1075
+ };
1076
+
1077
+ var i: usize = 0;
1078
+ while (i < screen_count) : (i += 1) {
1079
+ if (i > 0) {
1080
+ writer.writeByte(',') catch {
1081
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
1082
+ };
1083
+ }
1084
+
1085
+ var name_buffer: [64]u8 = undefined;
1086
+ const display_name = std.fmt.bufPrint(&name_buffer, "Display {d}", .{i}) catch "Display";
1087
+ const screen_index: c_int = @intCast(i);
1088
+ const root = c_x11.XRootWindow(display, screen_index);
1089
+ const width = c_x11.XDisplayWidth(display, screen_index);
1090
+ const height = c_x11.XDisplayHeight(display, screen_index);
1091
+
1092
+ const item = DisplayInfoOutput{
1093
+ .id = @as(u32, @truncate(@as(u64, @intCast(root)))),
1094
+ .index = @intCast(i),
1095
+ .name = display_name,
1096
+ .x = 0,
1097
+ .y = 0,
1098
+ .width = @floatFromInt(width),
1099
+ .height = @floatFromInt(height),
1100
+ .scale = 1,
1101
+ .isPrimary = screen_index == primary_screen,
1102
+ };
1103
+
1104
+ writer.print("{f}", .{std.json.fmt(item, .{})}) catch {
1105
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
1106
+ };
1107
+ }
1108
+
1109
+ writer.writeByte(']') catch {
1110
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
1111
+ };
1112
+
1113
+ const payload = std.heap.c_allocator.dupe(u8, stream.getWritten()) catch {
1114
+ return failData([]const u8, "display-list", "ALLOC_FAILED", "failed to allocate display list response");
1115
+ };
1116
+ return okData([]const u8, payload);
1117
+ }
1118
+
299
1119
  if (builtin.target.os.tag != .macos) {
300
- return makeErrorJson(allocator, "mouse button events are only supported on macOS");
1120
+ return failData([]const u8, "display-list", "UNSUPPORTED_PLATFORM", "display-list is unsupported on this platform");
301
1121
  }
302
1122
 
303
- var parsed = std.json.parseFromSlice(MouseButtonPayload, allocator, payload_json, .{}) catch {
304
- return makeErrorJson(allocator, "invalid mouse button payload json");
305
- };
306
- defer parsed.deinit();
1123
+ var display_ids: [16]c.CGDirectDisplayID = undefined;
1124
+ var display_count: u32 = 0;
1125
+ const list_result = c.CGGetActiveDisplayList(display_ids.len, &display_ids, &display_count);
1126
+ if (list_result != c.kCGErrorSuccess) {
1127
+ return failData([]const u8, "display-list", "DISPLAY_QUERY_FAILED", "failed to query active displays");
1128
+ }
307
1129
 
308
- const button_kind = resolveMouseButton(parsed.value.button orelse "left") catch {
309
- return makeErrorJson(allocator, "invalid mouse button");
310
- };
1130
+ var write_buffer: [32 * 1024]u8 = undefined;
1131
+ var stream = std.io.fixedBufferStream(&write_buffer);
1132
+ const writer = stream.writer();
311
1133
 
312
- const point = currentCursorPoint() catch {
313
- return makeErrorJson(allocator, "failed to read cursor position");
1134
+ writer.writeByte('[') catch {
1135
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
314
1136
  };
315
1137
 
316
- postMouseButtonEvent(point, button_kind, is_down, 1) catch {
317
- return makeErrorJson(allocator, "failed to post mouse button event");
1138
+ var i: usize = 0;
1139
+ while (i < display_count) : (i += 1) {
1140
+ if (i > 0) {
1141
+ writer.writeByte(',') catch {
1142
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
1143
+ };
1144
+ }
1145
+
1146
+ const display_id = display_ids[i];
1147
+ const bounds = c.CGDisplayBounds(display_id);
1148
+ var name_buffer: [64]u8 = undefined;
1149
+ const fallback_name = std.fmt.bufPrint(&name_buffer, "Display {d}", .{display_id}) catch "Display";
1150
+ const item = DisplayInfoOutput{
1151
+ .id = display_id,
1152
+ .index = @intCast(i),
1153
+ .name = fallback_name,
1154
+ .x = std.math.round(bounds.origin.x),
1155
+ .y = std.math.round(bounds.origin.y),
1156
+ .width = std.math.round(bounds.size.width),
1157
+ .height = std.math.round(bounds.size.height),
1158
+ .scale = 1,
1159
+ .isPrimary = c.CGDisplayIsMain(display_id) != 0,
1160
+ };
1161
+
1162
+ writer.print("{f}", .{std.json.fmt(item, .{})}) catch {
1163
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
1164
+ };
1165
+ }
1166
+
1167
+ writer.writeByte(']') catch {
1168
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
318
1169
  };
319
1170
 
320
- return makeOkJson(allocator, "null");
1171
+ // TODO: Add Mission Control desktop/space enumeration via private SkyLight APIs.
1172
+ const payload = std.heap.c_allocator.dupe(u8, stream.getWritten()) catch {
1173
+ return failData([]const u8, "display-list", "ALLOC_FAILED", "failed to allocate display list response");
1174
+ };
1175
+ return okData([]const u8, payload);
321
1176
  }
322
1177
 
323
- fn executeMousePositionCommand(allocator: std.mem.Allocator) ![]const u8 {
1178
+ pub fn windowList() DataResult([]const u8) {
324
1179
  if (builtin.target.os.tag != .macos) {
325
- return makeErrorJson(allocator, "mouse-position is only supported on macOS");
1180
+ return failData([]const u8, "window-list", "UNSUPPORTED_PLATFORM", "window-list is only supported on macOS");
326
1181
  }
327
1182
 
328
- const point = currentCursorPoint() catch {
329
- return makeErrorJson(allocator, "failed to read cursor position");
1183
+ const payload = serializeWindowListJson() catch {
1184
+ return failData([]const u8, "window-list", "WINDOW_QUERY_FAILED", "failed to query visible windows");
330
1185
  };
1186
+ return okData([]const u8, payload);
1187
+ }
331
1188
 
332
- const x = @as(i64, @intFromFloat(std.math.round(point.x)));
333
- const y = @as(i64, @intFromFloat(std.math.round(point.y)));
334
- const point_json = try std.fmt.allocPrint(allocator, "{{\"x\":{d},\"y\":{d}}}", .{ x, y });
335
- return makeOkJson(allocator, point_json);
1189
+ pub fn clipboardGet() DataResult([]const u8) {
1190
+ return failData([]const u8, "clipboard-get", "TODO_NOT_IMPLEMENTED", "TODO not implemented: clipboard-get");
336
1191
  }
337
1192
 
338
- fn executeHoverCommand(allocator: std.mem.Allocator, payload_json: []const u8) ![]const u8 {
339
- if (builtin.target.os.tag != .macos) {
340
- return makeErrorJson(allocator, "hover is only supported on macOS");
1193
+ pub fn clipboardSet(input: ClipboardSetInput) CommandResult {
1194
+ _ = input;
1195
+ return todoNotImplemented("clipboard-set");
1196
+ }
1197
+
1198
+ pub fn typeText(input: TypeTextInput) CommandResult {
1199
+ switch (builtin.target.os.tag) {
1200
+ .macos => {
1201
+ typeTextMacos(input) catch |err| {
1202
+ return failCommand("type-text", "EVENT_POST_FAILED", @errorName(err));
1203
+ };
1204
+ return okCommand();
1205
+ },
1206
+ .windows => {
1207
+ typeTextWindows(input) catch |err| {
1208
+ return failCommand("type-text", "EVENT_POST_FAILED", @errorName(err));
1209
+ };
1210
+ return okCommand();
1211
+ },
1212
+ .linux => {
1213
+ typeTextX11(input) catch |err| {
1214
+ return failCommand("type-text", "EVENT_POST_FAILED", @errorName(err));
1215
+ };
1216
+ return okCommand();
1217
+ },
1218
+ else => {
1219
+ return failCommand("type-text", "UNSUPPORTED_PLATFORM", "type-text is unsupported on this platform");
1220
+ },
341
1221
  }
1222
+ }
342
1223
 
343
- var parsed = std.json.parseFromSlice(MouseMovePayload, allocator, payload_json, .{}) catch {
344
- return makeErrorJson(allocator, "invalid hover payload json");
345
- };
346
- defer parsed.deinit();
1224
+ pub fn press(input: PressInput) CommandResult {
1225
+ switch (builtin.target.os.tag) {
1226
+ .macos => {
1227
+ pressMacos(input) catch |err| {
1228
+ return failCommand("press", "EVENT_POST_FAILED", @errorName(err));
1229
+ };
1230
+ return okCommand();
1231
+ },
1232
+ .windows => {
1233
+ pressWindows(input) catch |err| {
1234
+ return failCommand("press", "EVENT_POST_FAILED", @errorName(err));
1235
+ };
1236
+ return okCommand();
1237
+ },
1238
+ .linux => {
1239
+ pressX11(input) catch |err| {
1240
+ return failCommand("press", "EVENT_POST_FAILED", @errorName(err));
1241
+ };
1242
+ return okCommand();
1243
+ },
1244
+ else => {
1245
+ return failCommand("press", "UNSUPPORTED_PLATFORM", "press is unsupported on this platform");
1246
+ },
1247
+ }
1248
+ }
347
1249
 
348
- const point: c.CGPoint = .{
349
- .x = parsed.value.x,
350
- .y = parsed.value.y,
1250
+ pub fn scroll(input: ScrollInput) CommandResult {
1251
+ scroll_impl.scroll(.{
1252
+ .direction = input.direction,
1253
+ .amount = input.amount,
1254
+ .at_x = if (input.at) |point| point.x else null,
1255
+ .at_y = if (input.at) |point| point.y else null,
1256
+ }) catch |err| {
1257
+ const error_name = @errorName(err);
1258
+ if (std.mem.eql(u8, error_name, "InvalidDirection") or
1259
+ std.mem.eql(u8, error_name, "InvalidAmount") or
1260
+ std.mem.eql(u8, error_name, "AmountTooLarge") or
1261
+ std.mem.eql(u8, error_name, "InvalidPoint"))
1262
+ {
1263
+ return failCommand("scroll", "INVALID_INPUT", error_name);
1264
+ }
1265
+ return failCommand("scroll", "EVENT_POST_FAILED", error_name);
351
1266
  };
1267
+ return okCommand();
1268
+ }
1269
+
1270
+ const ParsedPress = struct {
1271
+ key: []const u8,
1272
+ cmd: bool = false,
1273
+ alt: bool = false,
1274
+ ctrl: bool = false,
1275
+ shift: bool = false,
1276
+ fn_key: bool = false,
1277
+ };
1278
+
1279
+ fn parsePressKey(key_input: []const u8) !ParsedPress {
1280
+ var parsed: ParsedPress = .{ .key = "" };
1281
+ var saw_key = false;
1282
+ var parts = std.mem.splitScalar(u8, key_input, '+');
1283
+ while (parts.next()) |part| {
1284
+ const trimmed = std.mem.trim(u8, part, " \t\r\n");
1285
+ if (trimmed.len == 0) {
1286
+ continue;
1287
+ }
352
1288
 
353
- moveCursorToPoint(point) catch {
354
- return makeErrorJson(allocator, "failed to move cursor for hover");
1289
+ if (std.ascii.eqlIgnoreCase(trimmed, "cmd") or std.ascii.eqlIgnoreCase(trimmed, "command") or std.ascii.eqlIgnoreCase(trimmed, "meta")) {
1290
+ parsed.cmd = true;
1291
+ continue;
1292
+ }
1293
+ if (std.ascii.eqlIgnoreCase(trimmed, "alt") or std.ascii.eqlIgnoreCase(trimmed, "option")) {
1294
+ parsed.alt = true;
1295
+ continue;
1296
+ }
1297
+ if (std.ascii.eqlIgnoreCase(trimmed, "ctrl") or std.ascii.eqlIgnoreCase(trimmed, "control")) {
1298
+ parsed.ctrl = true;
1299
+ continue;
1300
+ }
1301
+ if (std.ascii.eqlIgnoreCase(trimmed, "shift")) {
1302
+ parsed.shift = true;
1303
+ continue;
1304
+ }
1305
+ if (std.ascii.eqlIgnoreCase(trimmed, "fn")) {
1306
+ parsed.fn_key = true;
1307
+ continue;
1308
+ }
1309
+
1310
+ if (saw_key) {
1311
+ return error.MultipleMainKeys;
1312
+ }
1313
+ parsed.key = trimmed;
1314
+ saw_key = true;
1315
+ }
1316
+
1317
+ if (!saw_key) {
1318
+ return error.MissingMainKey;
1319
+ }
1320
+ return parsed;
1321
+ }
1322
+
1323
+ fn normalizedCount(value: ?f64) u32 {
1324
+ if (value) |count| {
1325
+ const rounded = @as(i64, @intFromFloat(std.math.round(count)));
1326
+ if (rounded > 0) {
1327
+ return @as(u32, @intCast(rounded));
1328
+ }
1329
+ }
1330
+ return 1;
1331
+ }
1332
+
1333
+ fn normalizedDelayNs(value: ?f64) u64 {
1334
+ if (value) |delay_ms| {
1335
+ const rounded = @as(i64, @intFromFloat(std.math.round(delay_ms)));
1336
+ if (rounded > 0) {
1337
+ return @as(u64, @intCast(rounded)) * std.time.ns_per_ms;
1338
+ }
1339
+ }
1340
+ return 0;
1341
+ }
1342
+
1343
+ fn codepointToUtf16(codepoint: u21) !struct { units: [2]u16, len: usize } {
1344
+ if (codepoint <= 0xD7FF or (codepoint >= 0xE000 and codepoint <= 0xFFFF)) {
1345
+ return .{ .units = .{ @as(u16, @intCast(codepoint)), 0 }, .len = 1 };
1346
+ }
1347
+ if (codepoint >= 0x10000 and codepoint <= 0x10FFFF) {
1348
+ const value = codepoint - 0x10000;
1349
+ const high = @as(u16, @intCast(0xD800 + (value >> 10)));
1350
+ const low = @as(u16, @intCast(0xDC00 + (value & 0x3FF)));
1351
+ return .{ .units = .{ high, low }, .len = 2 };
1352
+ }
1353
+ return error.InvalidCodepoint;
1354
+ }
1355
+
1356
+ fn typeTextMacos(input: TypeTextInput) !void {
1357
+ const delay_ns = normalizedDelayNs(input.delayMs);
1358
+ var view = try std.unicode.Utf8View.init(input.text);
1359
+ var iterator = view.iterator();
1360
+ while (iterator.nextCodepoint()) |codepoint| {
1361
+ const utf16 = try codepointToUtf16(codepoint);
1362
+ const down = c_macos.CGEventCreateKeyboardEvent(null, 0, true) orelse return error.CGEventCreateFailed;
1363
+ defer c_macos.CFRelease(down);
1364
+ c_macos.CGEventSetFlags(down, 0);
1365
+ c_macos.CGEventKeyboardSetUnicodeString(down, @as(c_macos.UniCharCount, @intCast(utf16.len)), @ptrCast(&utf16.units[0]));
1366
+ c_macos.CGEventPost(c_macos.kCGHIDEventTap, down);
1367
+
1368
+ const up = c_macos.CGEventCreateKeyboardEvent(null, 0, false) orelse return error.CGEventCreateFailed;
1369
+ defer c_macos.CFRelease(up);
1370
+ c_macos.CGEventSetFlags(up, 0);
1371
+ c_macos.CGEventKeyboardSetUnicodeString(up, @as(c_macos.UniCharCount, @intCast(utf16.len)), @ptrCast(&utf16.units[0]));
1372
+ c_macos.CGEventPost(c_macos.kCGHIDEventTap, up);
1373
+
1374
+ if (delay_ns > 0) {
1375
+ std.Thread.sleep(delay_ns);
1376
+ }
1377
+ }
1378
+ }
1379
+
1380
/// Translates a key name into a macOS virtual key code.
///
/// A one-character name is lower-cased and matched against the letter, digit
/// and punctuation entries of `mac_keycode`. Any other name is compared
/// case-insensitively against a fixed list of named keys ("enter", "pageup",
/// "f5", ...). Returns `error.UnknownKey` when nothing matches, which includes
/// the empty string.
fn keyCodeForMacosKey(key_name: []const u8) !c_macos.CGKeyCode {
    if (key_name.len != 1) {
        const NamedKey = struct { name: []const u8, code: c_macos.CGKeyCode };
        const named_keys = [_]NamedKey{
            .{ .name = "enter", .code = mac_keycode.enter },
            .{ .name = "return", .code = mac_keycode.enter },
            .{ .name = "tab", .code = mac_keycode.tab },
            .{ .name = "space", .code = mac_keycode.space },
            .{ .name = "escape", .code = mac_keycode.escape },
            .{ .name = "esc", .code = mac_keycode.escape },
            // "backspace" is the key Apple labels Delete; "delete" is forward delete.
            .{ .name = "backspace", .code = mac_keycode.delete },
            .{ .name = "delete", .code = mac_keycode.forward_delete },
            .{ .name = "left", .code = mac_keycode.left_arrow },
            .{ .name = "right", .code = mac_keycode.right_arrow },
            .{ .name = "up", .code = mac_keycode.up_arrow },
            .{ .name = "down", .code = mac_keycode.down_arrow },
            .{ .name = "home", .code = mac_keycode.home },
            .{ .name = "end", .code = mac_keycode.end },
            .{ .name = "pageup", .code = mac_keycode.page_up },
            .{ .name = "pagedown", .code = mac_keycode.page_down },
            .{ .name = "f1", .code = mac_keycode.f1 },
            .{ .name = "f2", .code = mac_keycode.f2 },
            .{ .name = "f3", .code = mac_keycode.f3 },
            .{ .name = "f4", .code = mac_keycode.f4 },
            .{ .name = "f5", .code = mac_keycode.f5 },
            .{ .name = "f6", .code = mac_keycode.f6 },
            .{ .name = "f7", .code = mac_keycode.f7 },
            .{ .name = "f8", .code = mac_keycode.f8 },
            .{ .name = "f9", .code = mac_keycode.f9 },
            .{ .name = "f10", .code = mac_keycode.f10 },
            .{ .name = "f11", .code = mac_keycode.f11 },
            .{ .name = "f12", .code = mac_keycode.f12 },
        };
        for (named_keys) |entry| {
            if (std.ascii.eqlIgnoreCase(key_name, entry.name)) return entry.code;
        }
        return error.UnknownKey;
    }

    return switch (std.ascii.toLower(key_name[0])) {
        'a' => mac_keycode.a,
        'b' => mac_keycode.b,
        'c' => mac_keycode.c,
        'd' => mac_keycode.d,
        'e' => mac_keycode.e,
        'f' => mac_keycode.f,
        'g' => mac_keycode.g,
        'h' => mac_keycode.h,
        'i' => mac_keycode.i,
        'j' => mac_keycode.j,
        'k' => mac_keycode.k,
        'l' => mac_keycode.l,
        'm' => mac_keycode.m,
        'n' => mac_keycode.n,
        'o' => mac_keycode.o,
        'p' => mac_keycode.p,
        'q' => mac_keycode.q,
        'r' => mac_keycode.r,
        's' => mac_keycode.s,
        't' => mac_keycode.t,
        'u' => mac_keycode.u,
        'v' => mac_keycode.v,
        'w' => mac_keycode.w,
        'x' => mac_keycode.x,
        'y' => mac_keycode.y,
        'z' => mac_keycode.z,
        '0' => mac_keycode.zero,
        '1' => mac_keycode.one,
        '2' => mac_keycode.two,
        '3' => mac_keycode.three,
        '4' => mac_keycode.four,
        '5' => mac_keycode.five,
        '6' => mac_keycode.six,
        '7' => mac_keycode.seven,
        '8' => mac_keycode.eight,
        '9' => mac_keycode.nine,
        '=' => mac_keycode.equal,
        '-' => mac_keycode.minus,
        '[' => mac_keycode.left_bracket,
        ']' => mac_keycode.right_bracket,
        ';' => mac_keycode.semicolon,
        '\'' => mac_keycode.quote,
        '\\' => mac_keycode.backslash,
        ',' => mac_keycode.comma,
        '.' => mac_keycode.period,
        '/' => mac_keycode.slash,
        '`' => mac_keycode.grave,
        else => error.UnknownKey,
    };
}
1464
+
1465
/// Creates one keyboard event for `key_code` (key-down when `is_down`, key-up
/// otherwise), sets `flags` on it and posts it to `kCGHIDEventTap`.
/// Returns `error.CGEventCreateFailed` when the event cannot be created.
fn postMacosKey(key_code: c_macos.CGKeyCode, is_down: bool, flags: c_macos.CGEventFlags) !void {
    const maybe_event = c_macos.CGEventCreateKeyboardEvent(null, key_code, is_down);
    const key_event = maybe_event orelse return error.CGEventCreateFailed;
    // The event is released once it has been posted.
    defer c_macos.CFRelease(key_event);

    c_macos.CGEventSetFlags(key_event, flags);
    c_macos.CGEventPost(c_macos.kCGHIDEventTap, key_event);
}
1471
+
1472
/// Presses the key chord described by `input.key` on macOS.
///
/// The chord is parsed with `parsePressKey`, the main key is resolved with
/// `keyCodeForMacosKey`, and the sequence "modifiers down, key down, key up,
/// modifiers up" is posted `normalizedCount(input.count)` times, sleeping
/// `normalizedDelayNs(input.delayMs)` between repetitions (not after the last).
///
/// Errors from parsing, key lookup and event creation are propagated. If an
/// event cannot be posted part-way through a chord, every modifier that was
/// already pressed is released (best effort) before the error is returned, so
/// a failure does not leave Command/Option/Control/Shift/Fn logically held.
fn pressMacos(input: PressInput) !void {
    const parsed = try parsePressKey(input.key);
    const key_code = try keyCodeForMacosKey(parsed.key);
    const repeat_count = normalizedCount(input.count);
    const delay_ns = normalizedDelayNs(input.delayMs);

    var flags: c_macos.CGEventFlags = 0;
    if (parsed.cmd) flags |= c_macos.kCGEventFlagMaskCommand;
    if (parsed.alt) flags |= c_macos.kCGEventFlagMaskAlternate;
    if (parsed.ctrl) flags |= c_macos.kCGEventFlagMaskControl;
    if (parsed.shift) flags |= c_macos.kCGEventFlagMaskShift;
    if (parsed.fn_key) flags |= c_macos.kCGEventFlagMaskSecondaryFn;

    // Press order; release happens in the reverse order.
    const Modifier = struct { wanted: bool, code: c_macos.CGKeyCode };
    const modifiers = [_]Modifier{
        .{ .wanted = parsed.cmd, .code = mac_keycode.command },
        .{ .wanted = parsed.alt, .code = mac_keycode.option },
        .{ .wanted = parsed.ctrl, .code = mac_keycode.control },
        .{ .wanted = parsed.shift, .code = mac_keycode.shift },
        .{ .wanted = parsed.fn_key, .code = mac_keycode.fn_key },
    };

    // Stack of modifier key codes that are currently down.
    var held: [modifiers.len]c_macos.CGKeyCode = undefined;
    var held_len: usize = 0;
    errdefer {
        // Best-effort cleanup: a failure here cannot be reported because an
        // error is already being returned.
        while (held_len > 0) {
            held_len -= 1;
            postMacosKey(held[held_len], false, flags) catch {};
        }
    }

    var index: u32 = 0;
    while (index < repeat_count) : (index += 1) {
        for (modifiers) |modifier| {
            if (!modifier.wanted) continue;
            try postMacosKey(modifier.code, true, flags);
            held[held_len] = modifier.code;
            held_len += 1;
        }

        try postMacosKey(key_code, true, flags);
        try postMacosKey(key_code, false, flags);

        while (held_len > 0) {
            try postMacosKey(held[held_len - 1], false, flags);
            held_len -= 1;
        }

        if (delay_ns > 0 and index + 1 < repeat_count) {
            std.Thread.sleep(delay_ns);
        }
    }
}
1507
+
1508
/// Types `input.text` on Windows by sending each UTF-16 code unit as a
/// `KEYEVENTF_UNICODE` key-down followed by the matching key-up.
///
/// The text must be valid UTF-8 (`Utf8View.init` errors are propagated), as
/// are errors from `codepointToUtf16`. After every code point the function
/// sleeps for `normalizedDelayNs(input.delayMs)` when that is non-zero.
/// The return value of `SendInput` is ignored.
fn typeTextWindows(input: TypeTextInput) !void {
    const pause_ns = normalizedDelayNs(input.delayMs);
    var utf8 = try std.unicode.Utf8View.init(input.text);
    var codepoints = utf8.iterator();

    while (codepoints.nextCodepoint()) |codepoint| {
        const encoded = try codepointToUtf16(codepoint);

        for (encoded.units[0..encoded.len]) |unit| {
            var key_down = std.mem.zeroes(c_windows.INPUT);
            key_down.type = c_windows.INPUT_KEYBOARD;
            key_down.Anonymous.ki.wVk = 0;
            key_down.Anonymous.ki.wScan = unit;
            key_down.Anonymous.ki.dwFlags = c_windows.KEYEVENTF_UNICODE;

            // The key-up event is the same record with KEYEVENTF_KEYUP added.
            var key_up = key_down;
            key_up.Anonymous.ki.dwFlags = c_windows.KEYEVENTF_UNICODE | c_windows.KEYEVENTF_KEYUP;

            _ = c_windows.SendInput(1, &key_down, @sizeOf(c_windows.INPUT));
            _ = c_windows.SendInput(1, &key_up, @sizeOf(c_windows.INPUT));
        }

        if (pause_ns > 0) std.Thread.sleep(pause_ns);
    }
}
1534
+
1535
/// Translates a key name into a Windows virtual-key code.
///
/// A one-character letter or digit maps to its upper-case ASCII value; other
/// one-character names are looked up among the `VK_OEM_*` punctuation keys.
/// Longer names ("enter", "pageup", "f5", ...) are matched case-insensitively
/// against a fixed list. Returns `error.UnknownKey` when nothing matches.
fn keyCodeForWindowsKey(key_name: []const u8) !u16 {
    if (key_name.len == 1) {
        const raw = key_name[0];
        const upper = std.ascii.toUpper(raw);
        const is_letter = upper >= 'A' and upper <= 'Z';
        const is_digit = upper >= '0' and upper <= '9';
        if (is_letter or is_digit) return upper;

        return switch (raw) {
            '=' => c_windows.VK_OEM_PLUS,
            '-' => c_windows.VK_OEM_MINUS,
            '[' => c_windows.VK_OEM_4,
            ']' => c_windows.VK_OEM_6,
            ';' => c_windows.VK_OEM_1,
            '\'' => c_windows.VK_OEM_7,
            '\\' => c_windows.VK_OEM_5,
            ',' => c_windows.VK_OEM_COMMA,
            '.' => c_windows.VK_OEM_PERIOD,
            '/' => c_windows.VK_OEM_2,
            '`' => c_windows.VK_OEM_3,
            else => error.UnknownKey,
        };
    }

    const NamedKey = struct { name: []const u8, code: u16 };
    const named_keys = [_]NamedKey{
        .{ .name = "enter", .code = c_windows.VK_RETURN },
        .{ .name = "return", .code = c_windows.VK_RETURN },
        .{ .name = "tab", .code = c_windows.VK_TAB },
        .{ .name = "space", .code = c_windows.VK_SPACE },
        .{ .name = "escape", .code = c_windows.VK_ESCAPE },
        .{ .name = "esc", .code = c_windows.VK_ESCAPE },
        .{ .name = "backspace", .code = c_windows.VK_BACK },
        .{ .name = "delete", .code = c_windows.VK_DELETE },
        .{ .name = "left", .code = c_windows.VK_LEFT },
        .{ .name = "right", .code = c_windows.VK_RIGHT },
        .{ .name = "up", .code = c_windows.VK_UP },
        .{ .name = "down", .code = c_windows.VK_DOWN },
        .{ .name = "home", .code = c_windows.VK_HOME },
        .{ .name = "end", .code = c_windows.VK_END },
        .{ .name = "pageup", .code = c_windows.VK_PRIOR },
        .{ .name = "pagedown", .code = c_windows.VK_NEXT },
        .{ .name = "f1", .code = c_windows.VK_F1 },
        .{ .name = "f2", .code = c_windows.VK_F2 },
        .{ .name = "f3", .code = c_windows.VK_F3 },
        .{ .name = "f4", .code = c_windows.VK_F4 },
        .{ .name = "f5", .code = c_windows.VK_F5 },
        .{ .name = "f6", .code = c_windows.VK_F6 },
        .{ .name = "f7", .code = c_windows.VK_F7 },
        .{ .name = "f8", .code = c_windows.VK_F8 },
        .{ .name = "f9", .code = c_windows.VK_F9 },
        .{ .name = "f10", .code = c_windows.VK_F10 },
        .{ .name = "f11", .code = c_windows.VK_F11 },
        .{ .name = "f12", .code = c_windows.VK_F12 },
    };
    for (named_keys) |entry| {
        if (std.ascii.eqlIgnoreCase(key_name, entry.name)) return entry.code;
    }
    return error.UnknownKey;
}
1586
+
1587
/// Sends a single keyboard `INPUT` record for `virtual_key` through
/// `SendInput`: a key-down when `is_down` is true, a key-up otherwise.
/// The scan code is left at zero and the `SendInput` result is ignored.
fn postWindowsVirtualKey(virtual_key: u16, is_down: bool) void {
    var key_input = std.mem.zeroes(c_windows.INPUT);
    key_input.type = c_windows.INPUT_KEYBOARD;
    key_input.Anonymous.ki.wVk = virtual_key;
    key_input.Anonymous.ki.wScan = 0;
    if (is_down) {
        key_input.Anonymous.ki.dwFlags = 0;
    } else {
        key_input.Anonymous.ki.dwFlags = c_windows.KEYEVENTF_KEYUP;
    }
    _ = c_windows.SendInput(1, &key_input, @sizeOf(c_windows.INPUT));
}
1595
+
1596
/// Presses the key chord described by `input.key` on Windows.
///
/// Requested modifiers are pressed in the order Win, Alt, Ctrl, Shift, the
/// main key is tapped, and the modifiers are released in reverse order. This
/// repeats `normalizedCount(input.count)` times with a sleep of
/// `normalizedDelayNs(input.delayMs)` between repetitions (not after the last).
/// Errors from `parsePressKey` and `keyCodeForWindowsKey` are propagated.
fn pressWindows(input: PressInput) !void {
    const parsed = try parsePressKey(input.key);
    const key_code = try keyCodeForWindowsKey(parsed.key);
    const repeat_count = normalizedCount(input.count);
    const delay_ns = normalizedDelayNs(input.delayMs);

    const modifiers = [_]struct { wanted: bool, vk: u16 }{
        .{ .wanted = parsed.cmd, .vk = c_windows.VK_LWIN },
        .{ .wanted = parsed.alt, .vk = c_windows.VK_MENU },
        .{ .wanted = parsed.ctrl, .vk = c_windows.VK_CONTROL },
        .{ .wanted = parsed.shift, .vk = c_windows.VK_SHIFT },
    };

    var round: u32 = 0;
    while (round < repeat_count) : (round += 1) {
        for (modifiers) |modifier| {
            if (modifier.wanted) postWindowsVirtualKey(modifier.vk, true);
        }

        postWindowsVirtualKey(key_code, true);
        postWindowsVirtualKey(key_code, false);

        // Release in the opposite order of pressing.
        var remaining: usize = modifiers.len;
        while (remaining > 0) {
            remaining -= 1;
            if (modifiers[remaining].wanted) postWindowsVirtualKey(modifiers[remaining].vk, false);
        }

        const is_last_round = round + 1 >= repeat_count;
        if (delay_ns > 0 and !is_last_round) {
            std.Thread.sleep(delay_ns);
        }
    }
}
1622
+
1623
/// Types ASCII `input.text` on X11 by faking one key press/release per byte
/// through the XTest extension.
///
/// Printable ASCII (0x20..0x7e) is mapped directly to its keysym, because the
/// Latin-1 keysym values equal the character codes; `'\n'` maps to `XK_Return`
/// and `'\t'` to `XK_Tab`. `XStringToKeysym` is deliberately not used: it
/// expects keysym *names* ("space", "exclam"), so it rejects a literal space
/// and most punctuation.
///
/// When a character is only reachable on the shifted level of its key, Left
/// Shift is held around the key press.
///
/// Errors: `error.XOpenDisplayFailed` when the display cannot be opened,
/// `error.NonAsciiUnsupported` for bytes >= 0x80, and `error.UnknownKey` for
/// other control characters or when the current keyboard mapping has no
/// keycode for the character. After every character the function sleeps for
/// `normalizedDelayNs(input.delayMs)` when that is non-zero.
fn typeTextX11(input: TypeTextInput) !void {
    const delay_ns = normalizedDelayNs(input.delayMs);
    const display = c_x11.XOpenDisplay(null) orelse return error.XOpenDisplayFailed;
    defer _ = c_x11.XCloseDisplay(display);

    // 0 when the mapping has no Left Shift key; shifted characters are then
    // sent unshifted, as before.
    const shift_code = c_x11.XKeysymToKeycode(display, c_x11.XK_Shift_L);

    for (input.text) |byte| {
        if (byte >= 0x80) {
            return error.NonAsciiUnsupported;
        }

        const key_sym: c_ulong = switch (byte) {
            '\n' => c_x11.XK_Return,
            '\t' => c_x11.XK_Tab,
            0x20...0x7e => byte,
            else => return error.UnknownKey,
        };

        // Keycode 0 means "no key produces this keysym"; sending it would
        // raise an X protocol error instead of typing anything.
        const key_code = c_x11.XKeysymToKeycode(display, @intCast(key_sym));
        if (key_code == 0) {
            return error.UnknownKey;
        }

        const needs_shift = shift_code != 0 and
            c_x11.XKeycodeToKeysym(display, key_code, 0) != key_sym and
            c_x11.XKeycodeToKeysym(display, key_code, 1) == key_sym;

        if (needs_shift) _ = c_x11.XTestFakeKeyEvent(display, shift_code, c_x11.True, c_x11.CurrentTime);
        _ = c_x11.XTestFakeKeyEvent(display, key_code, c_x11.True, c_x11.CurrentTime);
        _ = c_x11.XTestFakeKeyEvent(display, key_code, c_x11.False, c_x11.CurrentTime);
        if (needs_shift) _ = c_x11.XTestFakeKeyEvent(display, shift_code, c_x11.False, c_x11.CurrentTime);
        _ = c_x11.XFlush(display);

        if (delay_ns > 0) {
            std.Thread.sleep(delay_ns);
        }
    }
}
1646
+
1647
/// Translates a key name into an X11 keysym.
///
/// A one-character name must be printable ASCII; it is lower-cased and
/// returned as-is, because Latin-1 keysym values equal the character codes.
/// This also accepts punctuation such as "=" or "/", which `XStringToKeysym`
/// rejects because it only understands keysym names ("equal", "slash").
///
/// Longer names are matched case-insensitively against the same set the macOS
/// and Windows lookups accept, including "f1".."f12".
/// Returns `error.UnknownKey` when nothing matches.
fn keySymForX11Key(key_name: []const u8) !c_ulong {
    if (key_name.len == 1) {
        const ch = std.ascii.toLower(key_name[0]);
        if (ch < 0x20 or ch > 0x7e) return error.UnknownKey;
        return ch;
    }

    const NamedKey = struct { name: []const u8, sym: c_ulong };
    const named_keys = [_]NamedKey{
        .{ .name = "enter", .sym = c_x11.XK_Return },
        .{ .name = "return", .sym = c_x11.XK_Return },
        .{ .name = "tab", .sym = c_x11.XK_Tab },
        .{ .name = "space", .sym = c_x11.XK_space },
        .{ .name = "escape", .sym = c_x11.XK_Escape },
        .{ .name = "esc", .sym = c_x11.XK_Escape },
        .{ .name = "backspace", .sym = c_x11.XK_BackSpace },
        .{ .name = "delete", .sym = c_x11.XK_Delete },
        .{ .name = "left", .sym = c_x11.XK_Left },
        .{ .name = "right", .sym = c_x11.XK_Right },
        .{ .name = "up", .sym = c_x11.XK_Up },
        .{ .name = "down", .sym = c_x11.XK_Down },
        .{ .name = "home", .sym = c_x11.XK_Home },
        .{ .name = "end", .sym = c_x11.XK_End },
        .{ .name = "pageup", .sym = c_x11.XK_Page_Up },
        .{ .name = "pagedown", .sym = c_x11.XK_Page_Down },
        .{ .name = "f1", .sym = c_x11.XK_F1 },
        .{ .name = "f2", .sym = c_x11.XK_F2 },
        .{ .name = "f3", .sym = c_x11.XK_F3 },
        .{ .name = "f4", .sym = c_x11.XK_F4 },
        .{ .name = "f5", .sym = c_x11.XK_F5 },
        .{ .name = "f6", .sym = c_x11.XK_F6 },
        .{ .name = "f7", .sym = c_x11.XK_F7 },
        .{ .name = "f8", .sym = c_x11.XK_F8 },
        .{ .name = "f9", .sym = c_x11.XK_F9 },
        .{ .name = "f10", .sym = c_x11.XK_F10 },
        .{ .name = "f11", .sym = c_x11.XK_F11 },
        .{ .name = "f12", .sym = c_x11.XK_F12 },
    };
    for (named_keys) |entry| {
        if (std.ascii.eqlIgnoreCase(key_name, entry.name)) return entry.sym;
    }
    return error.UnknownKey;
}
1671
+
1672
/// Fakes one key event for `key_sym` on `display` via XTest and flushes the
/// connection: a press when `is_down` is true, a release otherwise.
/// Returns `error.UnknownKey` when `XKeysymToKeycode` yields keycode 0.
fn postX11Key(display: *c_x11.Display, key_sym: c_ulong, is_down: bool) !void {
    const resolved_code = c_x11.XKeysymToKeycode(display, @intCast(key_sym));
    if (resolved_code == 0) return error.UnknownKey;

    const press_state = if (is_down) c_x11.True else c_x11.False;
    _ = c_x11.XTestFakeKeyEvent(display, resolved_code, press_state, c_x11.CurrentTime);
    _ = c_x11.XFlush(display);
}
1680
+
1681
/// Presses the key chord described by `input.key` on X11.
///
/// Requested modifiers are pressed in the order Super, Alt, Control, Shift
/// (left-hand variants), the main key is tapped, and the modifiers are
/// released in reverse order. This repeats `normalizedCount(input.count)`
/// times with a sleep of `normalizedDelayNs(input.delayMs)` between
/// repetitions (not after the last).
///
/// Every keysym in the chord is resolved to a keycode *before* the first
/// event is sent. `postX11Key` fails only when a keysym has no keycode, so
/// checking up front means `error.UnknownKey` can no longer surface after
/// modifiers were already pressed and leave them stuck down.
///
/// Errors: those of `parsePressKey` and `keySymForX11Key`,
/// `error.XOpenDisplayFailed`, and `error.UnknownKey`.
fn pressX11(input: PressInput) !void {
    const parsed = try parsePressKey(input.key);
    const key_sym = try keySymForX11Key(parsed.key);
    const repeat_count = normalizedCount(input.count);
    const delay_ns = normalizedDelayNs(input.delayMs);

    const display = c_x11.XOpenDisplay(null) orelse return error.XOpenDisplayFailed;
    defer _ = c_x11.XCloseDisplay(display);

    const modifiers = [_]struct { wanted: bool, sym: c_ulong }{
        .{ .wanted = parsed.cmd, .sym = c_x11.XK_Super_L },
        .{ .wanted = parsed.alt, .sym = c_x11.XK_Alt_L },
        .{ .wanted = parsed.ctrl, .sym = c_x11.XK_Control_L },
        .{ .wanted = parsed.shift, .sym = c_x11.XK_Shift_L },
    };

    // Fail before touching the keyboard state if any key is unmapped.
    for (modifiers) |modifier| {
        if (!modifier.wanted) continue;
        if (c_x11.XKeysymToKeycode(display, @intCast(modifier.sym)) == 0) return error.UnknownKey;
    }
    if (c_x11.XKeysymToKeycode(display, @intCast(key_sym)) == 0) return error.UnknownKey;

    var index: u32 = 0;
    while (index < repeat_count) : (index += 1) {
        for (modifiers) |modifier| {
            if (modifier.wanted) try postX11Key(display, modifier.sym, true);
        }

        try postX11Key(display, key_sym, true);
        try postX11Key(display, key_sym, false);

        // Release in the opposite order of pressing.
        var remaining: usize = modifiers.len;
        while (remaining > 0) {
            remaining -= 1;
            if (modifiers[remaining].wanted) try postX11Key(display, modifiers[remaining].sym, false);
        }

        if (delay_ns > 0 and index + 1 < repeat_count) {
            std.Thread.sleep(delay_ns);
        }
    }
}
1710
+
1711
/// Captures a screenshot image for one of three targets, chosen by `input`:
///
/// - `window_id` set: the rectangle of that window, captured from the display
///   chosen by `resolveDisplayForRect`;
/// - `region` set: that region, offset by the origin of the display selected
///   by `display_index`;
/// - neither set: the whole display selected by `display_index`.
///
/// Passing both `window_id` and `region` is rejected with
/// `error.InvalidScreenshotInput`. Other errors: `error.InvalidWindowId`,
/// `error.WindowNotFound`, `error.DisplayResolutionFailed` and
/// `error.CaptureFailed` (the capture call returned null).
/// The result carries the image together with the captured rectangle and the
/// index of the display it was taken from.
fn createScreenshotImage(input: struct {
    display_index: ?f64,
    window_id: ?f64,
    region: ?ScreenshotRegion,
}) !ScreenshotCapture {
    const has_window = input.window_id != null;
    const has_region = input.region != null;
    if (has_window and has_region) return error.InvalidScreenshotInput;

    if (input.window_id) |raw_window_id| {
        const window_id = normalizeWindowId(raw_window_id) catch return error.InvalidWindowId;
        const bounds = findWindowBoundsById(window_id) catch return error.WindowNotFound;
        const host_display = resolveDisplayForRect(bounds) catch return error.DisplayResolutionFailed;

        const captured = c.CGDisplayCreateImageForRect(host_display.id, bounds);
        if (captured == null) return error.CaptureFailed;

        return .{
            .image = captured,
            .capture_x = bounds.origin.x,
            .capture_y = bounds.origin.y,
            .capture_width = bounds.size.width,
            .capture_height = bounds.size.height,
            .desktop_index = host_display.index,
        };
    }

    const target_display = resolveDisplayId(input.display_index) catch return error.DisplayResolutionFailed;

    if (input.region) |region| {
        // Region coordinates are relative to the selected display's origin.
        const capture_rect: c.CGRect = .{
            .origin = .{
                .x = target_display.bounds.origin.x + region.x,
                .y = target_display.bounds.origin.y + region.y,
            },
            .size = .{ .width = region.width, .height = region.height },
        };

        const captured = c.CGDisplayCreateImageForRect(target_display.id, capture_rect);
        if (captured == null) return error.CaptureFailed;

        return .{
            .image = captured,
            .capture_x = capture_rect.origin.x,
            .capture_y = capture_rect.origin.y,
            .capture_width = capture_rect.size.width,
            .capture_height = capture_rect.size.height,
            .desktop_index = target_display.index,
        };
    }

    const captured = c.CGDisplayCreateImage(target_display.id);
    if (captured == null) return error.CaptureFailed;

    return .{
        .image = captured,
        .capture_x = target_display.bounds.origin.x,
        .capture_y = target_display.bounds.origin.y,
        .capture_width = target_display.bounds.size.width,
        .capture_height = target_display.bounds.size.height,
        .desktop_index = target_display.index,
    };
}
359
1784
 
360
- fn executeDragCommand(allocator: std.mem.Allocator, payload_json: []const u8) ![]const u8 {
361
- if (builtin.target.os.tag != .macos) {
362
- return makeErrorJson(allocator, "drag is only supported on macOS");
1785
/// Converts a window id received as a floating-point number into a `u32`.
///
/// The value is rounded to the nearest integer and must end up in
/// `1..=maxInt(u32)`. NaN, infinities, values that round to zero or below,
/// and values above the `u32` range all yield `error.InvalidWindowId`.
/// The range is checked in floating point before converting, because
/// `@intFromFloat` and `@intCast` are illegal behaviour for out-of-range input.
fn normalizeWindowId(raw_id: f64) !u32 {
    if (!std.math.isFinite(raw_id)) {
        return error.InvalidWindowId;
    }
    const rounded = std.math.round(raw_id);
    const max_id: f64 = @floatFromInt(std.math.maxInt(u32));
    if (rounded < 1 or rounded > max_id) {
        return error.InvalidWindowId;
    }
    return @intFromFloat(rounded);
}
364
1792
 
365
- var parsed = std.json.parseFromSlice(DragPayload, allocator, payload_json, .{}) catch {
366
- return makeErrorJson(allocator, "invalid drag payload json");
1793
/// Looks up the bounds of the visible window whose id is `target_window_id`.
///
/// Visible windows are walked with `window.forEachVisibleWindow`. The visitor
/// stops the walk at the first match by returning `error.Found`, which is
/// treated here as success; any other error from the walk is propagated.
/// Returns `error.WindowNotFound` when no visible window has that id.
fn findWindowBoundsById(target_window_id: u32) !c.CGRect {
    const Search = struct {
        target_id: u32,
        bounds: ?c.CGRect = null,

        fn visit(self: *@This(), info: window.WindowInfo) !void {
            if (info.id == self.target_id) {
                self.bounds = .{
                    .origin = .{ .x = info.bounds.x, .y = info.bounds.y },
                    .size = .{ .width = info.bounds.width, .height = info.bounds.height },
                };
                // Sentinel error used only to end the iteration early.
                return error.Found;
            }
        }
    };

    var search = Search{ .target_id = target_window_id };
    window.forEachVisibleWindow(Search, &search, Search.visit) catch |err| {
        if (err != error.Found) {
            return err;
        }
    };

    return search.bounds orelse error.WindowNotFound;
}
377
1822
 
378
- moveCursorToPoint(from) catch {
379
- return makeErrorJson(allocator, "failed to move cursor to drag origin");
1823
/// Picks the active display that overlaps `rect` the most.
///
/// Up to 16 active displays are queried. The display with the largest
/// intersection area wins; ties keep the earlier display, and the first
/// display is used when nothing overlaps at all.
/// Returns `error.DisplayQueryFailed` when the display list cannot be read or
/// is empty.
fn resolveDisplayForRect(rect: c.CGRect) !SelectedDisplay {
    var display_ids: [16]c.CGDirectDisplayID = undefined;
    var display_count: u32 = 0;
    const status = c.CGGetActiveDisplayList(display_ids.len, &display_ids, &display_count);
    if (status != c.kCGErrorSuccess or display_count == 0) {
        return error.DisplayQueryFailed;
    }

    // Starting below zero makes the first display win even with no overlap.
    var winner: usize = 0;
    var winner_area: f64 = -1;
    for (display_ids[0..display_count], 0..) |display_id, position| {
        const area = intersectionArea(rect, c.CGDisplayBounds(display_id));
        if (area > winner_area) {
            winner_area = area;
            winner = position;
        }
    }

    const winner_id = display_ids[winner];
    return .{
        .id = winner_id,
        .index = winner,
        .bounds = c.CGDisplayBounds(winner_id),
    };
}
1850
+
1851
/// Returns the area of the overlap between rectangles `a` and `b`, or 0 when
/// they do not overlap (touching edges count as no overlap).
fn intersectionArea(a: c.CGRect, b: c.CGRect) f64 {
    const a_right = a.origin.x + a.size.width;
    const b_right = b.origin.x + b.size.width;
    const a_bottom = a.origin.y + a.size.height;
    const b_bottom = b.origin.y + b.size.height;

    const overlap_left = @max(a.origin.x, b.origin.x);
    const overlap_top = @max(a.origin.y, b.origin.y);
    const overlap_right = @min(a_right, b_right);
    const overlap_bottom = @min(a_bottom, b_bottom);

    if (overlap_right <= overlap_left or overlap_bottom <= overlap_top) {
        return 0;
    }
    return (overlap_right - overlap_left) * (overlap_bottom - overlap_top);
}
381
1861
 
382
- postMouseButtonEvent(from, button_kind, true, 1) catch {
383
- return makeErrorJson(allocator, "failed to post drag mouse-down");
1862
/// Serializes every visible window into a JSON array of `WindowInfoOutput`
/// objects.
///
/// The JSON is built in a 64 KiB stack buffer and then copied with
/// `std.heap.c_allocator`; the returned slice is that heap copy. Windows for
/// which `resolveDisplayForRect` fails are left out of the list. Write errors
/// (for example when the buffer is full), errors from
/// `window.forEachVisibleWindow`, and allocation failure are propagated.
fn serializeWindowListJson() ![]u8 {
    const Emitter = struct {
        stream: *std.io.FixedBufferStream([]u8),
        first: bool,

        fn emit(self: *@This(), info: window.WindowInfo) !void {
            const window_rect: c.CGRect = .{
                .origin = .{ .x = info.bounds.x, .y = info.bounds.y },
                .size = .{ .width = info.bounds.width, .height = info.bounds.height },
            };
            // A window whose display cannot be resolved is skipped, not fatal.
            const host_display = resolveDisplayForRect(window_rect) catch return;

            const entry = WindowInfoOutput{
                .id = info.id,
                .ownerPid = info.owner_pid,
                .ownerName = info.owner_name,
                .title = info.title,
                .x = info.bounds.x,
                .y = info.bounds.y,
                .width = info.bounds.width,
                .height = info.bounds.height,
                .desktopIndex = @intCast(host_display.index),
            };

            if (self.first) {
                self.first = false;
            } else {
                try self.stream.writer().writeByte(',');
            }
            try self.stream.writer().print("{f}", .{std.json.fmt(entry, .{})});
        }
    };

    var json_buffer: [64 * 1024]u8 = undefined;
    var json_stream = std.io.fixedBufferStream(&json_buffer);

    try json_stream.writer().writeByte('[');
    var emitter = Emitter{ .stream = &json_stream, .first = true };
    try window.forEachVisibleWindow(Emitter, &emitter, Emitter.emit);
    try json_stream.writer().writeByte(']');

    return std.heap.c_allocator.dupe(u8, json_stream.getWritten());
}
397
1906
 
398
- moveCursorToPoint(next_point) catch {
399
- return makeErrorJson(allocator, "failed during drag cursor movement");
1907
/// Returns `image` scaled down so that its longer edge is at most
/// `screenshot_max_long_edge_px`, together with the resulting pixel size.
///
/// When the image already fits, the same image is returned after a
/// `CFRetain`. Otherwise it is redrawn into a new RGBA bitmap context with
/// high interpolation quality, keeping the aspect ratio (each side is at
/// least 1 pixel), and the image created from that context is returned.
/// Returns `error.ScaleFailed` when the colour space, bitmap context or
/// scaled image cannot be created.
fn scaleScreenshotImageIfNeeded(image: c.CGImageRef) !ScaledScreenshotImage {
    const source_width = @as(f64, @floatFromInt(c.CGImageGetWidth(image)));
    const source_height = @as(f64, @floatFromInt(c.CGImageGetHeight(image)));
    const longest_side = @max(source_width, source_height);

    if (longest_side <= screenshot_max_long_edge_px) {
        // Retained so the returned image has its own reference.
        _ = c.CFRetain(image);
        return .{
            .image = image,
            .width = source_width,
            .height = source_height,
        };
    }

    const shrink = screenshot_max_long_edge_px / longest_side;
    const scaled_width = @max(1, @as(usize, @intFromFloat(std.math.round(source_width * shrink))));
    const scaled_height = @max(1, @as(usize, @intFromFloat(std.math.round(source_height * shrink))));

    const rgb_space = c.CGColorSpaceCreateDeviceRGB();
    if (rgb_space == null) return error.ScaleFailed;
    defer c.CFRelease(rgb_space);

    const pixel_format: c.CGBitmapInfo = c.kCGImageAlphaPremultipliedLast;
    // Null data and 0 bytes-per-row are passed through to the bitmap context.
    const bitmap = c.CGBitmapContextCreate(null, scaled_width, scaled_height, 8, 0, rgb_space, pixel_format);
    if (bitmap == null) return error.ScaleFailed;
    defer c.CFRelease(bitmap);

    c.CGContextSetInterpolationQuality(bitmap, c.kCGInterpolationHigh);
    const target_rect: c.CGRect = .{
        .origin = .{ .x = 0, .y = 0 },
        .size = .{
            .width = @as(c.CGFloat, @floatFromInt(scaled_width)),
            .height = @as(c.CGFloat, @floatFromInt(scaled_height)),
        },
    };
    c.CGContextDrawImage(bitmap, target_rect, image);

    const result_image = c.CGBitmapContextCreateImage(bitmap);
    if (result_image == null) return error.ScaleFailed;

    return .{
        .image = result_image,
        .width = @as(f64, @floatFromInt(scaled_width)),
        .height = @as(f64, @floatFromInt(scaled_height)),
    };
}
413
1965
 
414
- const MouseButtonKind = enum {
415
- left,
416
- right,
417
- middle,
418
- };
1966
/// Resolves an optional display index into the matching active display.
///
/// A null `display_index` selects display 0. Otherwise the value is rounded
/// to the nearest integer. NaN, infinities, negative values and values
/// beyond the `u32` range are rejected with `error.InvalidDisplayIndex`
/// before the float is converted, because `@intFromFloat` is illegal
/// behaviour for such input.
///
/// Up to 16 active displays are queried. Returns `error.DisplayQueryFailed`
/// when the list cannot be read and `error.InvalidDisplayIndex` when the
/// index is not below the number of active displays.
fn resolveDisplayId(display_index: ?f64) !SelectedDisplay {
    const selected_index: usize = if (display_index) |value| blk: {
        if (!std.math.isFinite(value)) {
            return error.InvalidDisplayIndex;
        }
        const rounded = std.math.round(value);
        const upper_bound: f64 = @floatFromInt(std.math.maxInt(u32));
        if (rounded < 0 or rounded > upper_bound) {
            return error.InvalidDisplayIndex;
        }
        break :blk @as(usize, @intFromFloat(rounded));
    } else 0;

    var display_ids: [16]c.CGDirectDisplayID = undefined;
    var display_count: u32 = 0;
    const list_result = c.CGGetActiveDisplayList(display_ids.len, &display_ids, &display_count);
    if (list_result != c.kCGErrorSuccess) {
        return error.DisplayQueryFailed;
    }
    if (selected_index >= display_count) {
        return error.InvalidDisplayIndex;
    }

    const selected_id = display_ids[selected_index];
    return .{
        .id = selected_id,
        .index = selected_index,
        .bounds = c.CGDisplayBounds(selected_id),
    };
}
1991
+
1992
/// Writes `input.image` as a PNG file at `input.output_path`.
///
/// The path is turned into a file URL, an image destination of type
/// "public.png" holding one image is created for it, the image is added and
/// the destination is finalized. Every Core Foundation object created here is
/// released before returning.
/// Errors: `error.FileUrlCreateFailed`, `error.PngTypeCreateFailed`,
/// `error.ImageDestinationCreateFailed`, `error.ImageDestinationFinalizeFailed`.
fn writeScreenshotPng(input: struct {
    image: c.CGImageRef,
    output_path: []const u8,
}) !void {
    const path_bytes: [*]const u8 = @ptrCast(input.output_path.ptr);
    const path_len = @as(c_long, @intCast(input.output_path.len));

    // The final 0 is the isDirectory argument.
    const target_url = c.CFURLCreateFromFileSystemRepresentation(null, path_bytes, path_len, 0);
    if (target_url == null) return error.FileUrlCreateFailed;
    defer c.CFRelease(target_url);

    const png_uti = c.CFStringCreateWithCString(null, "public.png", c.kCFStringEncodingUTF8);
    if (png_uti == null) return error.PngTypeCreateFailed;
    defer c.CFRelease(png_uti);

    const png_destination = c.CGImageDestinationCreateWithURL(target_url, png_uti, 1, null);
    if (png_destination == null) return error.ImageDestinationCreateFailed;
    defer c.CFRelease(png_destination);

    c.CGImageDestinationAddImage(png_destination, input.image, null);
    if (!c.CGImageDestinationFinalize(png_destination)) {
        return error.ImageDestinationFinalizeFailed;
    }
}
419
2026
 
420
2027
  fn resolveMouseButton(button: []const u8) !MouseButtonKind {
421
2028
  if (std.ascii.eqlIgnoreCase(button, "left")) {
@@ -481,13 +2088,102 @@ fn moveCursorToPoint(point: c.CGPoint) !void {
481
2088
  c.CGEventPost(c.kCGHIDEventTap, move_event);
482
2089
  }
483
2090
 
2091
/// Opens the default X11 display.
///
/// Returns `error.UnsupportedPlatform` when the build target is not Linux and
/// `error.XOpenDisplayFailed` when `XOpenDisplay` returns null.
fn openX11Display() !*c_x11.Display {
    const targets_linux = builtin.target.os.tag == .linux;
    if (!targets_linux) return error.UnsupportedPlatform;

    if (c_x11.XOpenDisplay(null)) |display| {
        return display;
    }
    return error.XOpenDisplayFailed;
}
2097
+
2098
/// Maps a `MouseButtonKind` to the X11 pointer button number:
/// left is 1, middle is 2, right is 3.
fn resolveX11ButtonCode(button: MouseButtonKind) c_uint {
    const code: c_uint = switch (button) {
        .left => 1,
        .middle => 2,
        .right => 3,
    };
    return code;
}
2105
+
2106
/// Rounds a floating-point coordinate to the nearest integer and returns it
/// as a `c_int`.
///
/// NaN, infinities and values outside the `c_int` range yield
/// `error.InvalidPoint`. The range is checked in floating point before
/// converting: a finite but huge value such as 1e300 would otherwise reach
/// `@intFromFloat`, which is illegal behaviour for out-of-range input.
fn normalizedCoordinate(value: f64) !c_int {
    if (!std.math.isFinite(value)) {
        return error.InvalidPoint;
    }
    const rounded = std.math.round(value);
    const lowest: f64 = @floatFromInt(std.math.minInt(c_int));
    const highest: f64 = @floatFromInt(std.math.maxInt(c_int));
    if (rounded < lowest or rounded > highest) {
        return error.InvalidPoint;
    }
    return @intFromFloat(rounded);
}
2116
+
2117
/// Warps the X11 pointer to `point`, given in root-window coordinates of
/// `display`'s default root window.
/// Returns `error.InvalidPoint` (from `normalizedCoordinate`) when a
/// coordinate cannot be represented as a `c_int`. The connection is not
/// flushed here.
fn moveCursorToPointX11(point: Point, display: *c_x11.Display) !void {
    const target_x = try normalizedCoordinate(point.x);
    const target_y = try normalizedCoordinate(point.y);
    const root = c_x11.XDefaultRootWindow(display);
    // Source window 0 with a zero source rectangle; destination is the root.
    _ = c_x11.XWarpPointer(display, 0, root, 0, 0, 0, 0, target_x, target_y);
}
2122
+
2123
/// Fakes a press (`is_down` true) or release of `button` on `display` through
/// the XTest extension.
/// Returns `error.EventPostFailed` when `XTestFakeButtonEvent` returns 0.
fn postMouseButtonEventX11(button: MouseButtonKind, is_down: bool, display: *c_x11.Display) !void {
    var press_state: c_int = c_x11.False;
    if (is_down) press_state = c_x11.True;

    const status = c_x11.XTestFakeButtonEvent(
        display,
        resolveX11ButtonCode(button),
        press_state,
        c_x11.CurrentTime,
    );
    if (status == 0) return error.EventPostFailed;
}
2131
+
2132
/// Performs one click on X11: moves the pointer to `point`, then fakes a
/// press followed by a release of `button`. The first error stops the
/// sequence and is returned.
fn postClickPairX11(point: Point, button: MouseButtonKind, display: *c_x11.Display) !void {
    try moveCursorToPointX11(point, display);
    for ([_]bool{ true, false }) |is_down| {
        try postMouseButtonEventX11(button, is_down, display);
    }
}
2137
+
2138
/// Reads the current pointer position relative to `display`'s default root
/// window.
/// Returns `error.CursorReadFailed` when `XQueryPointer` returns 0.
fn currentCursorPointX11(display: *c_x11.Display) !struct { x: c_int, y: c_int } {
    // XQueryPointer fills all of these; only the root coordinates are used.
    var reported_root: c_x11.Window = 0;
    var reported_child: c_x11.Window = 0;
    var pointer_root_x: c_int = 0;
    var pointer_root_y: c_int = 0;
    var pointer_win_x: c_int = 0;
    var pointer_win_y: c_int = 0;
    var button_mask: c_uint = 0;

    const status = c_x11.XQueryPointer(
        display,
        c_x11.XDefaultRootWindow(display),
        &reported_root,
        &reported_child,
        &pointer_root_x,
        &pointer_root_y,
        &pointer_win_x,
        &pointer_win_y,
        &button_mask,
    );
    if (status == 0) return error.CursorReadFailed;

    return .{ .x = pointer_root_x, .y = pointer_root_y };
}
2165
+
484
2166
  fn initModule(js: *napigen.JsContext, exports: napigen.napi_value) !napigen.napi_value {
485
- try js.setNamedProperty(exports, "execute", try js.createFunction(execute));
2167
+ try js.setNamedProperty(exports, "screenshot", try js.createFunction(screenshot));
2168
+ try js.setNamedProperty(exports, "click", try js.createFunction(click));
2169
+ try js.setNamedProperty(exports, "typeText", try js.createFunction(typeText));
2170
+ try js.setNamedProperty(exports, "press", try js.createFunction(press));
2171
+ try js.setNamedProperty(exports, "scroll", try js.createFunction(scroll));
2172
+ try js.setNamedProperty(exports, "drag", try js.createFunction(drag));
2173
+ try js.setNamedProperty(exports, "hover", try js.createFunction(hover));
2174
+ try js.setNamedProperty(exports, "mouseMove", try js.createFunction(mouseMove));
2175
+ try js.setNamedProperty(exports, "mouseDown", try js.createFunction(mouseDown));
2176
+ try js.setNamedProperty(exports, "mouseUp", try js.createFunction(mouseUp));
2177
+ try js.setNamedProperty(exports, "mousePosition", try js.createFunction(mousePosition));
2178
+ try js.setNamedProperty(exports, "displayList", try js.createFunction(displayList));
2179
+ try js.setNamedProperty(exports, "windowList", try js.createFunction(windowList));
2180
+ try js.setNamedProperty(exports, "clipboardGet", try js.createFunction(clipboardGet));
2181
+ try js.setNamedProperty(exports, "clipboardSet", try js.createFunction(clipboardSet));
486
2182
  return exports;
487
2183
  }
488
2184
 
489
2185
  comptime {
490
- if (!builtin.is_test) {
2186
+ if (build_options.enable_napigen) {
491
2187
  napigen.defineModule(initModule);
492
2188
  }
493
2189
  }