usecomputer 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/zig/src/lib.zig CHANGED
@@ -1,4 +1,4 @@
1
- // Native N-API module for usecomputer commands on macOS using Zig.
1
+ // Native N-API module for usecomputer desktop automation commands.
2
2
  // Exports direct typed methods (no string command dispatcher) so TS can call
3
3
  // high-level native functions and receive structured error objects.
4
4
 
@@ -6,7 +6,12 @@ const std = @import("std");
6
6
  const builtin = @import("builtin");
7
7
  const scroll_impl = @import("scroll.zig");
8
8
  const window = @import("window.zig");
9
- const napigen = if (builtin.is_test) undefined else @import("napigen");
9
+ // napigen is only available when building as N-API library.
10
+ // The build system provides a "napigen" module for the library target but not
11
+ // for the standalone exe or test targets. We detect availability at comptime
12
+ // via the build options module.
13
+ const build_options = @import("build_options");
14
+ const napigen = if (build_options.enable_napigen) @import("napigen") else undefined;
10
15
  const c_macos = if (builtin.target.os.tag == .macos) @cImport({
11
16
  @cInclude("CoreGraphics/CoreGraphics.h");
12
17
  @cInclude("CoreFoundation/CoreFoundation.h");
@@ -19,8 +24,13 @@ const c_windows = if (builtin.target.os.tag == .windows) @cImport({
19
24
 
20
25
  const c_x11 = if (builtin.target.os.tag == .linux) @cImport({
21
26
  @cInclude("X11/Xlib.h");
27
+ @cInclude("X11/Xutil.h");
22
28
  @cInclude("X11/keysym.h");
29
+ @cInclude("X11/extensions/XShm.h");
23
30
  @cInclude("X11/extensions/XTest.h");
31
+ @cInclude("sys/ipc.h");
32
+ @cInclude("sys/shm.h");
33
+ @cInclude("png.h");
24
34
  }) else struct {};
25
35
 
26
36
  const c = c_macos;
@@ -191,7 +201,7 @@ fn todoNotImplemented(command: []const u8) CommandResult {
191
201
  return failCommand(command, "TODO_NOT_IMPLEMENTED", "TODO not implemented");
192
202
  }
193
203
 
194
- const Point = struct {
204
+ pub const Point = struct {
195
205
  x: f64,
196
206
  y: f64,
197
207
  };
@@ -221,7 +231,7 @@ const DragInput = struct {
221
231
  button: ?[]const u8 = null,
222
232
  };
223
233
 
224
- const ScreenshotRegion = struct {
234
+ pub const ScreenshotRegion = struct {
225
235
  x: f64,
226
236
  y: f64,
227
237
  width: f64,
@@ -236,7 +246,7 @@ const ScreenshotInput = struct {
236
246
  annotate: ?bool = null,
237
247
  };
238
248
 
239
- const ScreenshotOutput = struct {
249
+ pub const ScreenshotOutput = struct {
240
250
  path: []const u8,
241
251
  desktopIndex: f64,
242
252
  captureX: f64,
@@ -247,25 +257,51 @@ const ScreenshotOutput = struct {
247
257
  imageHeight: f64,
248
258
  };
249
259
 
250
- const SelectedDisplay = struct {
260
+ const SelectedDisplay = if (builtin.target.os.tag == .macos) struct {
251
261
  id: c.CGDirectDisplayID,
252
262
  index: usize,
253
263
  bounds: c.CGRect,
264
+ } else struct {
265
+ id: u32,
266
+ index: usize,
267
+ bounds: struct {
268
+ x: f64,
269
+ y: f64,
270
+ width: f64,
271
+ height: f64,
272
+ },
254
273
  };
255
274
 
256
- const ScreenshotCapture = struct {
275
+ const ScreenshotCapture = if (builtin.target.os.tag == .macos) struct {
257
276
  image: c.CGImageRef,
258
277
  capture_x: f64,
259
278
  capture_y: f64,
260
279
  capture_width: f64,
261
280
  capture_height: f64,
262
281
  desktop_index: usize,
282
+ } else struct {
283
+ image: RawRgbaImage,
284
+ capture_x: f64,
285
+ capture_y: f64,
286
+ capture_width: f64,
287
+ capture_height: f64,
288
+ desktop_index: usize,
263
289
  };
264
290
 
265
- const ScaledScreenshotImage = struct {
291
+ const ScaledScreenshotImage = if (builtin.target.os.tag == .macos) struct {
266
292
  image: c.CGImageRef,
267
293
  width: f64,
268
294
  height: f64,
295
+ } else struct {
296
+ image: RawRgbaImage,
297
+ width: f64,
298
+ height: f64,
299
+ };
300
+
301
+ const RawRgbaImage = struct {
302
+ pixels: []u8,
303
+ width: usize,
304
+ height: usize,
269
305
  };
270
306
 
271
307
  const TypeTextInput = struct {
@@ -290,13 +326,50 @@ const ClipboardSetInput = struct {
290
326
  };
291
327
 
292
328
  pub fn screenshot(input: ScreenshotInput) DataResult(ScreenshotOutput) {
293
- if (builtin.target.os.tag != .macos) {
294
- return failData(ScreenshotOutput, "screenshot", "UNSUPPORTED_PLATFORM", "screenshot is only supported on macOS");
295
- }
296
-
297
329
  _ = input.annotate;
298
330
  const output_path = input.path orelse "./screenshot.png";
299
331
 
332
+ if (builtin.target.os.tag == .linux) {
333
+ if (input.window != null) {
334
+ return failData(ScreenshotOutput, "screenshot", "UNSUPPORTED_INPUT", "window screenshots are not supported on Linux yet");
335
+ }
336
+
337
+ const capture = createLinuxScreenshotImage(.{
338
+ .display_index = input.display,
339
+ .region = input.region,
340
+ }) catch |err| {
341
+ return failData(ScreenshotOutput, "screenshot", linuxScreenshotErrorCode(err), linuxScreenshotErrorMessage(err));
342
+ };
343
+ defer std.heap.c_allocator.free(capture.image.pixels);
344
+
345
+ const scaled_image = scaleLinuxScreenshotImageIfNeeded(capture.image) catch {
346
+ return failData(ScreenshotOutput, "screenshot", "SCALE_FAILED", "failed to scale screenshot image");
347
+ };
348
+ defer std.heap.c_allocator.free(scaled_image.image.pixels);
349
+
350
+ writeLinuxScreenshotPng(.{
351
+ .image = scaled_image.image,
352
+ .output_path = output_path,
353
+ }) catch {
354
+ return failData(ScreenshotOutput, "screenshot", "WRITE_FAILED", "failed to write screenshot file");
355
+ };
356
+
357
+ return okData(ScreenshotOutput, .{
358
+ .path = output_path,
359
+ .desktopIndex = @floatFromInt(capture.desktop_index),
360
+ .captureX = capture.capture_x,
361
+ .captureY = capture.capture_y,
362
+ .captureWidth = capture.capture_width,
363
+ .captureHeight = capture.capture_height,
364
+ .imageWidth = scaled_image.width,
365
+ .imageHeight = scaled_image.height,
366
+ });
367
+ }
368
+
369
+ if (builtin.target.os.tag != .macos) {
370
+ return failData(ScreenshotOutput, "screenshot", "UNSUPPORTED_PLATFORM", "screenshot is only supported on macOS and Linux X11");
371
+ }
372
+
300
373
  const capture = createScreenshotImage(.{
301
374
  .display_index = input.display,
302
375
  .window_id = input.window,
@@ -330,11 +403,388 @@ pub fn screenshot(input: ScreenshotInput) DataResult(ScreenshotOutput) {
330
403
  });
331
404
  }
332
405
 
333
- pub fn click(input: ClickInput) CommandResult {
334
- if (builtin.target.os.tag != .macos) {
335
- return failCommand("click", "UNSUPPORTED_PLATFORM", "click is only supported on macOS");
406
/// Translates an error raised by the Linux screenshot path into the short
/// machine-readable code placed in the failure result.
///
/// Input validation errors map to "INVALID_INPUT", errors about reaching the
/// X server map to "X11_UNAVAILABLE", and every other error (including ones
/// this function does not name) maps to "CAPTURE_FAILED".
fn linuxScreenshotErrorCode(err: anyerror) []const u8 {
    const invalid_input = "INVALID_INPUT";
    const x11_unavailable = "X11_UNAVAILABLE";
    const capture_failed = "CAPTURE_FAILED";

    switch (err) {
        error.InvalidDisplayIndex,
        error.InvalidRegion,
        error.RegionOutOfBounds,
        => return invalid_input,

        error.DisplayOpenFailed,
        error.MissingDisplayEnv,
        error.NoScreens,
        error.XShmUnavailable,
        => return x11_unavailable,

        // Capture-stage failures (CaptureFailed, ImageCreateFailed,
        // ShmGetFailed, ShmAttachFailed, ShmAllocFailed) and any unlisted
        // error share the same code.
        else => return capture_failed,
    }
}
414
+
415
/// Produces the human-readable message that accompanies a failed Linux
/// screenshot.
///
/// Errors with a specific, actionable cause get their own text; everything
/// else (capture-stage failures and unlisted errors) uses one generic message.
fn linuxScreenshotErrorMessage(err: anyerror) []const u8 {
    const generic_failure = "failed to capture screenshot image";

    return switch (err) {
        // Problems reaching the X server.
        error.MissingDisplayEnv => "DISPLAY is not set; Linux screenshots require an X11 session",
        error.DisplayOpenFailed => "failed to open X11 display",
        error.NoScreens => "X11 display has no screens",
        error.XShmUnavailable => "X11 shared memory extension is unavailable",

        // Problems with the caller's request.
        error.InvalidDisplayIndex => "Linux screenshots currently support only display 0",
        error.InvalidRegion => "invalid screenshot region",
        error.RegionOutOfBounds => "screenshot region is outside the X11 root window bounds",

        else => generic_failure,
    };
}
428
+
429
/// Captures the X11 root window, or a sub-rectangle of it, as RGBA pixels.
///
/// `display_index`, when present, must round to 0; any other value (including
/// NaN and infinities) yields `error.InvalidDisplayIndex`. `region`, when
/// present, is validated against the default screen's size by
/// `resolveLinuxCaptureRect`; when absent the whole screen is captured.
///
/// On success the returned `image.pixels` buffer comes from
/// `std.heap.c_allocator` (allocated in `convertX11ImageToRgba`) and the
/// caller is responsible for freeing it.
///
/// Errors: `UnsupportedPlatform` off Linux, `InvalidDisplayIndex`,
/// `MissingDisplayEnv`, `DisplayOpenFailed`, `NoScreens`, `CaptureFailed`,
/// plus whatever `resolveLinuxCaptureRect` and `convertX11ImageToRgba` return.
fn createLinuxScreenshotImage(input: struct {
    display_index: ?f64,
    region: ?ScreenshotRegion,
}) !ScreenshotCapture {
    if (builtin.target.os.tag != .linux) {
        return error.UnsupportedPlatform;
    }
    if (input.display_index) |value| {
        // Validate in the float domain. Converting first with @intFromFloat
        // is illegal behavior for NaN, infinities, and magnitudes that do not
        // fit the integer type, and this value arrives from the caller.
        if (!std.math.isFinite(value) or std.math.round(value) != 0) {
            return error.InvalidDisplayIndex;
        }
    }
    if (std.posix.getenv("DISPLAY") == null) {
        return error.MissingDisplayEnv;
    }

    const display = c_x11.XOpenDisplay(null) orelse return error.DisplayOpenFailed;
    defer _ = c_x11.XCloseDisplay(display);

    const screen_index = c_x11.XDefaultScreen(display);
    if (screen_index < 0) {
        return error.NoScreens;
    }
    const root = c_x11.XRootWindow(display, screen_index);
    const screen_width_i = c_x11.XDisplayWidth(display, screen_index);
    const screen_height_i = c_x11.XDisplayHeight(display, screen_index);
    if (screen_width_i <= 0 or screen_height_i <= 0) {
        return error.CaptureFailed;
    }

    const screen_width = @as(usize, @intCast(screen_width_i));
    const screen_height = @as(usize, @intCast(screen_height_i));
    const capture_rect = try resolveLinuxCaptureRect(.{
        .screen_width = screen_width,
        .screen_height = screen_height,
        .region = input.region,
    });

    // Try XShm first (fast), fall back to XGetImage (slow but always works).
    // XShm fails on XWayland when processes don't share SHM namespaces.
    const image = captureWithXShm(display, screen_index, root, capture_rect) orelse
        captureWithXGetImage(display, root, capture_rect) orelse
        return error.CaptureFailed;
    // XDestroyImage is a C macro: ((*((ximage)->f.destroy_image))((ximage)))
    // Zig's @cImport can't translate it, so call the function pointer directly.
    defer _ = image.*.f.destroy_image.?(image);

    const rgba = try convertX11ImageToRgba(image, capture_rect.width, capture_rect.height);
    return .{
        .image = rgba,
        .capture_x = @floatFromInt(capture_rect.x),
        .capture_y = @floatFromInt(capture_rect.y),
        .capture_width = @floatFromInt(capture_rect.width),
        .capture_height = @floatFromInt(capture_rect.height),
        // Only display 0 is accepted above, so the reported index is fixed.
        .desktop_index = 0,
    };
}
487
+
488
/// Integer rectangle handed to the X11 capture calls.
const LinuxCaptureRect = struct {
    /// Horizontal offset of the rectangle's origin, passed as the X request's `x`.
    x: usize,
    /// Vertical offset of the rectangle's origin, passed as the X request's `y`.
    y: usize,
    /// Number of pixel columns to capture.
    width: usize,
    /// Number of pixel rows to capture.
    height: usize,
};
494
+
495
/// Flag raised by `captureErrorHandler` when Xlib reports an error while a
/// screenshot request is in flight. Callers clear it before issuing the
/// request and read it after syncing with the server.
/// XSetErrorHandler installs a process-wide handler, so the state it writes
/// has to live at file scope.
var x_capture_error_occurred: bool = false;

/// Xlib error callback used during screenshot capture. It ignores both
/// arguments, records that an error happened, and returns 0.
fn captureErrorHandler(display: ?*c_x11.Display, event: ?*c_x11.XErrorEvent) callconv(.c) c_int {
    _ = display;
    _ = event;
    x_capture_error_occurred = true;
    return 0;
}
503
+
504
/// Destroy callback installed on images returned by `captureWithXShm`.
/// Frees the pixel copy that `captureWithXShm` allocated from
/// `std.heap.c_allocator`, then frees the XImage struct itself.
fn destroyXShmCopiedImage(image: [*c]c_x11.XImage) callconv(.c) c_int {
    if (image == null) {
        return 0;
    }
    if (image.*.data != null) {
        // Same length expression used when the copy was allocated.
        const byte_count = @as(usize, @intCast(image.*.bytes_per_line)) * @as(usize, @intCast(image.*.height));
        const bytes: [*]u8 = @ptrCast(image.*.data);
        std.heap.c_allocator.free(bytes[0..byte_count]);
        image.*.data = null;
    }
    _ = c_x11.XFree(image);
    return 1;
}

/// Fast screenshot path using XShm (shared memory). Returns null if XShm is
/// unavailable or fails (common on XWayland with different SHM namespaces).
///
/// On success the returned XImage owns a private copy of the pixels, so the
/// shared-memory segment is already detached and removed when this returns.
/// The caller releases the image by calling its `f.destroy_image` pointer.
///
/// NOTE(review): libXext's XShmCreateImage is understood to install a
/// `destroy_image` that releases only the XImage struct, not `data`. Left in
/// place, the pixel copy made here would leak on every capture, so a destroy
/// callback that frees both is installed instead. Confirm against the linked
/// libXext; the replacement is correct either way.
fn captureWithXShm(
    display: *c_x11.Display,
    screen_index: c_int,
    root: c_x11.Window,
    capture_rect: LinuxCaptureRect,
) ?*c_x11.XImage {
    if (c_x11.XShmQueryExtension(display) == 0) {
        return null;
    }

    const visual = c_x11.XDefaultVisual(display, screen_index);
    const depth = @as(c_uint, @intCast(c_x11.XDefaultDepth(display, screen_index)));
    var shm_info: c_x11.XShmSegmentInfo = undefined;
    shm_info.shmid = -1;
    shm_info.shmaddr = null;
    shm_info.readOnly = 0;

    const image = c_x11.XShmCreateImage(
        display,
        visual,
        depth,
        c_x11.ZPixmap,
        null,
        &shm_info,
        @as(c_uint, @intCast(capture_rect.width)),
        @as(c_uint, @intCast(capture_rect.height)),
    ) orelse return null;

    // Registered first so it runs last: on any failure the image is destroyed
    // only after the shared-memory cleanup below has finished. `data` is
    // cleared first because at that point it is either unset or points into
    // the segment, never at memory the image owns.
    var succeeded = false;
    defer {
        if (!succeeded) {
            image.*.data = null;
            _ = image.*.f.destroy_image.?(image);
        }
    }

    const bytes_per_image = @as(usize, @intCast(image.*.bytes_per_line)) * capture_rect.height;
    const shmget_result = c_x11.shmget(c_x11.IPC_PRIVATE, bytes_per_image, c_x11.IPC_CREAT | 0o600);
    if (shmget_result < 0) {
        return null;
    }
    shm_info.shmid = shmget_result;
    defer _ = c_x11.shmctl(shm_info.shmid, c_x11.IPC_RMID, null);

    const shmaddr = c_x11.shmat(shm_info.shmid, null, 0);
    // shmat signals failure with (void *)-1.
    if (@intFromPtr(shmaddr) == std.math.maxInt(usize)) {
        return null;
    }
    defer _ = c_x11.shmdt(shmaddr);
    shm_info.shmaddr = @ptrCast(shmaddr);
    image.*.data = shm_info.shmaddr;

    // Install custom error handler to catch BadAccess from XShmAttach
    // (happens on XWayland when SHM namespaces don't match).
    x_capture_error_occurred = false;
    const old_handler = c_x11.XSetErrorHandler(captureErrorHandler);
    defer _ = c_x11.XSetErrorHandler(old_handler);

    _ = c_x11.XShmAttach(display, &shm_info);
    // Round-trip so an attach error is delivered before the flag is read.
    _ = c_x11.XSync(display, 0);
    if (x_capture_error_occurred) {
        return null;
    }
    defer _ = c_x11.XShmDetach(display, &shm_info);

    if (c_x11.XShmGetImage(
        display,
        root,
        image,
        @as(c_int, @intCast(capture_rect.x)),
        @as(c_int, @intCast(capture_rect.y)),
        c_x11.AllPlanes,
    ) == 0) {
        return null;
    }

    // Copy image data to a separate allocation so we can detach SHM.
    const data_copy = std.heap.c_allocator.alloc(u8, bytes_per_image) catch return null;
    @memcpy(data_copy, @as([*]const u8, @ptrCast(shmaddr))[0..bytes_per_image]);
    image.*.data = @ptrCast(data_copy.ptr);
    // The image now owns `data_copy`; make destroy_image release it.
    image.*.f.destroy_image = &destroyXShmCopiedImage;

    succeeded = true;
    return image;
}
609
+
610
/// Fallback capture path: XGetImage transfers the pixels over the X
/// connection instead of through shared memory.
/// Works everywhere including XWayland regardless of SHM namespace.
/// A temporary X error handler is installed for the duration of the call so
/// that errors such as BadMatch (common on XWayland when the capture region
/// doesn't match the root drawable) turn into a null return.
///
/// Returns the captured image, or null when the request failed. The caller
/// releases a non-null image through its `f.destroy_image` pointer.
fn captureWithXGetImage(
    display: *c_x11.Display,
    root: c_x11.Window,
    capture_rect: LinuxCaptureRect,
) ?*c_x11.XImage {
    x_capture_error_occurred = false;
    const previous_handler = c_x11.XSetErrorHandler(captureErrorHandler);
    defer _ = c_x11.XSetErrorHandler(previous_handler);

    const origin_x: c_int = @intCast(capture_rect.x);
    const origin_y: c_int = @intCast(capture_rect.y);
    const span_width: c_uint = @intCast(capture_rect.width);
    const span_height: c_uint = @intCast(capture_rect.height);

    const fetched = c_x11.XGetImage(
        display,
        root,
        origin_x,
        origin_y,
        span_width,
        span_height,
        c_x11.AllPlanes,
        c_x11.ZPixmap,
    );
    // Sync before the error flag is read.
    _ = c_x11.XSync(display, 0);

    if (!x_capture_error_occurred) {
        return fetched;
    }

    // An error was reported: discard whatever came back.
    if (fetched) |partial| {
        _ = partial.*.f.destroy_image.?(partial);
    }
    return null;
}
643
+
644
/// Turns the optional caller-supplied region into an integer capture
/// rectangle that lies inside the screen.
///
/// With no region, the whole screen (`0, 0, screen_width, screen_height`) is
/// returned. Otherwise each component is rounded to the nearest integer and
/// validated:
///   - `error.InvalidRegion` if any component is NaN or infinite, if `x` or
///     `y` is negative, or if `width` or `height` is not positive;
///   - `error.RegionOutOfBounds` if the rectangle extends past the screen.
fn resolveLinuxCaptureRect(input: struct {
    screen_width: usize,
    screen_height: usize,
    region: ?ScreenshotRegion,
}) !LinuxCaptureRect {
    const region = input.region orelse return LinuxCaptureRect{
        .x = 0,
        .y = 0,
        .width = input.screen_width,
        .height = input.screen_height,
    };

    const x = std.math.round(region.x);
    const y = std.math.round(region.y);
    const width = std.math.round(region.width);
    const height = std.math.round(region.height);

    // All checks happen on the floats. @intFromFloat is illegal behavior for
    // NaN, infinities, and out-of-range magnitudes, and summing already
    // converted integers could overflow; the region comes from the caller, so
    // neither may be assumed away.
    if (!std.math.isFinite(x) or !std.math.isFinite(y) or
        !std.math.isFinite(width) or !std.math.isFinite(height))
    {
        return error.InvalidRegion;
    }
    if (x < 0 or y < 0 or width <= 0 or height <= 0) {
        return error.InvalidRegion;
    }

    const screen_width_f: f64 = @floatFromInt(input.screen_width);
    const screen_height_f: f64 = @floatFromInt(input.screen_height);
    if (x + width > screen_width_f or y + height > screen_height_f) {
        return error.RegionOutOfBounds;
    }

    // Every component is now a non-negative whole number no larger than the
    // corresponding screen dimension, so the conversions are in range.
    return .{
        .x = @as(usize, @intFromFloat(x)),
        .y = @as(usize, @intFromFloat(y)),
        .width = @as(usize, @intFromFloat(width)),
        .height = @as(usize, @intFromFloat(height)),
    };
}
677
+
678
/// Converts the top-left `width` x `height` area of an XImage into tightly
/// packed 8-bit RGBA (4 bytes per pixel, row-major, alpha fixed at 255).
///
/// Each pixel is read through the image's `get_pixel` function pointer and
/// its channels are extracted with the image's red/green/blue masks.
/// The returned buffer is allocated from `std.heap.c_allocator`; the caller
/// owns it. Fails only if that allocation fails.
fn convertX11ImageToRgba(image: *c_x11.XImage, width: usize, height: usize) !RawRgbaImage {
    const rgba = try std.heap.c_allocator.alloc(u8, width * height * 4);
    errdefer std.heap.c_allocator.free(rgba);

    for (0..height) |row| {
        for (0..width) |column| {
            // XGetPixel is a C macro: ((*((ximage)->f.get_pixel))((ximage), (x), (y)))
            const value = image.*.f.get_pixel.?(image, @as(c_int, @intCast(column)), @as(c_int, @intCast(row)));
            const red = normalizeX11Channel(.{ .pixel = value, .mask = image.*.red_mask });
            const green = normalizeX11Channel(.{ .pixel = value, .mask = image.*.green_mask });
            const blue = normalizeX11Channel(.{ .pixel = value, .mask = image.*.blue_mask });

            const texel = rgba[(row * width + column) * 4 ..][0..4];
            texel.* = .{ red, green, blue, 255 };
        }
    }

    return RawRgbaImage{
        .pixels = rgba,
        .width = width,
        .height = height,
    };
}
701
+
702
/// Extracts the color channel selected by `mask` from `pixel` and rescales
/// it to the 0..255 range.
///
/// The channel value is `(pixel & mask)` shifted down by the mask's trailing
/// zero count, and the channel's maximum is taken as `2^popcount(mask) - 1`.
/// Returns 0 when `mask` is 0. The result is always a valid `u8`: a mask
/// whose set bits are not contiguous can produce a shifted value above that
/// maximum, so the value is clamped before scaling instead of trapping in
/// the final cast.
fn normalizeX11Channel(input: struct {
    pixel: c_ulong,
    mask: c_ulong,
}) u8 {
    if (input.mask == 0) {
        return 0;
    }
    // @ctz returns u7 on 64-bit c_ulong (aarch64-linux), but >> needs u6.
    // The shift can't exceed 63 since mask != 0 and is at most 64 bits.
    const shift: std.math.Log2Int(c_ulong) = @intCast(@ctz(input.mask));
    const bits: std.math.Log2Int(c_ulong) = @intCast(@min(@popCount(input.mask), @bitSizeOf(c_ulong) - 1));
    const raw: u64 = (input.pixel & input.mask) >> shift;
    const max_value = (@as(u64, 1) << @intCast(bits)) - 1;
    if (max_value == 0) {
        return 0;
    }
    // Clamp so the quotient cannot exceed 255, and widen so the multiply
    // cannot overflow for very wide masks.
    const clamped = @min(raw, max_value);
    const scaled = (@as(u128, clamped) * 255) / max_value;
    return @as(u8, @intCast(scaled));
}
720
+
721
/// Returns a copy of `image` whose longer edge is at most
/// `screenshot_max_long_edge_px`.
///
/// If the image already fits, the pixels are duplicated unchanged. Otherwise
/// both dimensions are multiplied by the same factor (each at least 1 pixel)
/// and every output pixel is copied from the source pixel at the
/// proportionally mapped coordinate.
///
/// In both cases the returned pixel buffer is a fresh allocation from
/// `std.heap.c_allocator`, separate from `image.pixels`; the caller owns it.
/// Fails only if allocation fails.
fn scaleLinuxScreenshotImageIfNeeded(image: RawRgbaImage) !ScaledScreenshotImage {
    const source_width_f = @as(f64, @floatFromInt(image.width));
    const source_height_f = @as(f64, @floatFromInt(image.height));
    const longest_edge = @max(source_width_f, source_height_f);

    if (longest_edge <= screenshot_max_long_edge_px) {
        const duplicate = try std.heap.c_allocator.dupe(u8, image.pixels);
        return .{
            .image = .{ .pixels = duplicate, .width = image.width, .height = image.height },
            .width = source_width_f,
            .height = source_height_f,
        };
    }

    const factor = screenshot_max_long_edge_px / longest_edge;
    const out_width = @max(1, @as(usize, @intFromFloat(std.math.round(source_width_f * factor))));
    const out_height = @max(1, @as(usize, @intFromFloat(std.math.round(source_height_f * factor))));
    const out_pixels = try std.heap.c_allocator.alloc(u8, out_width * out_height * 4);
    errdefer std.heap.c_allocator.free(out_pixels);

    for (0..out_height) |out_y| {
        // Map the output row back to a source row, clamped to the last row.
        const src_y = @min(image.height - 1, @as(usize, @intFromFloat((@as(f64, @floatFromInt(out_y)) * source_height_f) / @as(f64, @floatFromInt(out_height)))));
        for (0..out_width) |out_x| {
            const src_x = @min(image.width - 1, @as(usize, @intFromFloat((@as(f64, @floatFromInt(out_x)) * source_width_f) / @as(f64, @floatFromInt(out_width)))));
            const src = image.pixels[(src_y * image.width + src_x) * 4 ..][0..4];
            const dst = out_pixels[(out_y * out_width + out_x) * 4 ..][0..4];
            dst.* = src.*;
        }
    }

    return .{
        .image = .{ .pixels = out_pixels, .width = out_width, .height = out_height },
        .width = @floatFromInt(out_width),
        .height = @floatFromInt(out_height),
    };
}
758
+
759
/// Writes an RGBA image to `output_path` as a PNG using libpng's simplified
/// `png_image` API.
///
/// The path is copied into a temporary NUL-terminated buffer for the C call.
/// Returns `error.PngWriteFailed` when `png_image_write_to_file` reports
/// failure (returns 0), or an allocation error if the path copy fails.
fn writeLinuxScreenshotPng(input: struct {
    image: RawRgbaImage,
    output_path: []const u8,
}) !void {
    const pixels = input.image;

    var png: c_x11.png_image = std.mem.zeroes(c_x11.png_image);
    png.version = c_x11.PNG_IMAGE_VERSION;
    png.width = @as(c_x11.png_uint_32, @intCast(pixels.width));
    png.height = @as(c_x11.png_uint_32, @intCast(pixels.height));
    png.format = c_x11.PNG_FORMAT_RGBA;

    const path_z = try std.heap.c_allocator.dupeZ(u8, input.output_path);
    defer std.heap.c_allocator.free(path_z);
    // Declared after the path's defer so it runs before it, on both the
    // success and the failure exit.
    defer c_x11.png_image_free(&png);

    const row_stride = @as(c_int, @intCast(pixels.width * 4));
    const status = c_x11.png_image_write_to_file(
        &png,
        path_z.ptr,
        0,
        pixels.pixels.ptr,
        row_stride,
        null,
    );
    if (status == 0) {
        return error.PngWriteFailed;
    }
}
786
+
787
+ pub fn click(input: ClickInput) CommandResult {
338
788
  const click_count: u32 = if (input.count) |count| blk: {
339
789
  const normalized = @as(i64, @intFromFloat(std.math.round(count)));
340
790
  if (normalized <= 0) {
@@ -347,40 +797,86 @@ pub fn click(input: ClickInput) CommandResult {
347
797
  return failCommand("click", "INVALID_INPUT", "invalid click button");
348
798
  };
349
799
 
350
- const point: c.CGPoint = .{
351
- .x = input.point.x,
352
- .y = input.point.y,
353
- };
800
+ switch (builtin.target.os.tag) {
801
+ .macos => {
802
+ const point: c.CGPoint = .{
803
+ .x = input.point.x,
804
+ .y = input.point.y,
805
+ };
354
806
 
355
- var index: u32 = 0;
356
- while (index < click_count) : (index += 1) {
357
- const click_state = @as(i64, @intCast(index + 1));
358
- postClickPair(point, button_kind, click_state) catch {
359
- return failCommand("click", "EVENT_POST_FAILED", "failed to post click event");
360
- };
807
+ var index: u32 = 0;
808
+ while (index < click_count) : (index += 1) {
809
+ const click_state = @as(i64, @intCast(index + 1));
810
+ postClickPair(point, button_kind, click_state) catch {
811
+ return failCommand("click", "EVENT_POST_FAILED", "failed to post click event");
812
+ };
361
813
 
362
- if (index + 1 < click_count) {
363
- std.Thread.sleep(80 * std.time.ns_per_ms);
364
- }
365
- }
814
+ if (index + 1 < click_count) {
815
+ std.Thread.sleep(80 * std.time.ns_per_ms);
816
+ }
817
+ }
366
818
 
367
- return okCommand();
819
+ return okCommand();
820
+ },
821
+ .linux => {
822
+ const display = openX11Display() catch {
823
+ return failCommand("click", "EVENT_POST_FAILED", "failed to open X11 display");
824
+ };
825
+ defer _ = c_x11.XCloseDisplay(display);
826
+
827
+ moveCursorToPointX11(.{ .x = input.point.x, .y = input.point.y }, display) catch {
828
+ return failCommand("click", "EVENT_POST_FAILED", "failed to move mouse cursor");
829
+ };
830
+
831
+ var index: u32 = 0;
832
+ while (index < click_count) : (index += 1) {
833
+ postClickPairX11(.{ .x = input.point.x, .y = input.point.y }, button_kind, display) catch {
834
+ return failCommand("click", "EVENT_POST_FAILED", "failed to post click event");
835
+ };
836
+
837
+ if (index + 1 < click_count) {
838
+ std.Thread.sleep(80 * std.time.ns_per_ms);
839
+ }
840
+ }
841
+
842
+ _ = c_x11.XFlush(display);
843
+ return okCommand();
844
+ },
845
+ else => {
846
+ return failCommand("click", "UNSUPPORTED_PLATFORM", "click is unsupported on this platform");
847
+ },
848
+ }
368
849
  }
369
850
 
370
851
  pub fn mouseMove(input: MouseMoveInput) CommandResult {
371
- if (builtin.target.os.tag != .macos) {
372
- return failCommand("mouse-move", "UNSUPPORTED_PLATFORM", "mouse-move is only supported on macOS");
373
- }
852
+ switch (builtin.target.os.tag) {
853
+ .macos => {
854
+ const point: c.CGPoint = .{
855
+ .x = input.x,
856
+ .y = input.y,
857
+ };
858
+ moveCursorToPoint(point) catch {
859
+ return failCommand("mouse-move", "EVENT_POST_FAILED", "failed to move mouse cursor");
860
+ };
374
861
 
375
- const point: c.CGPoint = .{
376
- .x = input.x,
377
- .y = input.y,
378
- };
379
- moveCursorToPoint(point) catch {
380
- return failCommand("mouse-move", "EVENT_POST_FAILED", "failed to move mouse cursor");
381
- };
862
+ return okCommand();
863
+ },
864
+ .linux => {
865
+ const display = openX11Display() catch {
866
+ return failCommand("mouse-move", "EVENT_POST_FAILED", "failed to open X11 display");
867
+ };
868
+ defer _ = c_x11.XCloseDisplay(display);
382
869
 
383
- return okCommand();
870
+ moveCursorToPointX11(.{ .x = input.x, .y = input.y }, display) catch {
871
+ return failCommand("mouse-move", "EVENT_POST_FAILED", "failed to move mouse cursor");
872
+ };
873
+ _ = c_x11.XFlush(display);
874
+ return okCommand();
875
+ },
876
+ else => {
877
+ return failCommand("mouse-move", "UNSUPPORTED_PLATFORM", "mouse-move is unsupported on this platform");
878
+ },
879
+ }
384
880
  }
385
881
 
386
882
  pub fn mouseDown(input: MouseButtonInput) CommandResult {
@@ -395,35 +891,66 @@ fn handleMouseButtonInput(args: struct {
395
891
  input: MouseButtonInput,
396
892
  is_down: bool,
397
893
  }) CommandResult {
398
- if (builtin.target.os.tag != .macos) {
399
- return failCommand("mouse-button", "UNSUPPORTED_PLATFORM", "mouse button events are only supported on macOS");
400
- }
401
-
402
894
  const button_kind = resolveMouseButton(args.input.button orelse "left") catch {
403
895
  return failCommand("mouse-button", "INVALID_INPUT", "invalid mouse button");
404
896
  };
405
897
 
406
- const point = currentCursorPoint() catch {
407
- return failCommand("mouse-button", "CURSOR_READ_FAILED", "failed to read cursor position");
408
- };
898
+ switch (builtin.target.os.tag) {
899
+ .macos => {
900
+ const point = currentCursorPoint() catch {
901
+ return failCommand("mouse-button", "CURSOR_READ_FAILED", "failed to read cursor position");
902
+ };
409
903
 
410
- postMouseButtonEvent(point, button_kind, args.is_down, 1) catch {
411
- return failCommand("mouse-button", "EVENT_POST_FAILED", "failed to post mouse button event");
412
- };
904
+ postMouseButtonEvent(point, button_kind, args.is_down, 1) catch {
905
+ return failCommand("mouse-button", "EVENT_POST_FAILED", "failed to post mouse button event");
906
+ };
413
907
 
414
- return okCommand();
908
+ return okCommand();
909
+ },
910
+ .linux => {
911
+ const display = openX11Display() catch {
912
+ return failCommand("mouse-button", "EVENT_POST_FAILED", "failed to open X11 display");
913
+ };
914
+ defer _ = c_x11.XCloseDisplay(display);
915
+
916
+ postMouseButtonEventX11(button_kind, args.is_down, display) catch {
917
+ return failCommand("mouse-button", "EVENT_POST_FAILED", "failed to post mouse button event");
918
+ };
919
+ _ = c_x11.XFlush(display);
920
+
921
+ return okCommand();
922
+ },
923
+ else => {
924
+ return failCommand("mouse-button", "UNSUPPORTED_PLATFORM", "mouse button events are unsupported on this platform");
925
+ },
926
+ }
415
927
  }
416
928
 
417
929
  pub fn mousePosition() DataResult(Point) {
418
- if (builtin.target.os.tag != .macos) {
419
- return failData(Point, "mouse-position", "UNSUPPORTED_PLATFORM", "mouse-position is only supported on macOS");
420
- }
930
+ switch (builtin.target.os.tag) {
931
+ .macos => {
932
+ const point = currentCursorPoint() catch {
933
+ return failData(Point, "mouse-position", "CURSOR_READ_FAILED", "failed to read cursor position");
934
+ };
421
935
 
422
- const point = currentCursorPoint() catch {
423
- return failData(Point, "mouse-position", "CURSOR_READ_FAILED", "failed to read cursor position");
424
- };
936
+ return okData(Point, .{ .x = std.math.round(point.x), .y = std.math.round(point.y) });
937
+ },
938
+ .linux => {
939
+ const display = openX11Display() catch {
940
+ return failData(Point, "mouse-position", "EVENT_POST_FAILED", "failed to open X11 display");
941
+ };
942
+ defer _ = c_x11.XCloseDisplay(display);
943
+
944
+ const point = currentCursorPointX11(display) catch {
945
+ return failData(Point, "mouse-position", "CURSOR_READ_FAILED", "failed to read cursor position");
946
+ };
425
947
 
426
- return okData(Point, .{ .x = std.math.round(point.x), .y = std.math.round(point.y) });
948
+ return okData(Point, .{ .x = @floatFromInt(point.x), .y = @floatFromInt(point.y) });
949
+ },
950
+ else => {
951
+ return failData(Point, "mouse-position", "UNSUPPORTED_PLATFORM", "mouse-position is unsupported on this platform");
952
+ },
953
+ }
427
954
  }
428
955
 
429
956
  pub fn hover(input: Point) CommandResult {
@@ -431,25 +958,9 @@ pub fn hover(input: Point) CommandResult {
431
958
  }
432
959
 
433
960
  pub fn drag(input: DragInput) CommandResult {
434
- if (builtin.target.os.tag != .macos) {
435
- return failCommand("drag", "UNSUPPORTED_PLATFORM", "drag is only supported on macOS");
436
- }
437
-
438
961
  const button_kind = resolveMouseButton(input.button orelse "left") catch {
439
962
  return failCommand("drag", "INVALID_INPUT", "invalid drag button");
440
963
  };
441
-
442
- const from: c.CGPoint = .{ .x = input.from.x, .y = input.from.y };
443
- const to: c.CGPoint = .{ .x = input.to.x, .y = input.to.y };
444
-
445
- moveCursorToPoint(from) catch {
446
- return failCommand("drag", "EVENT_POST_FAILED", "failed to move cursor to drag origin");
447
- };
448
-
449
- postMouseButtonEvent(from, button_kind, true, 1) catch {
450
- return failCommand("drag", "EVENT_POST_FAILED", "failed to post drag mouse-down");
451
- };
452
-
453
964
  const duration_ms = if (input.durationMs) |value| blk: {
454
965
  const normalized = @as(i64, @intFromFloat(std.math.round(value)));
455
966
  if (normalized <= 0) {
@@ -461,33 +972,152 @@ pub fn drag(input: DragInput) CommandResult {
461
972
  const step_count: u64 = 16;
462
973
  const step_duration_ns = if (step_count == 0) 0 else total_duration_ns / step_count;
463
974
 
464
- var index: u64 = 1;
465
- while (index <= step_count) : (index += 1) {
466
- const fraction = @as(f64, @floatFromInt(index)) / @as(f64, @floatFromInt(step_count));
467
- const next_point: c.CGPoint = .{
468
- .x = from.x + (to.x - from.x) * fraction,
469
- .y = from.y + (to.y - from.y) * fraction,
975
+ switch (builtin.target.os.tag) {
976
+ .macos => {
977
+ const from: c.CGPoint = .{ .x = input.from.x, .y = input.from.y };
978
+ const to: c.CGPoint = .{ .x = input.to.x, .y = input.to.y };
979
+
980
+ moveCursorToPoint(from) catch {
981
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to move cursor to drag origin");
982
+ };
983
+
984
+ postMouseButtonEvent(from, button_kind, true, 1) catch {
985
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to post drag mouse-down");
986
+ };
987
+
988
+ var index: u64 = 1;
989
+ while (index <= step_count) : (index += 1) {
990
+ const fraction = @as(f64, @floatFromInt(index)) / @as(f64, @floatFromInt(step_count));
991
+ const next_point: c.CGPoint = .{
992
+ .x = from.x + (to.x - from.x) * fraction,
993
+ .y = from.y + (to.y - from.y) * fraction,
994
+ };
995
+
996
+ moveCursorToPoint(next_point) catch {
997
+ return failCommand("drag", "EVENT_POST_FAILED", "failed during drag cursor movement");
998
+ };
999
+
1000
+ if (step_duration_ns > 0 and index < step_count) {
1001
+ std.Thread.sleep(step_duration_ns);
1002
+ }
1003
+ }
1004
+
1005
+ postMouseButtonEvent(to, button_kind, false, 1) catch {
1006
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to post drag mouse-up");
1007
+ };
1008
+
1009
+ return okCommand();
1010
+ },
1011
+ .linux => {
1012
+ const display = openX11Display() catch {
1013
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to open X11 display");
1014
+ };
1015
+ defer _ = c_x11.XCloseDisplay(display);
1016
+
1017
+ moveCursorToPointX11(.{ .x = input.from.x, .y = input.from.y }, display) catch {
1018
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to move cursor to drag origin");
1019
+ };
1020
+
1021
+ postMouseButtonEventX11(button_kind, true, display) catch {
1022
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to post drag mouse-down");
1023
+ };
1024
+
1025
+ var index: u64 = 1;
1026
+ while (index <= step_count) : (index += 1) {
1027
+ const fraction = @as(f64, @floatFromInt(index)) / @as(f64, @floatFromInt(step_count));
1028
+ const next_point = Point{
1029
+ .x = input.from.x + (input.to.x - input.from.x) * fraction,
1030
+ .y = input.from.y + (input.to.y - input.from.y) * fraction,
1031
+ };
1032
+
1033
+ moveCursorToPointX11(next_point, display) catch {
1034
+ return failCommand("drag", "EVENT_POST_FAILED", "failed during drag cursor movement");
1035
+ };
1036
+
1037
+ if (step_duration_ns > 0 and index < step_count) {
1038
+ std.Thread.sleep(step_duration_ns);
1039
+ }
1040
+ }
1041
+
1042
+ postMouseButtonEventX11(button_kind, false, display) catch {
1043
+ return failCommand("drag", "EVENT_POST_FAILED", "failed to post drag mouse-up");
1044
+ };
1045
+ _ = c_x11.XFlush(display);
1046
+
1047
+ return okCommand();
1048
+ },
1049
+ else => {
1050
+ return failCommand("drag", "UNSUPPORTED_PLATFORM", "drag is unsupported on this platform");
1051
+ },
1052
+ }
1053
+ }
1054
+
1055
+ pub fn displayList() DataResult([]const u8) {
1056
+ if (builtin.target.os.tag == .linux) {
1057
+ const display = openX11Display() catch {
1058
+ return failData([]const u8, "display-list", "DISPLAY_QUERY_FAILED", "failed to open X11 display");
470
1059
  };
1060
+ defer _ = c_x11.XCloseDisplay(display);
471
1061
 
472
- moveCursorToPoint(next_point) catch {
473
- return failCommand("drag", "EVENT_POST_FAILED", "failed during drag cursor movement");
1062
+ const screen_count: usize = @intCast(c_x11.XScreenCount(display));
1063
+ if (screen_count == 0) {
1064
+ return failData([]const u8, "display-list", "DISPLAY_QUERY_FAILED", "failed to query active displays");
1065
+ }
1066
+
1067
+ const primary_screen = c_x11.XDefaultScreen(display);
1068
+
1069
+ var write_buffer: [32 * 1024]u8 = undefined;
1070
+ var stream = std.io.fixedBufferStream(&write_buffer);
1071
+ const writer = stream.writer();
1072
+
1073
+ writer.writeByte('[') catch {
1074
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
474
1075
  };
475
1076
 
476
- if (step_duration_ns > 0 and index < step_count) {
477
- std.Thread.sleep(step_duration_ns);
1077
+ var i: usize = 0;
1078
+ while (i < screen_count) : (i += 1) {
1079
+ if (i > 0) {
1080
+ writer.writeByte(',') catch {
1081
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
1082
+ };
1083
+ }
1084
+
1085
+ var name_buffer: [64]u8 = undefined;
1086
+ const display_name = std.fmt.bufPrint(&name_buffer, "Display {d}", .{i}) catch "Display";
1087
+ const screen_index: c_int = @intCast(i);
1088
+ const root = c_x11.XRootWindow(display, screen_index);
1089
+ const width = c_x11.XDisplayWidth(display, screen_index);
1090
+ const height = c_x11.XDisplayHeight(display, screen_index);
1091
+
1092
+ const item = DisplayInfoOutput{
1093
+ .id = @as(u32, @truncate(@as(u64, @intCast(root)))),
1094
+ .index = @intCast(i),
1095
+ .name = display_name,
1096
+ .x = 0,
1097
+ .y = 0,
1098
+ .width = @floatFromInt(width),
1099
+ .height = @floatFromInt(height),
1100
+ .scale = 1,
1101
+ .isPrimary = screen_index == primary_screen,
1102
+ };
1103
+
1104
+ writer.print("{f}", .{std.json.fmt(item, .{})}) catch {
1105
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
1106
+ };
478
1107
  }
479
- }
480
1108
 
481
- postMouseButtonEvent(to, button_kind, false, 1) catch {
482
- return failCommand("drag", "EVENT_POST_FAILED", "failed to post drag mouse-up");
483
- };
1109
+ writer.writeByte(']') catch {
1110
+ return failData([]const u8, "display-list", "SERIALIZE_FAILED", "failed to serialize display list");
1111
+ };
484
1112
 
485
- return okCommand();
486
- }
1113
+ const payload = std.heap.c_allocator.dupe(u8, stream.getWritten()) catch {
1114
+ return failData([]const u8, "display-list", "ALLOC_FAILED", "failed to allocate display list response");
1115
+ };
1116
+ return okData([]const u8, payload);
1117
+ }
487
1118
 
488
- pub fn displayList() DataResult([]const u8) {
489
1119
  if (builtin.target.os.tag != .macos) {
490
- return failData([]const u8, "display-list", "UNSUPPORTED_PLATFORM", "display-list is only supported on macOS");
1120
+ return failData([]const u8, "display-list", "UNSUPPORTED_PLATFORM", "display-list is unsupported on this platform");
491
1121
  }
492
1122
 
493
1123
  var display_ids: [16]c.CGDirectDisplayID = undefined;
@@ -1458,6 +2088,81 @@ fn moveCursorToPoint(point: c.CGPoint) !void {
1458
2088
  c.CGEventPost(c.kCGHIDEventTap, move_event);
1459
2089
  }
1460
2090
 
2091
/// Opens a connection to the X server, passing `null` as the display name.
///
/// Returns `error.UnsupportedPlatform` when the build target is not Linux and
/// `error.XOpenDisplayFailed` when `XOpenDisplay` yields no connection.
/// The caller is responsible for closing the returned display.
fn openX11Display() !*c_x11.Display {
    // Comptime-known guard: bail out before touching any X11 symbol on
    // targets where the X11 bindings are not imported.
    if (builtin.target.os.tag != .linux) return error.UnsupportedPlatform;

    const connection = c_x11.XOpenDisplay(null);
    return connection orelse error.XOpenDisplayFailed;
}
2097
+
2098
/// Maps a mouse button kind to the numeric button code handed to
/// `XTestFakeButtonEvent`: left is 1, middle is 2, right is 3.
fn resolveX11ButtonCode(button: MouseButtonKind) c_uint {
    // Exhaustive switch (no `else`) so a new button kind forces an update here.
    const code: c_uint = switch (button) {
        .left => 1,
        .middle => 2,
        .right => 3,
    };
    return code;
}
2105
+
2106
/// Rounds a floating-point coordinate to the nearest integer and returns it
/// as a `c_int`.
///
/// Returns `error.InvalidPoint` when `value` is NaN or infinite, or when the
/// rounded value does not fit in a `c_int`.
fn normalizedCoordinate(value: f64) !c_int {
    if (!std.math.isFinite(value)) {
        return error.InvalidPoint;
    }

    const rounded = std.math.round(value);

    // Range-check in floating point BEFORE converting. `@intFromFloat` is
    // illegal behavior when the value does not fit the destination integer,
    // so a finite but huge input (e.g. 1e300) must be rejected here rather
    // than after the conversion.
    const lowest: f64 = @floatFromInt(std.math.minInt(c_int));
    const highest: f64 = @floatFromInt(std.math.maxInt(c_int));
    if (rounded < lowest or rounded > highest) {
        return error.InvalidPoint;
    }

    return @as(c_int, @intFromFloat(rounded));
}
2116
+
2117
/// Warps the pointer to `point`, given in coordinates of the display's
/// default root window, and flushes the request to the X server.
///
/// Returns `error.InvalidPoint` (from `normalizedCoordinate`) when either
/// coordinate is not finite or does not fit in a `c_int`.
fn moveCursorToPointX11(point: Point, display: *c_x11.Display) !void {
    const x = try normalizedCoordinate(point.x);
    const y = try normalizedCoordinate(point.y);

    // Source window 0 with a zero-sized source rectangle: the destination
    // coordinates are taken relative to the root window.
    _ = c_x11.XWarpPointer(display, 0, c_x11.XDefaultRootWindow(display), 0, 0, 0, 0, x, y);

    // Xlib buffers outgoing requests. Callers such as the drag loop sleep
    // between successive warps, so flush here to make each move reach the
    // server before the pause instead of all at once at the end.
    _ = c_x11.XFlush(display);
}
2122
+
2123
/// Posts a synthetic press (`is_down == true`) or release of `button`
/// through `XTestFakeButtonEvent`, using `CurrentTime` as the time argument.
///
/// Returns `error.EventPostFailed` when the XTest call reports 0.
fn postMouseButtonEventX11(button: MouseButtonKind, is_down: bool, display: *c_x11.Display) !void {
    const is_press: c_int = if (is_down) c_x11.True else c_x11.False;
    const status = c_x11.XTestFakeButtonEvent(
        display,
        resolveX11ButtonCode(button),
        is_press,
        c_x11.CurrentTime,
    );

    if (status != 0) return;
    return error.EventPostFailed;
}
2131
+
2132
/// Performs one click at `point`: moves the pointer there, then posts a
/// press followed by a release of `button`.
///
/// Stops at, and returns, the first error from the move or either event.
fn postClickPairX11(point: Point, button: MouseButtonKind, display: *c_x11.Display) !void {
    try moveCursorToPointX11(point, display);

    // Press first, then release.
    for ([_]bool{ true, false }) |is_down| {
        try postMouseButtonEventX11(button, is_down, display);
    }
}
2137
+
2138
/// Queries the pointer position against the display's default root window
/// and returns the root-relative coordinates reported by `XQueryPointer`.
///
/// Returns `error.CursorReadFailed` when `XQueryPointer` reports 0.
fn currentCursorPointX11(display: *c_x11.Display) !struct { x: c_int, y: c_int } {
    // Out-parameters required by XQueryPointer; only the root-relative
    // coordinates are used by this function.
    var reported_root: c_x11.Window = 0;
    var reported_child: c_x11.Window = 0;
    var pointer_root_x: c_int = 0;
    var pointer_root_y: c_int = 0;
    var pointer_win_x: c_int = 0;
    var pointer_win_y: c_int = 0;
    var modifier_mask: c_uint = 0;

    const status = c_x11.XQueryPointer(
        display,
        c_x11.XDefaultRootWindow(display),
        &reported_root,
        &reported_child,
        &pointer_root_x,
        &pointer_root_y,
        &pointer_win_x,
        &pointer_win_y,
        &modifier_mask,
    );

    if (status != 0) {
        return .{ .x = pointer_root_x, .y = pointer_root_y };
    }
    return error.CursorReadFailed;
}
2165
+
1461
2166
  fn initModule(js: *napigen.JsContext, exports: napigen.napi_value) !napigen.napi_value {
1462
2167
  try js.setNamedProperty(exports, "screenshot", try js.createFunction(screenshot));
1463
2168
  try js.setNamedProperty(exports, "click", try js.createFunction(click));
@@ -1478,7 +2183,7 @@ fn initModule(js: *napigen.JsContext, exports: napigen.napi_value) !napigen.napi
1478
2183
  }
1479
2184
 
1480
2185
  // Registers `initModule` as the N-API module initializer through napigen.
  // The registration is gated on `build_options.enable_napigen` because the
  // build only provides the napigen module for the N-API library target, not
  // for the standalone exe or test targets.
  comptime {
1481
- if (!builtin.is_test) {
2186
+ if (build_options.enable_napigen) {
1482
2187
  napigen.defineModule(initModule);
1483
2188
  }
1484
2189
  }