usecomputer 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,43 @@
4
4
 
5
5
  All notable changes to `usecomputer` will be documented in this file.
6
6
 
7
+ ## 0.1.3
8
+
9
+ 1. **Kitty Graphics Protocol support** — `screenshot` can now emit the PNG image
10
+ inline to stdout using the [Kitty Graphics Protocol](https://sw.kovidgoyal.net/kitty/graphics-protocol/).
11
+ Set `AGENT_GRAPHICS=kitty` and the image lands directly in the AI model's context
12
+ window — no separate file-read tool call needed:
13
+
14
+ ```bash
15
+ AGENT_GRAPHICS=kitty usecomputer screenshot ./shot.png --json
16
+ # JSON output: { ..., "agentGraphics": true }
17
+ ```
18
+
19
+ Works out of the box with [kitty-graphics-agent](https://github.com/remorses/kitty-graphics-agent),
20
+ an OpenCode plugin that intercepts the escape sequences and injects them as
21
+ LLM-visible image attachments. Add it to `opencode.json` to enable:
22
+
23
+ ```json
24
+ { "plugin": ["kitty-graphics-agent"] }
25
+ ```
26
+
27
+ The plugin sets `AGENT_GRAPHICS=kitty` automatically. `agentGraphics` in the
28
+ JSON output is `true` only when emission actually succeeded.
29
+
30
+ 2. **Aligned table output for list commands** — `display list`, `window list`, and
31
+ `desktop list` now render as aligned, human-readable tables (matching the format
32
+ the old TypeScript CLI produced). JSON mode (`--json`) is unchanged:
33
+
34
+ ```
35
+ desktop primary size position id scale name
36
+ 0 yes 3440x1440 0,0 5 1 Display 5
37
+ 1 no 1512x982 3440,458 1 1 Display 1
38
+ ```
39
+
40
+ 3. **Fixed `agentGraphics` JSON field** — the field now reflects actual Kitty
41
+ emission success rather than just whether `AGENT_GRAPHICS=kitty` was set.
42
+ Empty PNG files and I/O errors report `false` instead of `true`.
43
+
7
44
  ## 0.1.2
8
45
 
9
46
  1. **Removed all unimplemented command stubs** — 18 placeholder commands (`snapshot`, `get text/title/value/bounds/focused`, `window focus/resize/move/minimize/maximize/close`, `app list/launch/quit`, `wait`, `find`, `diff snapshot/screenshot`) that only threw "TODO not implemented" have been removed. The CLI now only exposes commands that actually work.
package/README.md CHANGED
@@ -2,18 +2,12 @@
2
2
 
3
3
  # usecomputer
4
4
 
5
- `usecomputer` is a macOS desktop automation CLI for AI agents.
5
+ `usecomputer` is a desktop automation CLI for AI agents. It works on macOS and
6
+ Linux (X11).
6
7
 
7
- It can move the mouse, click, drag, and query cursor position using native
8
- Quartz events through a Zig N-API module.
9
-
10
- Keyboard synthesis (`type` and `press`) is also available. The native backend
11
- includes platform-specific key injection paths for macOS, Windows, and Linux
12
- X11.
13
-
14
- The package also exports the native commands as plain library functions, so you
15
- can `import * as usecomputer from "usecomputer"` and reuse the same screenshot,
16
- mouse, keyboard, and coord-map behavior from Node.js.
8
+ Screenshot, mouse control (move, click, drag, scroll), and keyboard synthesis
9
+ (`type` and `press`) are all available as CLI commands backed by a native Zig
10
+ binary — no Node.js runtime required.
17
11
 
18
12
  ## Install
19
13
 
@@ -23,8 +17,8 @@ npm install -g usecomputer
23
17
 
24
18
  ## Requirements
25
19
 
26
- - macOS (Darwin)
27
- - Accessibility permission enabled for your terminal app
20
+ - **macOS** — Accessibility permission enabled for your terminal app
21
+ - **Linux** — X11 session with `DISPLAY` set (Wayland via XWayland works too)
28
22
 
29
23
  ## Quick start
30
24
 
@@ -303,6 +297,32 @@ targeting:
303
297
  usecomputer debug-point -x 400 -y 220 --coord-map "0,0,1600,900,1568,882"
304
298
  ```
305
299
 
300
+ ## Kitty Graphics Protocol (agent-friendly screenshots)
301
+
302
+ When the `AGENT_GRAPHICS` environment variable contains `kitty`, the
303
+ `screenshot` command emits the PNG image inline to stdout using the
304
+ [Kitty Graphics Protocol](https://sw.kovidgoyal.net/kitty/graphics-protocol/).
305
+ This lets AI agents receive screenshots in a single tool call — no separate
306
+ file read needed.
307
+
308
+ The protocol is supported by [kitty-graphics-agent](https://github.com/remorses/kitty-graphics-agent),
309
+ an OpenCode plugin that intercepts Kitty Graphics escape sequences from CLI
310
+ output and injects them as LLM-visible image attachments. To use it, add the
311
+ plugin to your `opencode.json`:
312
+
313
+ ```json
314
+ {
315
+ "plugin": ["kitty-graphics-agent"]
316
+ }
317
+ ```
318
+
319
+ The plugin sets `AGENT_GRAPHICS=kitty` in the shell environment automatically.
320
+ When the agent runs `usecomputer screenshot`, the image appears directly in the
321
+ model's context window.
322
+
323
+ The JSON output includes `"agentGraphics": true` when the image was emitted
324
+ inline, so programmatic consumers know the screenshot is already in context.
325
+
306
326
  ## Keyboard commands
307
327
 
308
328
  ### Type text
package/bin.sh ADDED
@@ -0,0 +1,49 @@
1
+ #!/bin/sh
2
+ # Shell launcher for usecomputer — runs the native Zig binary for the current platform.
3
+
4
+ set -e
5
+
6
+ # Resolve the real directory where this script lives (follows symlinks)
7
+ SCRIPT="$0"
8
+ while [ -L "$SCRIPT" ]; do
9
+ SCRIPT_DIR="$(cd "$(dirname "$SCRIPT")" && pwd)"
10
+ SCRIPT="$(readlink "$SCRIPT")"
11
+ # Handle relative symlink targets
12
+ case "$SCRIPT" in
13
+ /*) ;;
14
+ *) SCRIPT="$SCRIPT_DIR/$SCRIPT" ;;
15
+ esac
16
+ done
17
+ SCRIPT_DIR="$(cd "$(dirname "$SCRIPT")" && pwd)"
18
+
19
+ # Detect platform and architecture
20
+ OS="$(uname -s)"
21
+ ARCH="$(uname -m)"
22
+
23
+ case "$OS" in
24
+ Darwin) PLATFORM="darwin" ;;
25
+ Linux) PLATFORM="linux" ;;
26
+ MINGW*|MSYS*|CYGWIN*) PLATFORM="win32" ;;
27
+ *) echo "error: unsupported platform: $OS" >&2; exit 1 ;;
28
+ esac
29
+
30
+ case "$ARCH" in
31
+ arm64|aarch64) ARCH_NAME="arm64" ;;
32
+ x86_64|amd64) ARCH_NAME="x64" ;;
33
+ *) echo "error: unsupported architecture: $ARCH" >&2; exit 1 ;;
34
+ esac
35
+
36
+ TARGET="${PLATFORM}-${ARCH_NAME}"
37
+ NATIVE_BIN="${SCRIPT_DIR}/dist/${TARGET}/usecomputer"
38
+
39
+ if [ "$PLATFORM" = "win32" ]; then
40
+ NATIVE_BIN="${NATIVE_BIN}.exe"
41
+ fi
42
+
43
+ if [ ! -x "$NATIVE_BIN" ]; then
44
+ echo "error: native binary not found at ${NATIVE_BIN}" >&2
45
+ echo "hint: run 'zig build' or install from npm to get prebuilt binaries" >&2
46
+ exit 1
47
+ fi
48
+
49
+ exec "$NATIVE_BIN" "$@"
package/build.zig CHANGED
@@ -139,4 +139,16 @@ pub fn build(b: *std.Build) void {
139
139
  linkPlatformDeps(test_exe.root_module, target_os);
140
140
  const run_test = b.addRunArtifact(test_exe);
141
141
  test_step.dependOn(&run_test.step);
142
+
143
+ // Kitty graphics protocol tests
144
+ const kitty_test_mod = b.createModule(.{
145
+ .root_source_file = b.path("zig/src/kitty-graphics.zig"),
146
+ .target = target,
147
+ .optimize = optimize,
148
+ });
149
+ const kitty_test_exe = b.addTest(.{
150
+ .root_module = kitty_test_mod,
151
+ });
152
+ const run_kitty_test = b.addRunArtifact(kitty_test_exe);
153
+ test_step.dependOn(&run_kitty_test.step);
142
154
  }
Binary file
Binary file
Binary file
Binary file
package/dist/index.d.ts CHANGED
@@ -1,7 +1,5 @@
1
- export { createCli } from './cli.js';
2
1
  export { createBridge, createBridgeFromNative } from './bridge.js';
3
2
  export * from './lib.js';
4
3
  export * from './coord-map.js';
5
4
  export * from './types.js';
6
- export * from './command-parsers.js';
7
5
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAA;AACpC,OAAO,EAAE,YAAY,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAA;AAClE,cAAc,UAAU,CAAA;AACxB,cAAc,gBAAgB,CAAA;AAC9B,cAAc,YAAY,CAAA;AAC1B,cAAc,sBAAsB,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,YAAY,EAAE,sBAAsB,EAAE,MAAM,aAAa,CAAA;AAClE,cAAc,UAAU,CAAA;AACxB,cAAc,gBAAgB,CAAA;AAC9B,cAAc,YAAY,CAAA"}
package/dist/index.js CHANGED
@@ -1,7 +1,5 @@
1
- // Public API exports for usecomputer library helpers, parser, bridge, and CLI modules.
2
- export { createCli } from './cli.js';
1
+ // Public API exports for usecomputer library helpers, bridge, and coord-map modules.
3
2
  export { createBridge, createBridgeFromNative } from './bridge.js';
4
3
  export * from './lib.js';
5
4
  export * from './coord-map.js';
6
5
  export * from './types.js';
7
- export * from './command-parsers.js';
package/package.json CHANGED
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "name": "usecomputer",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "type": "module",
5
5
  "description": "Fast computer automation CLI for AI agents. Control any desktop with accessibility snapshots, clicks, typing, scrolling, and more.",
6
- "bin": "./bin.js",
6
+ "bin": "./bin.sh",
7
7
  "main": "./dist/index.js",
8
8
  "types": "./dist/index.d.ts",
9
9
  "exports": {
@@ -35,20 +35,10 @@
35
35
  "zig",
36
36
  "build.zig",
37
37
  "build.zig.zon",
38
- "bin.js",
38
+ "bin.sh",
39
39
  "README.md",
40
40
  "CHANGELOG.md"
41
41
  ],
42
- "scripts": {
43
- "build": "tsc && chmod +x bin.js",
44
- "build:zig": "zig build",
45
- "build:native": "tsx scripts/build.ts",
46
- "build:native:macos": "tsx scripts/build.ts darwin-arm64 darwin-x64",
47
- "vm": "tsx scripts/vm.ts",
48
- "test": "vitest --run",
49
- "typecheck": "tsc --noEmit",
50
- "prepublishOnly": "[ -n \"$CI\" ] || (pnpm build && pnpm build:native:macos)"
51
- },
52
42
  "keywords": [
53
43
  "computer-use",
54
44
  "automation",
@@ -65,21 +55,17 @@
65
55
  "license": "MIT",
66
56
  "repository": {
67
57
  "type": "git",
68
- "url": "git+https://github.com/remorses/kimaki.git",
69
- "directory": "usecomputer"
58
+ "url": "git+https://github.com/remorses/usecomputer.git"
70
59
  },
71
- "homepage": "https://github.com/remorses/kimaki/tree/main/usecomputer",
60
+ "homepage": "https://github.com/remorses/usecomputer",
72
61
  "bugs": {
73
- "url": "https://github.com/remorses/kimaki/issues"
62
+ "url": "https://github.com/remorses/usecomputer/issues"
74
63
  },
75
64
  "os": [
76
65
  "darwin",
77
66
  "linux"
78
67
  ],
79
68
  "dependencies": {
80
- "goke": "^6.3.0",
81
- "picocolors": "^1.1.1",
82
- "string-dedent": "^3.0.1",
83
69
  "zod": "^4.3.6"
84
70
  },
85
71
  "devDependencies": {
@@ -88,7 +74,13 @@
88
74
  "typescript": "^5.8.3",
89
75
  "vitest": "^4.0.18"
90
76
  },
91
- "optionalDependencies": {
92
- "sharp": "^0.34.5"
77
+ "scripts": {
78
+ "build": "tsc && chmod +x bin.sh",
79
+ "build:zig": "zig build",
80
+ "build:native": "tsx scripts/build.ts",
81
+ "build:native:macos": "tsx scripts/build.ts darwin-arm64 darwin-x64",
82
+ "vm": "tsx scripts/vm.ts",
83
+ "test": "vitest --run",
84
+ "typecheck": "tsc --noEmit"
93
85
  }
94
- }
86
+ }
package/src/index.ts CHANGED
@@ -1,8 +1,6 @@
1
- // Public API exports for usecomputer library helpers, parser, bridge, and CLI modules.
1
+ // Public API exports for usecomputer library helpers, bridge, and coord-map modules.
2
2
 
3
- export { createCli } from './cli.js'
4
3
  export { createBridge, createBridgeFromNative } from './bridge.js'
5
4
  export * from './lib.js'
6
5
  export * from './coord-map.js'
7
6
  export * from './types.js'
8
- export * from './command-parsers.js'
@@ -0,0 +1,151 @@
1
+ // Kitty Graphics Protocol emission for usecomputer screenshot output.
2
+ //
3
+ // When AGENT_GRAPHICS=kitty is set in the environment, CLI tools can emit
4
+ // images inline to stdout via APC escape sequences. An agent plugin
5
+ // (like kitty-graphics-agent) intercepts these sequences, strips them from
6
+ // the text output, and injects the images as LLM-visible attachments.
7
+ //
8
+ // Protocol format:
9
+ // \x1b_G<control_data>;<base64_payload>\x1b\\
10
+ //
11
+ // Large images are chunked: every chunk except the last carries m=1, and the
12
+ // last (or only) chunk carries m=0. Each chunk holds at most 4096 base64 characters (per spec convention).
13
+ //
14
+ // Reference: https://sw.kovidgoyal.net/kitty/graphics-protocol/
15
+ // Agent spec: https://github.com/remorses/kitty-graphics-agent
16
+
17
+ const std = @import("std");
18
+
19
+ const base64_alphabet: [64]u8 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".*;
20
+ const base64_encoder = std.base64.Base64Encoder.init(base64_alphabet, '=');
21
+
22
+ /// Max base64 characters per kitty graphics chunk (spec convention).
23
+ const chunk_size: usize = 4096;
24
+
25
+ /// Check whether a value string contains "kitty".
26
+ /// Extracted for deterministic testing without env var manipulation.
27
+ pub fn containsKitty(val: []const u8) bool {
28
+ return std.mem.indexOf(u8, val, "kitty") != null;
29
+ }
30
+
31
+ /// Check whether the AGENT_GRAPHICS environment variable contains "kitty".
32
+ /// CLIs should call this to decide whether to emit kitty graphics on stdout.
33
+ /// Uses std.process.getEnvVarOwned for cross-platform compatibility (works on
34
+ /// macOS, Linux, and Windows — std.posix.getenv is unavailable on Windows).
35
+ pub fn canEmitAgentGraphics() bool {
36
+ const alloc = std.heap.page_allocator;
37
+ const val = std.process.getEnvVarOwned(alloc, "AGENT_GRAPHICS") catch return false;
38
+ defer alloc.free(val);
39
+ return containsKitty(val);
40
+ }
41
+
42
+ /// Emit a PNG image as Kitty Graphics Protocol escape sequences.
43
+ ///
44
+ /// Writes APC sequences to `writer` with f=100 (PNG), a=T (transmit+display),
45
+ /// and chunked transfer (m=1 while more chunks follow, m=0 on the last/only chunk).
46
+ ///
47
+ /// `png_data` is the raw PNG file bytes (not base64-encoded); empty input writes nothing.
48
+ pub fn emitKittyGraphics(png_data: []const u8, writer: anytype) !void {
49
+ // Base64-encode the entire PNG
50
+ const encoded_len = base64_encoder.calcSize(png_data.len);
51
+
52
+ // Allocate buffer for full base64 string
53
+ const encoded_buf = try std.heap.page_allocator.alloc(u8, encoded_len);
54
+ defer std.heap.page_allocator.free(encoded_buf);
55
+
56
+ const encoded = base64_encoder.encode(encoded_buf, png_data);
57
+
58
+ // Emit chunks
59
+ var offset: usize = 0;
60
+ while (offset < encoded.len) {
61
+ const remaining = encoded.len - offset;
62
+ const this_chunk_size = @min(remaining, chunk_size);
63
+ const is_last = (offset + this_chunk_size >= encoded.len);
64
+ const chunk = encoded[offset .. offset + this_chunk_size];
65
+
66
+ if (offset == 0) {
67
+ // First (or only) chunk: include full control data
68
+ if (is_last) {
69
+ try writer.print("\x1b_Gf=100,a=T,m=0;{s}\x1b\\", .{chunk});
70
+ } else {
71
+ try writer.print("\x1b_Gf=100,a=T,m=1;{s}\x1b\\", .{chunk});
72
+ }
73
+ } else {
74
+ // Continuation chunk: only m= key (per spec)
75
+ if (is_last) {
76
+ try writer.print("\x1b_Gm=0;{s}\x1b\\", .{chunk});
77
+ } else {
78
+ try writer.print("\x1b_Gm=1;{s}\x1b\\", .{chunk});
79
+ }
80
+ }
81
+
82
+ offset += this_chunk_size;
83
+ }
84
+ }
85
+
86
+ // ─── Tests ───
87
+
88
+ test "containsKitty detects kitty in value" {
89
+ try std.testing.expect(containsKitty("kitty"));
90
+ try std.testing.expect(containsKitty("kitty,iterm2"));
91
+ try std.testing.expect(containsKitty("iterm2,kitty"));
92
+ try std.testing.expect(!containsKitty("iterm2"));
93
+ try std.testing.expect(!containsKitty(""));
94
+ try std.testing.expect(!containsKitty("KITTY")); // case-sensitive per spec
95
+ }
96
+
97
+ test "emitKittyGraphics single chunk for small image" {
98
+ var buf = std.ArrayList(u8).initCapacity(std.testing.allocator, 0) catch unreachable;
99
+ defer buf.deinit(std.testing.allocator);
100
+
101
+ // Small PNG-like data (just bytes, doesn't need to be valid PNG for emission test)
102
+ const small_data = "tiny-png-data";
103
+ try emitKittyGraphics(small_data, buf.writer(std.testing.allocator));
104
+
105
+ const output = buf.items;
106
+ // Should start with APC start
107
+ try std.testing.expect(std.mem.startsWith(u8, output, "\x1b_G"));
108
+ // Should have f=100,a=T,m=0 (single chunk = last chunk)
109
+ try std.testing.expect(std.mem.indexOf(u8, output, "f=100,a=T,m=0;") != null);
110
+ // Should end with ST
111
+ try std.testing.expect(std.mem.endsWith(u8, output, "\x1b\\"));
112
+ }
113
+
114
+ test "emitKittyGraphics multi chunk for large data" {
115
+ var buf = std.ArrayList(u8).initCapacity(std.testing.allocator, 0) catch unreachable;
116
+ defer buf.deinit(std.testing.allocator);
117
+
118
+ // Create data large enough to require multiple chunks after base64 encoding.
119
+ // 4096 base64 chars = 3072 raw bytes. 8000 raw bytes encode to 10668 base64 chars = 3 chunks.
120
+ var large_data: [8000]u8 = undefined;
121
+ for (&large_data) |*b| {
122
+ b.* = 0xAB;
123
+ }
124
+ try emitKittyGraphics(&large_data, buf.writer(std.testing.allocator));
125
+
126
+ const output = buf.items;
127
+ // First chunk should have f=100,a=T,m=1
128
+ try std.testing.expect(std.mem.indexOf(u8, output, "f=100,a=T,m=1;") != null);
129
+ // Last chunk should have m=0
130
+ // Find the last occurrence of m=0
131
+ var found_m0 = false;
132
+ var search_pos: usize = 0;
133
+ while (std.mem.indexOfPos(u8, output, search_pos, "m=0;")) |pos| {
134
+ found_m0 = true;
135
+ search_pos = pos + 1;
136
+ }
137
+ try std.testing.expect(found_m0);
138
+ // Continuation chunks should have m=1 (without f=100,a=T prefix)
139
+ // Count occurrences of \x1b_Gm=1; (continuation, no control data beyond m=)
140
+ var continuation_count: usize = 0;
141
+ var cpos: usize = 0;
142
+ while (std.mem.indexOfPos(u8, output, cpos, "\x1b_Gm=1;")) |pos| {
143
+ continuation_count += 1;
144
+ cpos = pos + 1;
145
+ }
146
+ // With ~10668 base64 chars / 4096 chunk size = 3 chunks total
147
+ // First chunk: \x1b_Gf=100,a=T,m=1; (1 occurrence)
148
+ // Middle chunk(s): \x1b_Gm=1; (at least 1)
149
+ // Last chunk: \x1b_Gm=0;
150
+ try std.testing.expect(continuation_count >= 1);
151
+ }
package/zig/src/lib.zig CHANGED
@@ -2020,6 +2020,127 @@ fn writeScreenshotPng(input: struct {
2020
2020
  }
2021
2021
  }
2022
2022
 
2023
+ /// Draw a red crosshair+circle debug marker on an existing PNG file (macOS only).
2024
+ /// Reads the PNG, draws the marker at (x, y) in image coordinates, writes back.
2025
+ pub fn drawMarkerOnPng(input: struct {
2026
+ path: []const u8,
2027
+ x: f64,
2028
+ y: f64,
2029
+ imageWidth: f64,
2030
+ imageHeight: f64,
2031
+ }) CommandResult {
2032
+ if (builtin.target.os.tag != .macos) {
2033
+ return failCommand("drawMarkerOnPng", "UNSUPPORTED_PLATFORM", "debug-point image overlay is only supported on macOS");
2034
+ }
2035
+
2036
+ // Load the existing PNG
2037
+ const path_as_u8: [*]const u8 = @ptrCast(input.path.ptr);
2038
+ const file_url = c.CFURLCreateFromFileSystemRepresentation(
2039
+ null,
2040
+ path_as_u8,
2041
+ @as(c_long, @intCast(input.path.len)),
2042
+ 0,
2043
+ );
2044
+ if (file_url == null) return failCommand("drawMarkerOnPng", "LOAD_FAILED", "failed to create file URL");
2045
+ defer c.CFRelease(file_url);
2046
+
2047
+ const source = c.CGImageSourceCreateWithURL(file_url, null);
2048
+ if (source == null) return failCommand("drawMarkerOnPng", "LOAD_FAILED", "failed to load PNG");
2049
+ defer c.CFRelease(source);
2050
+
2051
+ const image = c.CGImageSourceCreateImageAtIndex(source, 0, null);
2052
+ if (image == null) return failCommand("drawMarkerOnPng", "LOAD_FAILED", "failed to decode PNG image");
2053
+ defer c.CFRelease(image);
2054
+
2055
+ const w = c.CGImageGetWidth(image);
2056
+ const h = c.CGImageGetHeight(image);
2057
+
2058
+ // Create bitmap context
2059
+ const color_space = c.CGColorSpaceCreateDeviceRGB();
2060
+ if (color_space == null) return failCommand("drawMarkerOnPng", "DRAW_FAILED", "failed to create color space");
2061
+ defer c.CFRelease(color_space);
2062
+
2063
+ const ctx = c.CGBitmapContextCreate(
2064
+ null,
2065
+ w,
2066
+ h,
2067
+ 8,
2068
+ 0,
2069
+ color_space,
2070
+ c.kCGImageAlphaPremultipliedLast,
2071
+ );
2072
+ if (ctx == null) return failCommand("drawMarkerOnPng", "DRAW_FAILED", "failed to create bitmap context");
2073
+ defer c.CFRelease(ctx);
2074
+
2075
+ // Draw original image
2076
+ const img_rect: c.CGRect = .{
2077
+ .origin = .{ .x = 0, .y = 0 },
2078
+ .size = .{ .width = @floatFromInt(w), .height = @floatFromInt(h) },
2079
+ };
2080
+ c.CGContextDrawImage(ctx, img_rect, image);
2081
+
2082
+ // Flip Y: CGContext origin is bottom-left, marker coords are top-left
2083
+ const px = input.x;
2084
+ const py = @as(f64, @floatFromInt(h)) - input.y;
2085
+
2086
+ const pi = std.math.pi;
2087
+
2088
+ // White crosshair (background for contrast)
2089
+ c.CGContextSetRGBStrokeColor(ctx, 1, 1, 1, 0.95);
2090
+ c.CGContextSetLineWidth(ctx, 5);
2091
+ c.CGContextSetLineCap(ctx, 1); // kCGLineCapRound
2092
+ c.CGContextMoveToPoint(ctx, px - 22, py);
2093
+ c.CGContextAddLineToPoint(ctx, px + 22, py);
2094
+ c.CGContextStrokePath(ctx);
2095
+ c.CGContextMoveToPoint(ctx, px, py - 22);
2096
+ c.CGContextAddLineToPoint(ctx, px, py + 22);
2097
+ c.CGContextStrokePath(ctx);
2098
+
2099
+ // White ring
2100
+ c.CGContextSetLineWidth(ctx, 4);
2101
+ c.CGContextAddArc(ctx, px, py, 18, 0, 2 * pi, 0);
2102
+ c.CGContextStrokePath(ctx);
2103
+
2104
+ // Red crosshair
2105
+ c.CGContextSetRGBStrokeColor(ctx, 1, 0.176, 0.176, 1); // #ff2d2d
2106
+ c.CGContextSetLineWidth(ctx, 3);
2107
+ c.CGContextSetLineCap(ctx, 1);
2108
+ c.CGContextMoveToPoint(ctx, px - 22, py);
2109
+ c.CGContextAddLineToPoint(ctx, px + 22, py);
2110
+ c.CGContextStrokePath(ctx);
2111
+ c.CGContextMoveToPoint(ctx, px, py - 22);
2112
+ c.CGContextAddLineToPoint(ctx, px, py + 22);
2113
+ c.CGContextStrokePath(ctx);
2114
+
2115
+ // Red ring
2116
+ c.CGContextSetLineWidth(ctx, 2);
2117
+ c.CGContextAddArc(ctx, px, py, 18, 0, 2 * pi, 0);
2118
+ c.CGContextStrokePath(ctx);
2119
+
2120
+ // Filled red center dot with white stroke
2121
+ c.CGContextSetRGBFillColor(ctx, 1, 0.176, 0.176, 1);
2122
+ c.CGContextAddArc(ctx, px, py, 10, 0, 2 * pi, 0);
2123
+ c.CGContextFillPath(ctx);
2124
+ c.CGContextSetRGBStrokeColor(ctx, 1, 1, 1, 1);
2125
+ c.CGContextSetLineWidth(ctx, 3);
2126
+ c.CGContextAddArc(ctx, px, py, 10, 0, 2 * pi, 0);
2127
+ c.CGContextStrokePath(ctx);
2128
+
2129
+ // Get result image and write back
2130
+ const result_image = c.CGBitmapContextCreateImage(ctx);
2131
+ if (result_image == null) return failCommand("drawMarkerOnPng", "DRAW_FAILED", "failed to create result image");
2132
+ defer c.CFRelease(result_image);
2133
+
2134
+ writeScreenshotPng(.{
2135
+ .image = result_image,
2136
+ .output_path = input.path,
2137
+ }) catch {
2138
+ return failCommand("drawMarkerOnPng", "WRITE_FAILED", "failed to write annotated PNG");
2139
+ };
2140
+
2141
+ return okCommand();
2142
+ }
2143
+
2023
2144
  fn resolveMouseButton(button: []const u8) !MouseButtonKind {
2024
2145
  if (std.ascii.eqlIgnoreCase(button, "left")) {
2025
2146
  return .left;