gazectl 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # gazectl
2
+
3
+ Head tracking display focus switcher for macOS + [Aerospace](https://github.com/nikitabobko/AeroSpace).
4
+
5
+ Uses your webcam and MediaPipe to detect which way your head is turned, then switches Aerospace monitor focus automatically.
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm i -g gazectl
11
+ ```
12
+
13
+ Or run directly:
14
+
15
+ ```bash
16
+ npx gazectl
17
+ ```
18
+
19
+ Requires Python 3.9+ and [Aerospace](https://github.com/nikitabobko/AeroSpace). First run sets up a Python venv and downloads the MediaPipe model automatically.
20
+
21
+ ## Usage
22
+
23
+ ```bash
24
+ # First run — calibrates automatically
25
+ gazectl
26
+
27
+ # With verbose logging
28
+ gazectl --verbose
29
+
30
+ # Force recalibration
31
+ gazectl --calibrate
32
+ ```
33
+
34
+ On first run, gazectl asks you to look at each monitor and press Enter. It samples your head angle for 2 seconds per monitor, then saves calibration to `~/.local/share/gazectl/calibration.json`.
35
+
36
+ ## Options
37
+
38
+ | Flag | Default | Description |
39
+ |------|---------|-------------|
40
+ | `--calibrate` | off | Force recalibration |
41
+ | `--calibration-file` | `~/.local/share/gazectl/calibration.json` | Custom calibration path |
42
+ | `--camera` | 0 | Camera index |
43
+ | `--preview` | off | Show camera preview (steals focus — calibration only) |
44
+ | `--verbose` | off | Print yaw angle continuously |
45
+
46
+ ## How it works
47
+
48
+ 1. **Calibrate** — look at each monitor, gazectl records the yaw angle
49
+ 2. **Track** — MediaPipe Face Landmarker detects head yaw in real-time (~30fps)
50
+ 3. **Switch** — when yaw crosses the midpoint between calibrated angles, fires `aerospace focus-monitor`
51
+
52
+ ## License
53
+
54
+ MIT
package/bin/gazectl ADDED
@@ -0,0 +1,53 @@
1
#!/usr/bin/env bash
# gazectl launcher: bootstraps a Python venv and the MediaPipe model on first
# run, then execs the real gazectl.py inside that venv.
set -euo pipefail

GAZECTL_DIR="${GAZECTL_HOME:-$HOME/.local/share/gazectl}"
VENV_DIR="$GAZECTL_DIR/venv"
MODEL_FILE="$GAZECTL_DIR/face_landmarker.task"
MODEL_URL="https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"

# Find the gazectl.py script (relative to this bin script)
SCRIPT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
GAZECTL_PY="$SCRIPT_DIR/gazectl.py"
REQUIREMENTS="$SCRIPT_DIR/requirements.txt"

if [ ! -f "$GAZECTL_PY" ]; then
    echo "error: gazectl.py not found at $GAZECTL_PY" >&2
    exit 1
fi

# Check for python3
if ! command -v python3 &>/dev/null; then
    echo "error: python3 is required but not found" >&2
    echo "install: brew install python3" >&2
    exit 1
fi

# First-run setup. If the dependency install fails, remove the half-built
# venv so the next run retries instead of exec-ing into a broken environment
# (the venv dir's mere existence is what marks setup as "done").
if [ ! -d "$VENV_DIR" ]; then
    echo "gazectl: first-run setup..."
    mkdir -p "$GAZECTL_DIR"

    echo "  creating python venv..."
    python3 -m venv "$VENV_DIR"

    echo "  installing dependencies..."
    if ! "$VENV_DIR/bin/pip" install -q -r "$REQUIREMENTS"; then
        rm -rf "$VENV_DIR"
        echo "error: dependency install failed" >&2
        exit 1
    fi

    echo "  done."
fi

# Download model if missing. -f makes curl fail on HTTP errors (without it a
# 404 error page would be saved as the "model"); download to a temp file and
# rename into place so an interrupted download never leaves a corrupt model
# at $MODEL_FILE.
if [ ! -f "$MODEL_FILE" ]; then
    echo "  downloading face landmarker model..."
    TMP_MODEL="$(mktemp "$GAZECTL_DIR/model.XXXXXX")"
    if curl -fsSL -o "$TMP_MODEL" "$MODEL_URL"; then
        mv "$TMP_MODEL" "$MODEL_FILE"
    else
        rm -f "$TMP_MODEL"
        echo "error: model download failed" >&2
        exit 1
    fi
    echo "  done."
fi

# Symlink model into script dir if needed (gazectl.py looks for it there)
SCRIPT_MODEL="$SCRIPT_DIR/face_landmarker.task"
if [ ! -f "$SCRIPT_MODEL" ] && [ ! -L "$SCRIPT_MODEL" ]; then
    ln -s "$MODEL_FILE" "$SCRIPT_MODEL" 2>/dev/null || true
fi

exec "$VENV_DIR/bin/python" "$GAZECTL_PY" "$@"
package/gazectl.py ADDED
@@ -0,0 +1,411 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ gazectl - Head tracking display focus switcher for macOS + Aerospace.
4
+
5
+ Uses your webcam + MediaPipe Face Landmarker to detect head yaw direction,
6
+ then calls `aerospace focus-monitor` to switch display focus.
7
+
8
+ Calibration-based: on first run, you look at each monitor so the program
9
+ learns which yaw angle corresponds to which display.
10
+ """
11
+
12
+ import json
13
+ import os
14
+ import subprocess
15
+ import time
16
+ import argparse
17
+ import signal
18
+ import sys
19
+ import threading
20
+
21
+ import cv2
22
+ import mediapipe as mp
23
+ from mediapipe.tasks import python as mp_python
24
+ from mediapipe.tasks.python import vision
25
+ import numpy as np
26
+
27
+
28
# Generic 3D face model points used as the object-space side of the solvePnP
# head-pose estimation in get_head_yaw. Units are an arbitrary model scale;
# only the resulting rotation (yaw) is used, so absolute scale is irrelevant.
MODEL_POINTS = np.array([
    (0.0, 0.0, 0.0),          # Nose tip
    (0.0, -330.0, -65.0),     # Chin
    (-225.0, 170.0, -135.0),  # Left eye left corner
    (225.0, 170.0, -135.0),   # Right eye right corner
    (-150.0, -150.0, -125.0), # Left mouth corner
    (150.0, -150.0, -125.0),  # Right mouth corner
], dtype=np.float64)

# MediaPipe Face Landmarker landmark indices for the 6 points above.
# Order must match MODEL_POINTS row-for-row.
# NOTE(review): left/right assignment of eye/mouth indices is assumed to
# match the mesh convention — a swap would only flip the yaw sign, which the
# per-user calibration absorbs, but verify against the MediaPipe mesh map.
LANDMARK_IDS = [1, 152, 263, 33, 287, 57]
40
+
41
# Directory containing this script; used to locate the model file next to it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Data directory override. The bin/gazectl wrapper honors GAZECTL_HOME, so
# this must check the same variable first (previously only the legacy
# SWIVL_HOME name was read, so a GAZECTL_HOME override desynchronized the
# wrapper and this script). SWIVL_HOME is kept as a fallback for
# backward compatibility.
SWIVL_DATA = os.environ.get(
    "GAZECTL_HOME",
    os.environ.get("SWIVL_HOME", os.path.expanduser("~/.local/share/gazectl")),
)
# Default location of the saved per-monitor yaw calibration.
DEFAULT_CALIBRATION_PATH = os.path.join(SWIVL_DATA, "calibration.json")
44
+
45
+
46
def get_head_yaw(face_landmarks, frame_w, frame_h):
    """Estimate the head's yaw (left/right rotation) in degrees.

    Projects the six reference landmarks into pixel coordinates, solves the
    perspective-n-point problem against the generic 3D face model, and
    extracts the Y-axis Euler angle from the resulting rotation matrix.
    Returns None when the pose solver does not converge.
    """
    pts_2d = np.array(
        [(face_landmarks[idx].x * frame_w, face_landmarks[idx].y * frame_h)
         for idx in LANDMARK_IDS],
        dtype=np.float64,
    )

    # Pinhole-camera approximation: focal length ~ frame width, principal
    # point at the image center, no lens distortion.
    cx = frame_w / 2
    cy = frame_h / 2
    intrinsics = np.array(
        [[frame_w, 0, cx],
         [0, frame_w, cy],
         [0, 0, 1]],
        dtype=np.float64,
    )
    no_distortion = np.zeros((4, 1))

    ok, rvec, _ = cv2.solvePnP(
        MODEL_POINTS, pts_2d, intrinsics, no_distortion,
        flags=cv2.SOLVEPNP_ITERATIVE,
    )
    if not ok:
        return None

    rot_mat, _ = cv2.Rodrigues(rvec)
    euler = cv2.RQDecomp3x3(rot_mat)[0]
    # Sign is flipped so the value matches the calibration convention used
    # by the rest of this file.
    return -euler[1]
72
+
73
+
74
# Monotonically increasing timestamp (ms) passed to MediaPipe's detect_async.
# Shared between sample_yaw and the main loop so timestamps never go
# backwards across the calibration -> live-tracking transition.
_last_frame_ts = 0

def sample_yaw(cap, landmarker, lock, latest_landmarks, duration=2.0):
    """Sample yaw values for `duration` seconds, return median.

    Feeds webcam frames to the async landmarker, then reads the most recent
    landmarks published by the landmarker's result callback (shared via
    `latest_landmarks[0]`, guarded by `lock`), converts each to a yaw angle,
    and returns the median of all samples. Returns None if no face was
    detected during the whole window.
    """
    global _last_frame_ts
    samples = []
    start = time.monotonic()
    # Start from wall-clock milliseconds but never behind the last timestamp
    # already sent — MediaPipe's LIVE_STREAM mode rejects non-increasing
    # timestamps.
    frame_ts = max(_last_frame_ts, int(time.monotonic() * 1000))

    while time.monotonic() - start < duration:
        ret, frame = cap.read()
        if not ret:
            continue

        # Nominal 30fps: advance the stream timestamp by ~33ms per frame.
        frame_ts += 33
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)

        try:
            # Result arrives asynchronously via the landmarker's callback.
            landmarker.detect_async(mp_image, frame_ts)
        except Exception:
            pass  # best-effort: a rejected frame simply yields no sample

        # Give the async pipeline time to publish a result for this frame.
        time.sleep(0.03)

        with lock:
            landmarks = latest_landmarks[0]

        if landmarks is not None:
            h, w = frame.shape[:2]
            yaw = get_head_yaw(landmarks, w, h)
            if yaw is not None:
                samples.append(yaw)
                print(f" sampling... yaw: {yaw:+.1f}° ({len(samples)} samples)", end="\r")

    # Terminate the carriage-return progress line.
    print()
    _last_frame_ts = frame_ts
    if not samples:
        return None
    # Median is robust against the occasional bad pose estimate.
    return float(np.median(samples))
114
+
115
+
116
def calibrate(cap, landmarker, lock, latest_landmarks, aero_monitors):
    """Interactive calibration: look at each monitor, record the median yaw.

    For each aerospace monitor, prompts the user to face it and press Enter,
    then samples the head yaw for ~2 seconds. A monitor gets exactly one
    retry if no face was detected; after a second failure it is skipped.
    Exits the process if the user aborts (EOF/Ctrl+C) or fewer than two
    monitors end up calibrated.

    Returns a dict mapping monitor id (as str, for JSON round-tripping)
    to the calibrated yaw angle in degrees.
    """
    print("\n === Calibration ===")
    print(f" Found {len(aero_monitors)} monitors:\n")
    for mid, name in aero_monitors:
        print(f"   [{mid}] {name}")

    calibration = {}
    print()

    for mid, name in aero_monitors:
        # One initial attempt plus one retry — previously this was a
        # copy-pasted duplicate of the prompt/sample code.
        yaw = None
        for attempt in range(2):
            print(f" Look at \"{name}\" and press Enter...", end="", flush=True)
            try:
                input()
            except (EOFError, KeyboardInterrupt):
                sys.exit(0)

            yaw = sample_yaw(cap, landmarker, lock, latest_landmarks)
            if yaw is not None:
                break
            if attempt == 0:
                print(" [error] No face detected. Try again.")

        if yaw is None:
            print(" [error] Still no face detected. Skipping.")
            continue

        calibration[str(mid)] = yaw
        print(f"   {name}: {yaw:+.1f}°")

    if len(calibration) < 2:
        print("\n [error] Need at least 2 calibrated monitors.")
        sys.exit(1)

    print("\n Calibration complete:")
    # Report monitors ordered left-to-right by calibrated yaw.
    for mid_str, yaw in sorted(calibration.items(), key=lambda x: x[1]):
        name = next((m[1] for m in aero_monitors if str(m[0]) == mid_str), "?")
        print(f"   {name} (id {mid_str}): {yaw:+.1f}°")

    return calibration
160
+
161
+
162
def save_calibration(path, data):
    """Persist the calibration dict to `path` as pretty-printed JSON.

    Creates parent directories as needed. A bare filename (no directory
    component) is written to the current working directory — previously
    os.makedirs("") raised FileNotFoundError in that case.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w") as f:
        json.dump(data, f, indent=2)
    print(f" Saved calibration to {path}")
168
+
169
+
170
def load_calibration(path):
    """Read a calibration dict from `path`; None if missing or unreadable."""
    if not os.path.exists(path):
        return None
    try:
        with open(path) as fh:
            data = json.load(fh)
    except (json.JSONDecodeError, IOError):
        # Corrupt or unreadable file — treat the same as "never calibrated".
        return None
    return data
179
+
180
+
181
def get_target_monitor(yaw, calibration):
    """Map a yaw angle to a monitor id using midpoint boundaries.

    Monitors are ordered by their calibrated yaw; the half-way point between
    each neighbouring pair splits the yaw axis into zones, and the zone that
    contains `yaw` decides the monitor. Yaw values outside the calibrated
    range clamp to the nearest end.
    """
    ordered = sorted(calibration.items(), key=lambda item: item[1])

    lowest_id, lowest_yaw = ordered[0]
    highest_id, highest_yaw = ordered[-1]

    # Clamp to the extremes of the calibrated range.
    if yaw <= lowest_yaw:
        return int(lowest_id)
    if yaw >= highest_yaw:
        return int(highest_id)

    # Walk neighbouring pairs; the first midpoint above `yaw` identifies
    # the zone.
    for (mid, angle), (_, next_angle) in zip(ordered, ordered[1:]):
        midpoint = (angle + next_angle) / 2
        if yaw < midpoint:
            return int(mid)

    return int(highest_id)
207
+
208
+
209
def get_current_monitor():
    """Return the focused aerospace monitor id, or None on any failure.

    Parses the first '|'-separated field of `aerospace list-monitors
    --focused`. Any failure — missing binary, timeout, empty output,
    unparseable id — yields None.
    """
    try:
        proc = subprocess.run(
            ["aerospace", "list-monitors", "--focused"],
            capture_output=True, text=True, timeout=2,
        )
        focused = proc.stdout.strip()
        return int(focused.split("|")[0].strip()) if focused else None
    except Exception:
        return None
222
+
223
+
224
def main():
    """Entry point: parse flags, calibrate if needed, then run the
    head-tracking loop that switches aerospace monitor focus.
    """
    parser = argparse.ArgumentParser(
        description="Head tracking display focus switcher"
    )
    parser.add_argument(
        "--calibrate", action="store_true",
        help="Force recalibration (even if a calibration file exists)",
    )
    parser.add_argument(
        "--calibration-file", type=str, default=DEFAULT_CALIBRATION_PATH,
        help=f"Path to calibration file (default: {DEFAULT_CALIBRATION_PATH})",
    )
    parser.add_argument(
        "--camera", type=int, default=0,
        help="Camera index (default: 0)",
    )
    parser.add_argument(
        "--preview", action="store_true",
        help="Show camera preview window (calibration only — steals focus from aerospace)",
    )
    parser.add_argument(
        "--verbose", action="store_true",
        help="Print yaw angle continuously",
    )
    args = parser.parse_args()

    # Fetch aerospace monitors. Output format assumed: "id | name" per line
    # — TODO confirm against the installed aerospace version.
    try:
        result = subprocess.run(
            ["aerospace", "list-monitors"],
            capture_output=True, text=True, timeout=2,
        )
        aero_monitors = []
        for line in result.stdout.strip().splitlines():
            parts = line.split("|")
            mid = int(parts[0].strip())
            name = parts[1].strip() if len(parts) > 1 else ""
            aero_monitors.append((mid, name))
    except Exception as e:
        print(f" [error] Failed to list monitors: {e}")
        sys.exit(1)

    # Switching focus only makes sense with 2+ monitors.
    if len(aero_monitors) < 2:
        print(" [error] Need at least 2 monitors. Found:", len(aero_monitors))
        sys.exit(1)

    # Init MediaPipe Face Landmarker — model is looked up first next to this
    # script (symlinked by the bin wrapper), then in the data dir.
    model_path = os.path.join(SCRIPT_DIR, "face_landmarker.task")
    if not os.path.exists(model_path):
        model_path = os.path.join(SWIVL_DATA, "face_landmarker.task")
    if not os.path.exists(model_path):
        print(f" [error] Model file not found")
        print(" Run: gazectl (the bin wrapper downloads it automatically)")
        sys.exit(1)

    # Latest landmarks from the async callback, shared with sample_yaw and
    # the loop below. One-element list so the nested callback can rebind it.
    latest_landmarks = [None]
    lock = threading.Lock()

    def on_result(result, image, timestamp_ms):
        # MediaPipe invokes this on its own thread — hence the lock.
        with lock:
            if result.face_landmarks:
                latest_landmarks[0] = result.face_landmarks[0]
            else:
                latest_landmarks[0] = None

    base_options = mp_python.BaseOptions(model_asset_path=model_path)
    options = vision.FaceLandmarkerOptions(
        base_options=base_options,
        running_mode=vision.RunningMode.LIVE_STREAM,
        num_faces=1,
        min_face_detection_confidence=0.7,
        min_face_presence_confidence=0.7,
        min_tracking_confidence=0.7,
        result_callback=on_result,
    )
    landmarker = vision.FaceLandmarker.create_from_options(options)

    # Init camera
    cap = cv2.VideoCapture(args.camera)
    if not cap.isOpened():
        print(" [error] Cannot open camera", args.camera)
        sys.exit(1)

    # Low resolution is sufficient for pose estimation and keeps latency down.
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
    cap.set(cv2.CAP_PROP_FPS, 30)

    # Load saved calibration unless --calibrate forces a fresh one.
    calibration = None
    if not args.calibrate:
        calibration = load_calibration(args.calibration_file)
        if calibration:
            print(f" Loaded calibration from {args.calibration_file}")

    if calibration is None:
        calibration = calibrate(cap, landmarker, lock, latest_landmarks, aero_monitors)
        save_calibration(args.calibration_file, calibration)

    # Print config: per-monitor yaws and the midpoint boundaries between them.
    sorted_cal = sorted(calibration.items(), key=lambda x: x[1])
    boundaries = []
    for i in range(len(sorted_cal) - 1):
        b = (sorted_cal[i][1] + sorted_cal[i + 1][1]) / 2
        boundaries.append(b)

    # NOTE(review): banner says "headtrack" while the package is "gazectl" —
    # presumably a pre-rename leftover; runtime string left untouched here.
    print(f"\n headtrack - Head Tracking Display Switcher")
    print(f" ==========================================")
    print(f" Monitors:")
    for mid_str, yaw in sorted_cal:
        name = next((m[1] for m in aero_monitors if str(m[0]) == mid_str), "?")
        print(f"   {name}: calibrated at {yaw:+.1f}°")
    print(f" Boundaries: {', '.join(f'{b:+.1f}°' for b in boundaries)}")
    print(f" Preview: {args.preview}")
    print(f"\n Turn your head to switch display focus.")
    print(f" Press Ctrl+C to quit.\n")

    # Seed with the currently focused monitor so we don't fire a redundant
    # focus-monitor call on the first detection.
    current_monitor = get_current_monitor()
    # Continue the timestamp sequence where calibration's sample_yaw left
    # off — LIVE_STREAM timestamps must be strictly increasing.
    frame_ts = _last_frame_ts

    def cleanup(*_):
        # Also used as the SIGINT/SIGTERM handler, hence *_.
        cap.release()
        if args.preview:
            cv2.destroyAllWindows()
        landmarker.close()
        print("\n Stopped.")
        sys.exit(0)

    signal.signal(signal.SIGINT, cleanup)
    signal.signal(signal.SIGTERM, cleanup)

    while True:
        ret, frame = cap.read()
        if not ret:
            continue

        # Nominal 30fps: advance the stream timestamp ~33ms per frame.
        frame_ts += 33
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)

        try:
            landmarker.detect_async(mp_image, frame_ts)
        except Exception:
            pass  # best-effort: a dropped frame just means no update

        # The yaw below is computed from the most recent callback result,
        # which may lag this frame by one or two frames.
        yaw = None
        with lock:
            landmarks = latest_landmarks[0]

        if landmarks is not None:
            h, w = frame.shape[:2]
            yaw = get_head_yaw(landmarks, w, h)

        if yaw is not None:
            target = get_target_monitor(yaw, calibration)

            if args.verbose:
                target_name = next((m[1] for m in aero_monitors if m[0] == target), "?")
                print(f" yaw: {yaw:+6.1f}° target={target_name}", end="\r")

            # Only fire aerospace when the target zone actually changes.
            if target != current_monitor:
                name = next((m[1] for m in aero_monitors if m[0] == target), "?")
                # Fire-and-forget: don't block the capture loop on aerospace.
                subprocess.Popen(
                    ["aerospace", "focus-monitor", str(target)],
                    stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                )
                current_monitor = target
                if args.verbose:
                    print(f"\n >> Focused: {name}")

            if args.preview:
                # Overlay yaw text and a horizontal bar proportional to yaw.
                color = (0, 255, 0)
                cv2.putText(frame, f"Yaw: {yaw:+.1f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
                bar_x = int(frame.shape[1] / 2 + yaw * 5)
                cv2.line(frame, (frame.shape[1] // 2, 50), (bar_x, 50), color, 4)

        if args.preview:
            cv2.imshow("headtrack", frame)
            # waitKey also pumps the HighGUI event loop; 'q' quits.
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
        else:
            # Tiny sleep to avoid a 100%-CPU busy loop in headless mode.
            time.sleep(0.005)

    cleanup()


if __name__ == "__main__":
    main()
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "gazectl",
3
+ "version": "0.1.0",
4
+ "description": "Head tracking display focus switcher for macOS + Aerospace",
5
+ "bin": {
6
+ "gazectl": "bin/gazectl"
7
+ },
8
+ "files": [
9
+ "bin/gazectl",
10
+ "gazectl.py",
11
+ "requirements.txt"
12
+ ],
13
+ "keywords": [
14
+ "head-tracking",
15
+ "display",
16
+ "monitor",
17
+ "focus",
18
+ "aerospace",
19
+ "macos",
20
+ "mediapipe",
21
+ "webcam"
22
+ ],
23
+ "os": ["darwin"],
24
+ "author": "Sahaj Jain",
25
+ "license": "MIT",
26
+ "repository": {
27
+ "type": "git",
28
+ "url": "git+https://github.com/jnsahaj/gazectl.git"
29
+ },
30
+ "homepage": "https://github.com/jnsahaj/gazectl"
31
+ }
@@ -0,0 +1,3 @@
1
+ opencv-python>=4.8.0
2
+ mediapipe>=0.10.0
3
+ numpy>=1.24.0