gazectl 0.1.1 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Sahaj Jain
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Head tracking display focus switcher for macOS + [Aerospace](https://github.com/nikitabobko/AerospaceWM).
4
4
 
5
- Uses your webcam and MediaPipe to detect which way your head is turned, then switches Aerospace monitor focus automatically.
5
+ Uses your webcam and Apple's Vision framework to detect which way your head is turned, then switches Aerospace monitor focus automatically.
6
6
 
7
7
  ## Install
8
8
 
@@ -16,7 +16,7 @@ Or run directly:
16
16
  npx gazectl
17
17
  ```
18
18
 
19
- Requires Python 3.9+ and [Aerospace](https://github.com/nikitabobko/AerospaceWM). First run sets up a Python venv and downloads the MediaPipe model automatically.
19
+ Requires macOS 14+ and [Aerospace](https://github.com/nikitabobko/AerospaceWM).
20
20
 
21
21
  ## Usage
22
22
 
@@ -40,15 +40,21 @@ On first run, gazectl asks you to look at each monitor and press Enter. It sampl
40
40
  | `--calibrate` | off | Force recalibration |
41
41
  | `--calibration-file` | `~/.local/share/gazectl/calibration.json` | Custom calibration path |
42
42
  | `--camera` | 0 | Camera index |
43
- | `--preview` | off | Show camera preview (steals focus — calibration only) |
44
43
  | `--verbose` | off | Print yaw angle continuously |
45
44
 
46
45
  ## How it works
47
46
 
48
47
  1. **Calibrate** — look at each monitor, gazectl records the yaw angle
49
- 2. **Track** — MediaPipe Face Landmarker detects head yaw in real-time (~30fps)
48
+ 2. **Track** — Apple Vision detects head yaw in real-time (~30fps)
50
49
  3. **Switch** — when yaw crosses the midpoint between calibrated angles, fires `aerospace focus-monitor`
51
50
 
51
+ ## Build from source
52
+
53
+ ```bash
54
+ swift build -c release
55
+ cp .build/release/gazectl /usr/local/bin/gazectl
56
+ ```
57
+
52
58
  ## License
53
59
 
54
60
  MIT
package/bin/gazectl CHANGED
@@ -1,11 +1,6 @@
1
1
  #!/usr/bin/env bash
2
2
  set -e
3
3
 
4
- GAZECTL_DIR="${GAZECTL_HOME:-$HOME/.local/share/gazectl}"
5
- VENV_DIR="$GAZECTL_DIR/venv"
6
- MODEL_FILE="$GAZECTL_DIR/face_landmarker.task"
7
- MODEL_URL="https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/latest/face_landmarker.task"
8
-
9
4
  # Resolve symlinks to find actual package directory
10
5
  # (npm symlinks bin scripts from node_modules/.bin/)
11
6
  SOURCE="${BASH_SOURCE[0]}"
@@ -14,47 +9,6 @@ while [ -L "$SOURCE" ]; do
14
9
  SOURCE="$(readlink "$SOURCE")"
15
10
  [[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
16
11
  done
17
- SCRIPT_DIR="$(cd "$(dirname "$SOURCE")/.." && pwd)"
18
- GAZECTL_PY="$SCRIPT_DIR/gazectl.py"
19
- REQUIREMENTS="$SCRIPT_DIR/requirements.txt"
20
-
21
- if [ ! -f "$GAZECTL_PY" ]; then
22
- echo "error: gazectl.py not found at $GAZECTL_PY" >&2
23
- exit 1
24
- fi
25
-
26
- # Check for python3
27
- if ! command -v python3 &>/dev/null; then
28
- echo "error: python3 is required but not found" >&2
29
- echo "install: brew install python3" >&2
30
- exit 1
31
- fi
32
-
33
- # Setup on first run
34
- if [ ! -d "$VENV_DIR" ]; then
35
- echo "gazectl: first-run setup..."
36
- mkdir -p "$GAZECTL_DIR"
37
-
38
- echo " creating python venv..."
39
- python3 -m venv "$VENV_DIR"
40
-
41
- echo " installing dependencies..."
42
- "$VENV_DIR/bin/pip" install -q -r "$REQUIREMENTS"
43
-
44
- echo " done."
45
- fi
46
-
47
- # Download model if missing
48
- if [ ! -f "$MODEL_FILE" ]; then
49
- echo " downloading face landmarker model..."
50
- curl -sL -o "$MODEL_FILE" "$MODEL_URL"
51
- echo " done."
52
- fi
53
-
54
- # Symlink model into script dir if needed (gazectl.py looks for it there)
55
- SCRIPT_MODEL="$SCRIPT_DIR/face_landmarker.task"
56
- if [ ! -f "$SCRIPT_MODEL" ] && [ ! -L "$SCRIPT_MODEL" ]; then
57
- ln -s "$MODEL_FILE" "$SCRIPT_MODEL" 2>/dev/null || true
58
- fi
12
+ BIN_DIR="$(cd "$(dirname "$SOURCE")" && pwd)"
59
13
 
60
- exec "$VENV_DIR/bin/python" "$GAZECTL_PY" "$@"
14
+ exec "$BIN_DIR/gazectl-bin" "$@"
package/package.json CHANGED
@@ -1,15 +1,17 @@
1
1
  {
2
2
  "name": "gazectl",
3
- "version": "0.1.1",
3
+ "version": "0.2.0",
4
4
  "description": "Head tracking display focus switcher for macOS + Aerospace",
5
5
  "bin": {
6
6
  "gazectl": "bin/gazectl"
7
7
  },
8
8
  "files": [
9
9
  "bin/gazectl",
10
- "gazectl.py",
11
- "requirements.txt"
10
+ "bin/gazectl-bin"
12
11
  ],
12
+ "scripts": {
13
+ "build": "swift build -c release"
14
+ },
13
15
  "keywords": [
14
16
  "head-tracking",
15
17
  "display",
@@ -17,8 +19,9 @@
17
19
  "focus",
18
20
  "aerospace",
19
21
  "macos",
20
- "mediapipe",
21
- "webcam"
22
+ "webcam",
23
+ "swift",
24
+ "vision"
22
25
  ],
23
26
  "os": ["darwin"],
24
27
  "author": "Sahaj Jain",
package/gazectl.py DELETED
@@ -1,411 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- gazectl - Head tracking display focus switcher for macOS + Aerospace.
4
-
5
- Uses your webcam + MediaPipe Face Landmarker to detect head yaw direction,
6
- then calls `aerospace focus-monitor` to switch display focus.
7
-
8
- Calibration-based: on first run, you look at each monitor so the program
9
- learns which yaw angle corresponds to which display.
10
- """
11
-
12
- import json
13
- import os
14
- import subprocess
15
- import time
16
- import argparse
17
- import signal
18
- import sys
19
- import threading
20
-
21
- import cv2
22
- import mediapipe as mp
23
- from mediapipe.tasks import python as mp_python
24
- from mediapipe.tasks.python import vision
25
- import numpy as np
26
-
27
-
28
- # 3D model points for head pose estimation (generic face model)
29
- MODEL_POINTS = np.array([
30
- (0.0, 0.0, 0.0), # Nose tip
31
- (0.0, -330.0, -65.0), # Chin
32
- (-225.0, 170.0, -135.0), # Left eye left corner
33
- (225.0, 170.0, -135.0), # Right eye right corner
34
- (-150.0, -150.0, -125.0),# Left mouth corner
35
- (150.0, -150.0, -125.0), # Right mouth corner
36
- ], dtype=np.float64)
37
-
38
- # MediaPipe Face Landmarker landmark indices for the 6 points above
39
- LANDMARK_IDS = [1, 152, 263, 33, 287, 57]
40
-
41
- SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
42
- SWIVL_DATA = os.environ.get("SWIVL_HOME", os.path.expanduser("~/.local/share/gazectl"))
43
- DEFAULT_CALIBRATION_PATH = os.path.join(SWIVL_DATA, "calibration.json")
44
-
45
-
46
- def get_head_yaw(face_landmarks, frame_w, frame_h):
47
- """Compute head yaw angle (left/right rotation) from face landmarks."""
48
- image_points = np.array([
49
- (face_landmarks[i].x * frame_w, face_landmarks[i].y * frame_h)
50
- for i in LANDMARK_IDS
51
- ], dtype=np.float64)
52
-
53
- focal_length = frame_w
54
- center = (frame_w / 2, frame_h / 2)
55
- camera_matrix = np.array([
56
- [focal_length, 0, center[0]],
57
- [0, focal_length, center[1]],
58
- [0, 0, 1],
59
- ], dtype=np.float64)
60
- dist_coeffs = np.zeros((4, 1))
61
-
62
- success, rotation_vec, _ = cv2.solvePnP(
63
- MODEL_POINTS, image_points, camera_matrix, dist_coeffs,
64
- flags=cv2.SOLVEPNP_ITERATIVE,
65
- )
66
- if not success:
67
- return None
68
-
69
- rotation_mat, _ = cv2.Rodrigues(rotation_vec)
70
- angles, _, _, _, _, _ = cv2.RQDecomp3x3(rotation_mat)
71
- return -angles[1]
72
-
73
-
74
- _last_frame_ts = 0
75
-
76
- def sample_yaw(cap, landmarker, lock, latest_landmarks, duration=2.0):
77
- """Sample yaw values for `duration` seconds, return median."""
78
- global _last_frame_ts
79
- samples = []
80
- start = time.monotonic()
81
- frame_ts = max(_last_frame_ts, int(time.monotonic() * 1000))
82
-
83
- while time.monotonic() - start < duration:
84
- ret, frame = cap.read()
85
- if not ret:
86
- continue
87
-
88
- frame_ts += 33
89
- rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
90
- mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
91
-
92
- try:
93
- landmarker.detect_async(mp_image, frame_ts)
94
- except Exception:
95
- pass
96
-
97
- time.sleep(0.03)
98
-
99
- with lock:
100
- landmarks = latest_landmarks[0]
101
-
102
- if landmarks is not None:
103
- h, w = frame.shape[:2]
104
- yaw = get_head_yaw(landmarks, w, h)
105
- if yaw is not None:
106
- samples.append(yaw)
107
- print(f" sampling... yaw: {yaw:+.1f}° ({len(samples)} samples)", end="\r")
108
-
109
- print()
110
- _last_frame_ts = frame_ts
111
- if not samples:
112
- return None
113
- return float(np.median(samples))
114
-
115
-
116
- def calibrate(cap, landmarker, lock, latest_landmarks, aero_monitors):
117
- """Interactive calibration: look at each monitor, record yaw."""
118
- print("\n === Calibration ===")
119
- print(f" Found {len(aero_monitors)} monitors:\n")
120
- for mid, name in aero_monitors:
121
- print(f" [{mid}] {name}")
122
-
123
- calibration = {}
124
- print()
125
-
126
- for mid, name in aero_monitors:
127
- print(f" Look at \"{name}\" and press Enter...", end="", flush=True)
128
- try:
129
- input()
130
- except (EOFError, KeyboardInterrupt):
131
- sys.exit(0)
132
-
133
- yaw = sample_yaw(cap, landmarker, lock, latest_landmarks)
134
- if yaw is None:
135
- print(f" [error] No face detected. Try again.")
136
- # Retry once
137
- print(f" Look at \"{name}\" and press Enter...", end="", flush=True)
138
- try:
139
- input()
140
- except (EOFError, KeyboardInterrupt):
141
- sys.exit(0)
142
- yaw = sample_yaw(cap, landmarker, lock, latest_landmarks)
143
- if yaw is None:
144
- print(f" [error] Still no face detected. Skipping.")
145
- continue
146
-
147
- calibration[str(mid)] = yaw
148
- print(f" {name}: {yaw:+.1f}°")
149
-
150
- if len(calibration) < 2:
151
- print("\n [error] Need at least 2 calibrated monitors.")
152
- sys.exit(1)
153
-
154
- print("\n Calibration complete:")
155
- for mid_str, yaw in sorted(calibration.items(), key=lambda x: x[1]):
156
- name = next((m[1] for m in aero_monitors if str(m[0]) == mid_str), "?")
157
- print(f" {name} (id {mid_str}): {yaw:+.1f}°")
158
-
159
- return calibration
160
-
161
-
162
- def save_calibration(path, data):
163
- """Save calibration data to JSON."""
164
- os.makedirs(os.path.dirname(path), exist_ok=True)
165
- with open(path, "w") as f:
166
- json.dump(data, f, indent=2)
167
- print(f" Saved calibration to {path}")
168
-
169
-
170
- def load_calibration(path):
171
- """Load calibration data from JSON. Returns None if not found."""
172
- if not os.path.exists(path):
173
- return None
174
- try:
175
- with open(path) as f:
176
- return json.load(f)
177
- except (json.JSONDecodeError, IOError):
178
- return None
179
-
180
-
181
- def get_target_monitor(yaw, calibration):
182
- """Given current yaw and calibration data, return the target monitor ID.
183
-
184
- Sorts monitors by calibrated yaw, computes midpoint boundaries,
185
- and returns whichever zone the current yaw falls in.
186
- """
187
- # Sort by calibrated yaw value
188
- sorted_monitors = sorted(calibration.items(), key=lambda x: x[1])
189
-
190
- # If yaw is below the lowest calibrated value, pick the lowest monitor
191
- if yaw <= sorted_monitors[0][1]:
192
- return int(sorted_monitors[0][0])
193
-
194
- # If yaw is above the highest calibrated value, pick the highest monitor
195
- if yaw >= sorted_monitors[-1][1]:
196
- return int(sorted_monitors[-1][0])
197
-
198
- # Find which zone yaw falls in (midpoint boundaries)
199
- for i in range(len(sorted_monitors) - 1):
200
- mid_a = sorted_monitors[i]
201
- mid_b = sorted_monitors[i + 1]
202
- boundary = (mid_a[1] + mid_b[1]) / 2
203
- if yaw < boundary:
204
- return int(mid_a[0])
205
-
206
- return int(sorted_monitors[-1][0])
207
-
208
-
209
- def get_current_monitor():
210
- """Get the currently focused aerospace monitor ID."""
211
- try:
212
- result = subprocess.run(
213
- ["aerospace", "list-monitors", "--focused"],
214
- capture_output=True, text=True, timeout=2,
215
- )
216
- line = result.stdout.strip()
217
- if line:
218
- return int(line.split("|")[0].strip())
219
- except Exception:
220
- pass
221
- return None
222
-
223
-
224
- def main():
225
- parser = argparse.ArgumentParser(
226
- description="Head tracking display focus switcher"
227
- )
228
- parser.add_argument(
229
- "--calibrate", action="store_true",
230
- help="Force recalibration (even if a calibration file exists)",
231
- )
232
- parser.add_argument(
233
- "--calibration-file", type=str, default=DEFAULT_CALIBRATION_PATH,
234
- help=f"Path to calibration file (default: {DEFAULT_CALIBRATION_PATH})",
235
- )
236
- parser.add_argument(
237
- "--camera", type=int, default=0,
238
- help="Camera index (default: 0)",
239
- )
240
- parser.add_argument(
241
- "--preview", action="store_true",
242
- help="Show camera preview window (calibration only — steals focus from aerospace)",
243
- )
244
- parser.add_argument(
245
- "--verbose", action="store_true",
246
- help="Print yaw angle continuously",
247
- )
248
- args = parser.parse_args()
249
-
250
- # Fetch aerospace monitors
251
- try:
252
- result = subprocess.run(
253
- ["aerospace", "list-monitors"],
254
- capture_output=True, text=True, timeout=2,
255
- )
256
- aero_monitors = []
257
- for line in result.stdout.strip().splitlines():
258
- parts = line.split("|")
259
- mid = int(parts[0].strip())
260
- name = parts[1].strip() if len(parts) > 1 else ""
261
- aero_monitors.append((mid, name))
262
- except Exception as e:
263
- print(f" [error] Failed to list monitors: {e}")
264
- sys.exit(1)
265
-
266
- if len(aero_monitors) < 2:
267
- print(" [error] Need at least 2 monitors. Found:", len(aero_monitors))
268
- sys.exit(1)
269
-
270
- # Init MediaPipe Face Landmarker — check script dir and data dir
271
- model_path = os.path.join(SCRIPT_DIR, "face_landmarker.task")
272
- if not os.path.exists(model_path):
273
- model_path = os.path.join(SWIVL_DATA, "face_landmarker.task")
274
- if not os.path.exists(model_path):
275
- print(f" [error] Model file not found")
276
- print(" Run: gazectl (the bin wrapper downloads it automatically)")
277
- sys.exit(1)
278
-
279
- latest_landmarks = [None]
280
- lock = threading.Lock()
281
-
282
- def on_result(result, image, timestamp_ms):
283
- with lock:
284
- if result.face_landmarks:
285
- latest_landmarks[0] = result.face_landmarks[0]
286
- else:
287
- latest_landmarks[0] = None
288
-
289
- base_options = mp_python.BaseOptions(model_asset_path=model_path)
290
- options = vision.FaceLandmarkerOptions(
291
- base_options=base_options,
292
- running_mode=vision.RunningMode.LIVE_STREAM,
293
- num_faces=1,
294
- min_face_detection_confidence=0.7,
295
- min_face_presence_confidence=0.7,
296
- min_tracking_confidence=0.7,
297
- result_callback=on_result,
298
- )
299
- landmarker = vision.FaceLandmarker.create_from_options(options)
300
-
301
- # Init camera
302
- cap = cv2.VideoCapture(args.camera)
303
- if not cap.isOpened():
304
- print(" [error] Cannot open camera", args.camera)
305
- sys.exit(1)
306
-
307
- cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
308
- cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
309
- cap.set(cv2.CAP_PROP_FPS, 30)
310
-
311
- # Load or run calibration
312
- calibration = None
313
- if not args.calibrate:
314
- calibration = load_calibration(args.calibration_file)
315
- if calibration:
316
- print(f" Loaded calibration from {args.calibration_file}")
317
-
318
- if calibration is None:
319
- calibration = calibrate(cap, landmarker, lock, latest_landmarks, aero_monitors)
320
- save_calibration(args.calibration_file, calibration)
321
-
322
- # Print config
323
- sorted_cal = sorted(calibration.items(), key=lambda x: x[1])
324
- boundaries = []
325
- for i in range(len(sorted_cal) - 1):
326
- b = (sorted_cal[i][1] + sorted_cal[i + 1][1]) / 2
327
- boundaries.append(b)
328
-
329
- print(f"\n headtrack - Head Tracking Display Switcher")
330
- print(f" ==========================================")
331
- print(f" Monitors:")
332
- for mid_str, yaw in sorted_cal:
333
- name = next((m[1] for m in aero_monitors if str(m[0]) == mid_str), "?")
334
- print(f" {name}: calibrated at {yaw:+.1f}°")
335
- print(f" Boundaries: {', '.join(f'{b:+.1f}°' for b in boundaries)}")
336
- print(f" Preview: {args.preview}")
337
- print(f"\n Turn your head to switch display focus.")
338
- print(f" Press Ctrl+C to quit.\n")
339
-
340
- current_monitor = get_current_monitor()
341
- frame_ts = _last_frame_ts
342
-
343
- def cleanup(*_):
344
- cap.release()
345
- if args.preview:
346
- cv2.destroyAllWindows()
347
- landmarker.close()
348
- print("\n Stopped.")
349
- sys.exit(0)
350
-
351
- signal.signal(signal.SIGINT, cleanup)
352
- signal.signal(signal.SIGTERM, cleanup)
353
-
354
- while True:
355
- ret, frame = cap.read()
356
- if not ret:
357
- continue
358
-
359
- frame_ts += 33
360
- rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
361
- mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
362
-
363
- try:
364
- landmarker.detect_async(mp_image, frame_ts)
365
- except Exception:
366
- pass
367
-
368
- yaw = None
369
- with lock:
370
- landmarks = latest_landmarks[0]
371
-
372
- if landmarks is not None:
373
- h, w = frame.shape[:2]
374
- yaw = get_head_yaw(landmarks, w, h)
375
-
376
- if yaw is not None:
377
- target = get_target_monitor(yaw, calibration)
378
-
379
- if args.verbose:
380
- target_name = next((m[1] for m in aero_monitors if m[0] == target), "?")
381
- print(f" yaw: {yaw:+6.1f}° target={target_name}", end="\r")
382
-
383
- if target != current_monitor:
384
- name = next((m[1] for m in aero_monitors if m[0] == target), "?")
385
- subprocess.Popen(
386
- ["aerospace", "focus-monitor", str(target)],
387
- stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
388
- )
389
- current_monitor = target
390
- if args.verbose:
391
- print(f"\n >> Focused: {name}")
392
-
393
- if args.preview:
394
- color = (0, 255, 0)
395
- cv2.putText(frame, f"Yaw: {yaw:+.1f}", (10, 30),
396
- cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
397
- bar_x = int(frame.shape[1] / 2 + yaw * 5)
398
- cv2.line(frame, (frame.shape[1] // 2, 50), (bar_x, 50), color, 4)
399
-
400
- if args.preview:
401
- cv2.imshow("headtrack", frame)
402
- if cv2.waitKey(1) & 0xFF == ord("q"):
403
- break
404
- else:
405
- time.sleep(0.005)
406
-
407
- cleanup()
408
-
409
-
410
- if __name__ == "__main__":
411
- main()
package/requirements.txt DELETED
@@ -1,3 +0,0 @@
1
- opencv-python>=4.8.0
2
- mediapipe>=0.10.0
3
- numpy>=1.24.0