openvisionkit 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,913 @@
1
+ import math
2
+ from pathlib import Path
3
+
4
+ import cv2
5
+ import mediapipe as mp
6
+ import numpy as np
7
+
8
+ # Correct imports for Tasks API
9
+ from mediapipe.tasks.python import vision
10
+ from mediapipe.tasks.python.core.base_options import BaseOptions
11
+ from mediapipe.tasks.python.vision.core.vision_task_running_mode import (
12
+ VisionTaskRunningMode,
13
+ )
14
+ from mediapipe.tasks.python.vision.face_landmarker import (
15
+ FaceLandmarker,
16
+ FaceLandmarkerOptions,
17
+ )
18
+
19
# Directory named "models" located next to this module file.
_MODEL_DIR = Path(__file__).parent.joinpath("models")
# String path of the landmarker model used when the caller does not pass one.
_DEFAULT_MODEL = str(_MODEL_DIR.joinpath("face_landmarker_v2_with_blendshapes.task"))
21
+
22
+
23
class FaceMeshDetector:
    """Detect facial landmarks with MediaPipe's FaceLandmarker and derive face metrics.

    Each detected face is described by 478 landmarks (the face mesh plus the
    iris landmarks at indices 468-477). Besides drawing the landmarks on the
    input image, the class extracts blendshape coefficients for facial
    expressions, head-pose transformation matrices, and a bounding box for
    each detected face.

    **Extended features (beyond original landmarks drawing):**
    - Configurable options in constructor (num_faces, confidence thresholds, running mode, blendshapes, matrices).
    - Returns face blendshapes (52 facial expression coefficients per detected face).
    - Returns facial transformation matrices (4x4 head-pose matrices per detected face).
    - Computes and returns axis-aligned bounding boxes for each detected face (true "face detection" feature).
    - Supports IMAGE mode by default (VIDEO / LIVE_STREAM can be enabled via constructor; detect_for_video would require additional timestamp handling).

    The class can be used for face tracking, expression recognition, AR effects, head-pose estimation, etc.

    Args:
        model_path (str): The path to the face landmarker model file.
            Defaults to 'models/face_landmarker_v2_with_blendshapes.task'
            in the directory of this module.
    """

    # ====================== LANDMARK INDICES (MediaPipe Face Landmarker v2 - 478 points) ======================
    # Iris centers (added by the iris model)
    LEFT_IRIS_CENTER = 468
    RIGHT_IRIS_CENTER = 473
    # NOTE(review): LEFT_IRIS holds 474-477, the indices adjacent to
    # RIGHT_IRIS_CENTER (473), and RIGHT_IRIS holds 469-472, adjacent to
    # LEFT_IRIS_CENTER (468). The two lists look swapped relative to the
    # centers above - TODO confirm before using them together.
    LEFT_IRIS = [474, 475, 476, 477]
    RIGHT_IRIS = [469, 470, 471, 472]

    FOREHEAD_CENTER = 10

    # Eye corners
    # NOTE(review): _SYMMETRY_PAIRS below mirrors 33<->263 and 133<->362,
    # which suggests 263 is the outer corner and 362 the inner corner of the
    # second eye, i.e. the two RIGHT_EYE_* names may be swapped - TODO confirm.
    LEFT_EYE_OUTER = 33
    LEFT_EYE_INNER = 133
    RIGHT_EYE_OUTER = 362
    RIGHT_EYE_INNER = 263

    # Mouth landmarks for openness ratio
    UPPER_LIP_CENTER = 13
    LOWER_LIP_CENTER = 14  # Common pair used in many projects
    MOUTH_LEFT = 61
    MOUTH_RIGHT = 291
    LIP_LEFT_CORNERS = 61
    LIP_RIGHT_CORNERS = 291
    LIP_CORNERS = [LIP_LEFT_CORNERS, LIP_RIGHT_CORNERS]
    LIP_CENTER_TOP = 13
    LIP_CENTER_BOTTOM = 14
    LIP_CENTER = [LIP_CENTER_TOP, LIP_CENTER_BOTTOM]

    # Face width landmarks (for normalization)
    LEFT_CHEEK = 234
    RIGHT_CHEEK = 454
    # Nose tip (for head pose estimation)
    NOSE_TIP = 1
    # ====================== END OF LANDMARK INDICES ======================

    # Eye contour indices. The first nine entries of each list are paired
    # position-by-position in reverse order by the eye rows of
    # _SYMMETRY_PAIRS (33<->263, 144<->373, ..., 133<->362).
    LEFT_EYE = [
        33,
        7,
        163,
        144,
        145,
        153,
        154,
        155,
        133,
        173,
        157,
        158,
        159,
        160,
        161,
        246,
    ]

    RIGHT_EYE = [
        362,
        382,
        381,
        380,
        374,
        373,
        390,
        249,
        263,
        466,
        388,
        387,
        386,
        385,
        384,
        398,
    ]

    # Six points per eye in the order consumed by get_eye_aspect_ratio:
    # (corner, upper, upper, corner, lower, lower).
    LEFT_EYE_BLINK = [33, 160, 158, 133, 153, 144]

    RIGHT_EYE_BLINK = [362, 385, 387, 263, 373, 380]

    # (left index, right index) landmark pairs compared by
    # get_face_symmetry_score. Every eye entry pairs a LEFT_EYE index with
    # its counterpart in RIGHT_EYE; 144 and 145 previously pointed at 374 and
    # 375, and 375 is not an eye-contour index at all.
    _SYMMETRY_PAIRS = [
        (33, 263),
        (160, 387),
        (158, 385),
        (133, 362),
        (144, 373),
        (145, 374),
        (153, 380),
        (154, 381),
        (61, 291),
        (185, 409),
        (40, 270),
        (37, 267),
    ]
131
+
132
def __init__(
    self,
    model_path=_DEFAULT_MODEL,
    num_faces: int = 2,
    min_face_detection_confidence: float = 0.5,
    min_face_presence_confidence: float = 0.5,
    min_tracking_confidence: float = 0.5,
    output_face_blendshapes: bool = True,
    output_facial_transformation_matrixes: bool = True,
    running_mode: VisionTaskRunningMode = vision.RunningMode.IMAGE,
):
    """Build the underlying FaceLandmarker and keep handles to the drawing helpers.

    Args:
        model_path (str): Path to the .task model file.
        num_faces (int): Maximum number of faces to detect.
        min_face_detection_confidence (float): Minimum confidence for face detection.
        min_face_presence_confidence (float): Minimum confidence that a face is present.
        min_tracking_confidence (float): Minimum confidence for tracking (used in VIDEO/LIVE_STREAM).
        output_face_blendshapes (bool): Whether to output blendshape scores for expressions.
        output_facial_transformation_matrixes (bool): Whether to output 4x4 head-pose matrices.
        running_mode (RunningMode): IMAGE (default), VIDEO, or LIVE_STREAM.
    """
    # All constructor arguments are forwarded unchanged to the MediaPipe options object.
    model_source = BaseOptions(model_asset_path=model_path)
    landmarker_options = FaceLandmarkerOptions(
        base_options=model_source,
        num_faces=num_faces,
        min_face_detection_confidence=min_face_detection_confidence,
        min_face_presence_confidence=min_face_presence_confidence,
        min_tracking_confidence=min_tracking_confidence,
        output_face_blendshapes=output_face_blendshapes,
        output_facial_transformation_matrixes=output_facial_transformation_matrixes,
        running_mode=running_mode,
    )
    self.face_detector = FaceLandmarker.create_from_options(landmarker_options)

    # Drawing helpers used by landmarks_on_image.
    vision_tasks = mp.tasks.vision
    self.drawing_utils = vision_tasks.drawing_utils
    self.drawing_styles = vision_tasks.drawing_styles
168
+
169
def euclidean_distance(self, p1, p2):
    """Return the straight-line 2D distance between two points.

    Only the first two components of each point are used.

    Args:
        p1: Indexable holding x at position 0 and y at position 1.
        p2: Indexable holding x at position 0 and y at position 1.

    Returns:
        The distance as computed by ``np.hypot``.
    """
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    return np.hypot(delta_x, delta_y)
181
+
182
def get_landmark_point(self, landmark, width, height):
    """Scale a landmark's ``x``/``y`` attributes by the given size and truncate to ints.

    Args:
        landmark: Object exposing numeric ``x`` and ``y`` attributes.
        width: Factor applied to ``landmark.x``.
        height: Factor applied to ``landmark.y``.

    Returns:
        tuple(int, int): ``(int(x * width), int(y * height))``.
    """
    pixel_x = int(landmark.x * width)
    pixel_y = int(landmark.y * height)
    return pixel_x, pixel_y
184
+
185
def iris_center(self, face_landmarks, iris_indices, width, height):
    """Return the integer pixel centroid of the landmarks selected by ``iris_indices``.

    Args:
        face_landmarks: Either an indexable sequence of landmark objects (the
            form ``landmarks_on_image`` iterates over) or a container that
            exposes that sequence through a ``.landmark`` attribute.
        iris_indices: Iterable of landmark indices to average.
        width: Image width passed to ``get_landmark_point``.
        height: Image height passed to ``get_landmark_point``.

    Returns:
        tuple(int, int): Mean of the selected pixel points, truncated to ints.

    Raises:
        ValueError: If ``iris_indices`` is empty.
    """
    # Accept both shapes: fall back to the object itself when it has no
    # ``.landmark`` attribute, instead of failing with AttributeError.
    landmarks = getattr(face_landmarks, "landmark", face_landmarks)

    pixel_points = [
        self.get_landmark_point(landmarks[idx], width, height) for idx in iris_indices
    ]
    if not pixel_points:
        raise ValueError("iris_indices must contain at least one landmark index")

    cx, cy = np.array(pixel_points).mean(axis=0).astype(int)
    return int(cx), int(cy)
197
+
198
def distance_between_points(self, p1, p2):
    """Return the Euclidean distance between two points via ``math.dist``.

    Args:
        p1: Sequence of coordinates.
        p2: Sequence of coordinates with the same length as ``p1``.

    Returns:
        float: Distance between ``p1`` and ``p2``.
    """
    separation = math.dist(p1, p2)
    return separation
200
+
201
def get_mouth_openness_ratio(self, face):
    """Return lip gap divided by mouth width (0 = closed, ~0.5+ = wide open).

    Args:
        face: List of [x, y] pixel landmarks for one face.

    Returns:
        The ratio, or 0.0 when the face has fewer than 478 landmarks or the
        mouth width is not positive.
    """
    if len(face) < 478:
        return 0.0

    lip_gap = self.euclidean_distance(
        face[self.UPPER_LIP_CENTER], face[self.LOWER_LIP_CENTER]
    )
    corner_span = self.euclidean_distance(
        face[self.MOUTH_LEFT], face[self.MOUTH_RIGHT]
    )
    if corner_span > 0:
        return lip_gap / corner_span
    return 0.0
213
+
214
def get_eye_gaze_direction(self, face, is_left_eye=True):
    """Classify horizontal gaze from the iris position between the eye corners.

    The iris-center x offset from the midpoint of the two eye corners is
    divided by the horizontal corner distance; offsets beyond +/-0.18 are
    reported as 'Right' / 'Left'.

    Args:
        face: List of [x, y] pixel landmarks for one face.
        is_left_eye: Use the LEFT_* landmark constants when True, the
            RIGHT_* constants otherwise.

    Returns:
        str: 'Left', 'Center' or 'Right'; 'Unknown' when the face has fewer
        than 478 landmarks. A zero-width eye is reported as 'Center'.

    Note: This is a basic heuristic; a dedicated gaze model will be more accurate.
    """
    if len(face) < 478:
        return "Unknown"

    if is_left_eye:
        iris_idx = self.LEFT_IRIS_CENTER
        outer_idx = self.LEFT_EYE_OUTER
        inner_idx = self.LEFT_EYE_INNER
    else:
        iris_idx = self.RIGHT_IRIS_CENTER
        outer_idx = self.RIGHT_EYE_OUTER
        inner_idx = self.RIGHT_EYE_INNER

    outer_x = face[outer_idx][0]
    inner_x = face[inner_idx][0]
    midpoint_x = (outer_x + inner_x) / 2
    span = abs(outer_x - inner_x)
    if span == 0:
        return "Center"

    offset = (face[iris_idx][0] - midpoint_x) / span
    if offset < -0.18:
        return "Left"
    if offset > 0.18:
        return "Right"
    return "Center"
254
+
255
def get_inter_pupillary_distance(self, face, normalized=False):
    """Return the distance between the two iris-center landmarks.

    Args:
        face: List of [x, y] pixel landmarks for one face.
        normalized: When True, divide the distance by the cheek-to-cheek
            distance (LEFT_CHEEK to RIGHT_CHEEK).

    Returns:
        Pixel distance, or the normalized ratio when requested. 0.0 when the
        face has fewer than 478 landmarks, or when normalizing and the
        cheek distance is not positive.
    """
    if len(face) < 478:
        return 0.0

    pupil_gap = self.euclidean_distance(
        face[self.LEFT_IRIS_CENTER], face[self.RIGHT_IRIS_CENTER]
    )
    if not normalized:
        return pupil_gap

    cheek_span = self.euclidean_distance(
        face[self.LEFT_CHEEK], face[self.RIGHT_CHEEK]
    )
    if cheek_span > 0:
        return pupil_gap / cheek_span
    return 0.0
280
+
281
def overlay_ar_filter(self, frame, face, filter_img, filter_type="glasses"):
    """Alpha-blend an AR filter image onto the frame (glasses example).

    The filter is resized to a square whose side is 1.8x the distance
    between the LEFT_EYE_OUTER and RIGHT_EYE_OUTER landmarks, centered
    horizontally on their midpoint and placed slightly above it. The blend
    is written into ``frame`` in place.

    Args:
        frame (numpy.ndarray): The input image/frame in BGR format.
        face (list): List of 478 [x, y] landmarks for a detected face.
        filter_img (numpy.ndarray): The AR filter image with an alpha
            channel (4 channels).
        filter_type (str): Type of filter to apply. Only "glasses" is implemented.

    Returns:
        numpy.ndarray: ``frame``, with the filter blended in. It is returned
        unchanged when the face has fewer than 478 landmarks, ``filter_img``
        is None, the filter type is not implemented, the computed filter
        size is zero, or the filter would not fit inside the frame.

    Raises:
        ValueError: If ``filter_img`` is not a 3-D array with 4 channels.
    """
    if len(face) < 478 or filter_img is None:
        return frame
    if filter_type != "glasses":
        return frame

    # The blend below reads channel 3 as alpha; without it NumPy would fail
    # later with an opaque broadcasting ValueError.
    if filter_img.ndim != 3 or filter_img.shape[2] != 4:
        raise ValueError(
            "filter_img must be a 4-channel image with an alpha channel"
        )

    # Use eye corners to position and scale glasses
    left_outer = face[self.LEFT_EYE_OUTER]
    right_outer = face[self.RIGHT_EYE_OUTER]

    # Center between eyes
    center_x = int((left_outer[0] + right_outer[0]) / 2)
    center_y = int((left_outer[1] + right_outer[1]) / 2)

    # Scale based on eye-to-eye distance (adjust multiplier for your filter image)
    eye_dist = self.euclidean_distance(left_outer, right_outer)
    scale_factor = int(eye_dist * 1.8)
    if scale_factor <= 0:
        # Coincident eye landmarks: a zero-sized resize target is invalid.
        return frame

    filter_resized = cv2.resize(filter_img, (scale_factor, scale_factor))

    # Position (top-left of filter), slightly above the eyes
    x = int(center_x - scale_factor / 2)
    y = int(center_y - scale_factor * 0.45)

    h, w = filter_resized.shape[:2]
    if y < 0 or x < 0 or y + h > frame.shape[0] or x + w > frame.shape[1]:
        return frame  # out of bounds

    overlay = filter_resized[:, :, :3]
    mask = filter_resized[:, :, 3:] / 255.0  # alpha in [0, 1], shape (h, w, 1)

    roi = frame[y : y + h, x : x + w]
    blended = (1.0 - mask) * roi + mask * overlay
    frame[y : y + h, x : x + w] = blended.astype(np.uint8)

    return frame
333
+
334
def face_mesh_detection(self, img, drawLandMarks=True):
    """Run the face landmarker on a BGR image and post-process the result.

    The image is converted to RGB, wrapped in an ``mp.Image``, passed to the
    detector's ``detect`` call, and the result is handed to
    ``landmarks_on_image`` together with the original image.

    Args:
        img (numpy.ndarray): The input image in BGR format.
        drawLandMarks (bool): Whether to draw the detected landmarks on the image. Default is True.

    Returns:
        tuple: (annotated_image, faces, blendshapes, transformation_matrices, bboxes)
            exactly as returned by ``landmarks_on_image``:
            - annotated_image (numpy.ndarray): Copy of the image, with landmarks drawn if requested.
            - faces (list): One list of [x, y] pixel coordinates per detected face.
            - blendshapes (list): One {blendshape_name: score} dict per face.
            - transformation_matrices (list): One 4x4 numpy array (or None) per face.
            - bboxes (list): One [min_x, min_y, max_x, max_y] box per face.
    """
    rgb_frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    detection = self.face_detector.detect(
        mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)
    )
    return self.landmarks_on_image(img, detection, drawLandMarks)
355
+
356
def landmarks_on_image(self, image, detection_result, drawLandMarks=True):
    """Draw facial landmarks on a copy of the image and collect per-face data.

    Args:
        image (numpy.ndarray): The input image; it is copied, not modified.
        detection_result: Object exposing ``face_landmarks``,
            ``face_blendshapes`` and ``facial_transformation_matrixes``.
        drawLandMarks (bool): Whether to draw the mesh, contours, irises and
            a dot per landmark on the copy. Default is True.

    Returns:
        tuple: (annotated_image, faces, blendshapes, transformation_matrices, bboxes)
            - annotated_image (numpy.ndarray): The (possibly annotated) copy.
            - faces (list[list[list[int]]]): [x, y] pixel coordinates per face.
            - blendshapes (list[dict]): {category_name: score} per face, or {}
              when no blendshapes are available for that face.
            - transformation_matrices (list): 4x4 numpy array per face, or
              None when no matrix is available for that face.
            - bboxes (list[list[int]]): [min_x, min_y, max_x, max_y] per face.
              The minimums never exceed the image width/height and the
              maximums are never below 0.
        All four lists are empty when no face landmarks are present.
    """
    annotated_image = image.copy()
    faces, blendshapes, transformation_matrices, bboxes = [], [], [], []

    detected_faces = detection_result.face_landmarks
    if not detected_faces:
        return annotated_image, faces, blendshapes, transformation_matrices, bboxes

    h, w, _ = annotated_image.shape

    # (connection set, default style getter) in drawing order. Both are
    # resolved by name right before each draw call.
    mesh_layers = (
        ("FACE_LANDMARKS_TESSELATION", "get_default_face_mesh_tesselation_style"),
        ("FACE_LANDMARKS_CONTOURS", "get_default_face_mesh_contours_style"),
        ("FACE_LANDMARKS_LEFT_IRIS", "get_default_face_mesh_iris_connections_style"),
        ("FACE_LANDMARKS_RIGHT_IRIS", "get_default_face_mesh_iris_connections_style"),
    )

    for idx, face_landmarks in enumerate(detected_faces):
        if drawLandMarks:
            for connections_name, style_getter in mesh_layers:
                self.drawing_utils.draw_landmarks(
                    image=annotated_image,
                    landmark_list=face_landmarks,
                    connections=getattr(
                        vision.FaceLandmarksConnections, connections_name
                    ),
                    landmark_drawing_spec=None,
                    connection_drawing_spec=getattr(
                        self.drawing_styles, style_getter
                    )(),
                )

        # Normalized landmark -> integer pixel coordinates.
        face = []
        for lm in face_landmarks:
            px, py = int(lm.x * w), int(lm.y * h)
            face.append([px, py])
            if drawLandMarks:
                cv2.circle(annotated_image, (px, py), 1, (0, 255, 0), -1)

        # Box over all pixel points, seeded with (w, h) for the minimums and
        # (0, 0) for the maximums.
        xs = [point[0] for point in face]
        ys = [point[1] for point in face]
        faces.append(face)
        bboxes.append([min([w, *xs]), min([h, *ys]), max([0, *xs]), max([0, *ys])])

        # Blendshape categories -> {name: score}; {} keeps the lists aligned.
        all_blendshapes = detection_result.face_blendshapes
        if all_blendshapes and idx < len(all_blendshapes):
            blendshapes.append(
                {entry.category_name: entry.score for entry in all_blendshapes[idx]}
            )
        else:
            blendshapes.append({})

        # Head-pose matrix reshaped to 4x4; None keeps the lists aligned.
        all_matrices = detection_result.facial_transformation_matrixes
        if all_matrices and idx < len(all_matrices):
            transformation_matrices.append(
                np.array(all_matrices[idx]).reshape(4, 4)
            )
        else:
            transformation_matrices.append(None)

    return annotated_image, faces, blendshapes, transformation_matrices, bboxes
465
+
466
def distance_between_landmarks(
    self, p1, p2, img=None, draw=True, color=(255, 0, 255), thickness=3, radius=8
):
    """Measure the distance between two points and optionally draw it on an image.

    Args:
        p1 (tuple[int, int]): (x, y) coordinates of the first point.
        p2 (tuple[int, int]): (x, y) coordinates of the second point.
        img (numpy.ndarray, optional): Image to draw on (modified in place).
            If None, nothing is drawn.
        draw (bool): Whether to draw the points, line, midpoint and label.
        color (tuple): Color for the end points and line (BGR). Default is magenta.
        thickness (int): Thickness of the line. Default is 3.
        radius (int): Radius of the end-point circles; the midpoint circle
            uses half of it. Default is 8.

    Returns:
        tuple: ``(length, (x1, y1, x2, y2, cx, cy))`` where (cx, cy) is the
        floor-divided midpoint. When ``img`` is given and ``draw`` is True,
        the annotated ``img`` is appended as a third element.
    """
    x1, y1 = p1
    x2, y2 = p2
    cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
    endpoints_and_center = (x1, y1, x2, y2, cx, cy)

    length = self.euclidean_distance((x2, y2), (x1, y1))

    # Nothing to draw: return the two-element form.
    if img is None or not draw:
        return length, endpoints_and_center

    # End points, connecting line, then the (smaller, green) midpoint.
    for endpoint in ((x1, y1), (x2, y2)):
        cv2.circle(img, endpoint, radius, color, cv2.FILLED)
    cv2.line(img, (x1, y1), (x2, y2), color, thickness)
    cv2.circle(img, (cx, cy), radius // 2, (0, 255, 0), cv2.FILLED)

    # Distance label next to the midpoint.
    cv2.putText(
        img,
        f"{length:.1f}px",
        (cx + 10, cy - 10),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.6,
        (255, 255, 255),
        2,
    )
    return length, endpoints_and_center, img
519
+
520
def get_head_pose_angles(self, matrix):
    """Extract three Euler-style angles (in degrees) from a 4x4 transformation matrix.

    Only the upper-left 3x3 block R is read:
        yaw   = atan2(R[1, 0], R[0, 0])
        pitch = atan2(-R[2, 0], sqrt(R[2, 1]^2 + R[2, 2]^2))
        roll  = atan2(R[2, 1], R[2, 2])

    NOTE(review): which physical head movement each angle corresponds to
    depends on the axis convention of the matrix producer - confirm the
    yaw/pitch/roll naming against that convention.

    Args:
        matrix (np.ndarray): 4x4 facial transformation matrix, or None.

    Returns:
        tuple: (yaw, pitch, roll) in degrees; (0.0, 0.0, 0.0) when ``matrix`` is None.
    """
    if matrix is None:
        return 0.0, 0.0, 0.0

    rotation = matrix[:3, :3]
    r00, r10, r20 = rotation[0, 0], rotation[1, 0], rotation[2, 0]
    r21, r22 = rotation[2, 1], rotation[2, 2]

    yaw = np.arctan2(r10, r00) * 180 / np.pi
    pitch = np.arctan2(-r20, np.sqrt(r21 ** 2 + r22 ** 2)) * 180 / np.pi
    roll = np.arctan2(r21, r22) * 180 / np.pi
    return yaw, pitch, roll
545
+
546
def get_emotion(self, blend):
    """Map blendshape scores to an emotion label with fixed threshold rules.

    Rules are checked in order and the first match wins:
    smile sum > 0.45 with blink sum < 0.4, frown sum > 0.4, blink sum > 0.75,
    mouth-open sum > 0.45, brow-up sum > 0.5; otherwise neutral.

    Args:
        blend (dict): Dictionary of blendshape coefficients. Missing keys count as 0.

    Returns:
        str: Emotion label; "Unknown" when ``blend`` is empty or None.
    """
    if not blend:
        return "Unknown"

    def total(*names):
        # Sum of the named coefficients, treating absent ones as 0.
        return sum(blend.get(name, 0) for name in names)

    smile = total("mouthSmileLeft", "mouthSmileRight")
    frown = total("mouthFrownLeft", "mouthFrownRight")
    brow_up = total("browInnerUp", "browOuterUpLeft", "browOuterUpRight")
    eye_blink = total("eyeBlinkLeft", "eyeBlinkRight")
    mouth_open = total("mouthLowerDownLeft", "mouthLowerDownRight", "jawOpen")

    rules = (
        (smile > 0.45 and eye_blink < 0.4, "😊 Happy"),
        (frown > 0.4, "😠 Angry"),
        (eye_blink > 0.75, "😲 Surprised"),
        (mouth_open > 0.45, "😮 Shocked"),
        (brow_up > 0.5, "🤨 Confused"),
    )
    for matched, label in rules:
        if matched:
            return label
    return "😐 Neutral"
585
+
586
+ # ─────────────────────────── NEW METHODS ───────────────────────────
587
+
588
def get_eye_aspect_ratio(self, face, eye="left"):
    """Compute the Eye Aspect Ratio (EAR) used for blink detection.

    EAR = (vertical_dist_1 + vertical_dist_2) / (2 * horizontal_dist), using
    the six landmarks of LEFT_EYE_BLINK or RIGHT_EYE_BLINK. The value drops
    as the eye closes.

    Args:
        face: List of [x, y] pixel coordinates for one detected face.
        eye: 'left' selects LEFT_EYE_BLINK; any other value selects RIGHT_EYE_BLINK.
    Returns:
        EAR value; 0.0 when the face has fewer than 478 landmarks or the
        horizontal distance is zero.
    """
    if len(face) < 478:
        return 0.0

    indices = self.LEFT_EYE_BLINK if eye == "left" else self.RIGHT_EYE_BLINK
    corner_a, upper_a, upper_b, corner_b, lower_b, lower_a = [face[i] for i in indices]

    measure = self.euclidean_distance
    opening = measure(upper_a, lower_a) + measure(upper_b, lower_b)
    span = measure(corner_a, corner_b)
    if span == 0:
        return 0.0
    return opening / (2.0 * span)
609
+
610
def is_blinking(self, face, eye="left", ear_threshold=0.22):
    """Return True if the specified eye is currently closed (blinking).

    Args:
        face: List of 478 [x, y] pixel coordinates.
        eye: 'left' or 'right'.
        ear_threshold: EAR below which the eye is considered closed.
    Returns:
        bool: False when the face has fewer than 478 landmarks; otherwise
        whether the eye's EAR is below ``ear_threshold``.
    """
    # get_eye_aspect_ratio reports 0.0 for an incomplete face, which would
    # otherwise compare below any positive threshold and look like a blink.
    if len(face) < 478:
        return False
    return self.get_eye_aspect_ratio(face, eye) < ear_threshold
621
+
622
def is_mouth_open(self, face, ratio_threshold=0.15):
    """Tell whether the mouth openness ratio is strictly above the threshold.

    Args:
        face: List of 478 [x, y] pixel coordinates.
        ratio_threshold: Mouth height/width ratio; 0 = closed, 0.5+ = wide open.
    Returns:
        bool
    """
    openness = self.get_mouth_openness_ratio(face)
    return openness > ratio_threshold
632
+
633
def get_forehead_center(self, face):
    """Return the pixel position of the FOREHEAD_CENTER landmark.

    Useful for placing AR elements (crowns, hats) above the head.

    Args:
        face: List of [x, y] pixel coordinates.
    Returns:
        tuple built from that landmark's coordinates, or None when the face
        does not contain the landmark index.
    """
    index = self.FOREHEAD_CENTER
    if index < len(face):
        return tuple(face[index])
    return None
645
+
646
def get_face_width(self, face):
    """Return the pixel distance between the LEFT_CHEEK and RIGHT_CHEEK landmarks.

    Serves as a proxy for face size, e.g. for camera-to-face distance estimation.

    Args:
        face: List of 478 [x, y] pixel coordinates.
    Returns:
        Cheek-to-cheek distance, or 0.0 when the face has fewer than 478 landmarks.
    """
    if len(face) < 478:
        return 0.0
    left_cheek = face[self.LEFT_CHEEK]
    right_cheek = face[self.RIGHT_CHEEK]
    return self.euclidean_distance(left_cheek, right_cheek)
658
+
659
def draw_head_axes(self, image, matrix, origin=None, scale=60):
    """Draw three orientation arrows derived from the facial transformation matrix.

    The X, Y and Z unit axes (scaled to ``scale``) are rotated by the
    upper-left 3x3 block of ``matrix`` and drawn from ``origin`` in red,
    green and blue respectively, each with its letter at the arrow tip.

    Args:
        image: BGR numpy array to annotate.
        matrix: 4x4 numpy head-pose matrix from face_mesh_detection(), or None.
        origin: (x, y) pixel anchor for the axes. Defaults to image center.
        scale: Arrow length in pixels.
    Returns:
        Annotated copy of the image; the input image itself when ``matrix`` is None.
    """
    if matrix is None:
        return image

    canvas = image.copy()
    height, width = canvas.shape[:2]
    anchor = (width // 2, height // 2) if origin is None else origin

    rotation = matrix[:3, :3]
    scaled_axes = np.float32([[scale, 0, 0], [0, scale, 0], [0, 0, scale]])
    palette = [(0, 0, 255), (0, 255, 0), (255, 0, 0)]  # BGR: red, green, blue

    for position, letter in enumerate(("X", "Y", "Z")):
        shade = palette[position]
        direction = rotation @ scaled_axes[position]
        # Image y grows downward, so the rotated y component is subtracted.
        tip = (int(anchor[0] + direction[0]), int(anchor[1] - direction[1]))
        cv2.arrowedLine(canvas, anchor, tip, shade, 2, tipLength=0.3)
        cv2.putText(
            canvas, letter, tip, cv2.FONT_HERSHEY_SIMPLEX, 0.5, shade, 1, cv2.LINE_AA
        )
    return canvas
689
+
690
def count_faces(self, faces):
    """Report the number of entries in a faces list.

    Args:
        faces: The faces list returned by face_mesh_detection().
    Returns:
        int: ``len(faces)``.
    """
    total = len(faces)
    return total
699
+
700
def get_all_emotions(self, blendshapes):
    """Apply ``get_emotion`` to every blendshape dict, preserving order.

    Args:
        blendshapes: The blendshapes list from face_mesh_detection().
    Returns:
        List[str]: One emotion string per face.
    """
    return list(map(self.get_emotion, blendshapes))
709
+
710
def get_all_gaze_directions(self, faces):
    """Collect the left-eye gaze direction of every face, preserving order.

    Args:
        faces: The faces list from face_mesh_detection().
    Returns:
        List[str]: The value of ``get_eye_gaze_direction(face, is_left_eye=True)``
        for each face.
    """
    directions = []
    for single_face in faces:
        directions.append(self.get_eye_gaze_direction(single_face, is_left_eye=True))
    return directions
719
+
720
def get_nose_tip(self, face):
    """Return the pixel coordinates of the NOSE_TIP landmark.

    Commonly used as a face anchor point for AR placement.

    Args:
        face: List of [x, y] pixel coordinates.
    Returns:
        tuple built from that landmark's coordinates, or None when the face
        does not contain the landmark index.
    """
    index = self.NOSE_TIP
    if index < len(face):
        return tuple(face[index])
    return None
732
+
733
def is_looking_at_camera(self, face, gaze_tolerance=0.18):
    """Return True if both eyes are gazing roughly toward the camera (center gaze).

    For each eye, the iris-center x offset from the midpoint of the two eye
    corners is divided by the horizontal corner distance. The eye counts as
    centered when the absolute ratio does not exceed ``gaze_tolerance``.
    With the default tolerance this matches the 'Center' result of
    ``get_eye_gaze_direction``.

    Args:
        face: List of 478 [x, y] pixel coordinates.
        gaze_tolerance: Largest absolute iris offset ratio still treated as centered.
    Returns:
        bool: False when the face has fewer than 478 landmarks.
    """
    if len(face) < 478:
        return False

    eyes = (
        (self.LEFT_IRIS_CENTER, self.LEFT_EYE_OUTER, self.LEFT_EYE_INNER),
        (self.RIGHT_IRIS_CENTER, self.RIGHT_EYE_OUTER, self.RIGHT_EYE_INNER),
    )
    for iris_idx, outer_idx, inner_idx in eyes:
        outer_x = face[outer_idx][0]
        inner_x = face[inner_idx][0]
        eye_width = abs(outer_x - inner_x)
        if eye_width == 0:
            # A zero-width eye is treated as centered, as in get_eye_gaze_direction.
            continue
        ratio = (face[iris_idx][0] - (outer_x + inner_x) / 2) / eye_width
        if abs(ratio) > gaze_tolerance:
            return False
    return True
745
+
746
+ # ─────────────────────────── EXPRESSION DETECTION (Task 3) ───────────────────────────
747
+
748
def is_smiling(self, blend, threshold=0.4):
    """Tell whether the mean of the two mouth-smile scores is strictly above the threshold.

    Args:
        blend: Dict of blendshape coefficients from face_mesh_detection().
            Missing smile keys count as 0.0.
        threshold: Mean of mouthSmileLeft and mouthSmileRight above which smiling is detected.
    Returns:
        bool
    """
    smile_sum = blend.get("mouthSmileLeft", 0.0) + blend.get("mouthSmileRight", 0.0)
    mean_smile = smile_sum / 2.0
    return mean_smile > threshold
760
+
761
def is_yawning(self, face, ratio_threshold=0.5):
    """Tell whether the mouth openness ratio is strictly above the yawn threshold.

    Args:
        face: List of 478 [x, y] pixel coordinates.
        ratio_threshold: Mouth height/width ratio above which a yawn is detected.
    Returns:
        bool
    """
    openness = self.get_mouth_openness_ratio(face)
    return openness > ratio_threshold
771
+
772
def is_surprised(self, blend, face, brow_threshold=0.3, mouth_threshold=0.3):
    """Surprise heuristic: raised brows together with an open mouth.

    Args:
        blend: Dict of blendshape coefficients.
        face: List of 478 [x, y] pixel coordinates.
        brow_threshold: Value of get_eyebrow_raise(blend) above which brows count as raised.
        mouth_threshold: Mouth openness ratio above which mouth is considered open.
    Returns:
        bool
    """
    # The mouth ratio is only evaluated once the brow test has passed.
    brows_raised = self.get_eyebrow_raise(blend) > brow_threshold
    if not brows_raised:
        return brows_raised
    return self.get_mouth_openness_ratio(face) > mouth_threshold
787
+
788
def get_eyebrow_raise(self, blend):
    """Return the browInnerUp blendshape score as a proxy for eyebrow raise.

    Args:
        blend: Dict of blendshape coefficients.
    Returns:
        float: browInnerUp score, or 0.0 when the key is absent.
    """
    score = blend.get("browInnerUp", 0.0)
    return float(score)
797
+
798
def is_eyes_closed(self, face, ear_threshold=0.22):
    """Return True if both eyes are closed (EAR below threshold for both).

    Args:
        face: List of 478 [x, y] pixel coordinates.
        ear_threshold: EAR below which an eye is considered closed.
    Returns:
        bool: False when the face has fewer than 478 landmarks.
    """
    # get_eye_aspect_ratio reports 0.0 for an incomplete face; without this
    # guard missing data would be reported as two closed eyes.
    if len(face) < 478:
        return False
    return all(
        self.get_eye_aspect_ratio(face, eye=side) < ear_threshold
        for side in ("left", "right")
    )
810
+
811
def is_drowsy(self, face, ear_threshold=0.22):
    """Drowsiness indicator: True when both eyes are closed.

    This is a thin wrapper that forwards to ``is_eyes_closed`` with the same threshold.

    Args:
        face: List of 478 [x, y] pixel coordinates.
        ear_threshold: EAR below which an eye is considered closed.
    Returns:
        bool
    """
    both_closed = self.is_eyes_closed(face, ear_threshold=ear_threshold)
    return both_closed
822
+
823
+ # ─────────────────────────── GEOMETRY & COMPOSITE (Task 4) ───────────────────────────
824
+
825
def get_face_bounding_box(self, face):
    """Return axis-aligned bounding box for the face as (x, y, w, h).

    Args:
        face: List of [x, y] pixel coordinates (any number of landmarks).
    Returns:
        tuple(int, int, int, int): (x, y, width, height) where (x, y) is the
        top-left corner. An empty face yields (0, 0, 0, 0).
    """
    # min()/max() raise on an empty sequence, so return a neutral box
    # instead of crashing on a face without landmarks.
    if not face:
        return (0, 0, 0, 0)

    xs = [point[0] for point in face]
    ys = [point[1] for point in face]
    x = int(min(xs))
    y = int(min(ys))
    w = int(max(xs)) - x
    h = int(max(ys)) - y
    return (x, y, w, h)
840
+
841
def get_face_symmetry_score(self, face):
    """Estimate facial symmetry (0-1) by mirroring landmark pairs across the vertical midline.

    The midline is the mean x of all landmarks. For every pair in
    ``_SYMMETRY_PAIRS`` the left point is mirrored across it and compared
    with the right point. The horizontal error is normalized by the face
    width and the vertical error by the face height, so the score does not
    depend on where the face sits in the image.
    1.0 = perfectly symmetric, 0.0 = highly asymmetric.

    Args:
        face: List of [x, y] pixel coordinates.
    Returns:
        float: Symmetry score in [0, 1]; 0.0 for an empty face or when no
        pair index is present in the face.
    """
    if not face:
        return 0.0

    xs = [point[0] for point in face]
    ys = [point[1] for point in face]
    midline_x = sum(xs) / len(xs)
    # Floor both extents at 1 to avoid dividing by zero on degenerate faces.
    x_range = max(max(xs) - min(xs), 1)
    y_range = max(max(ys) - min(ys), 1)

    landmark_count = len(face)
    diffs = []
    for l_idx, r_idx in self._SYMMETRY_PAIRS:
        if l_idx >= landmark_count or r_idx >= landmark_count:
            continue
        lx, ly = face[l_idx]
        rx, ry = face[r_idx]
        mirrored_lx = 2 * midline_x - lx
        dx = abs(mirrored_lx - rx) / x_range
        dy = abs(ly - ry) / y_range
        diffs.append((dx + dy) / 2)

    if not diffs:
        return 0.0
    return float(max(0.0, 1.0 - sum(diffs) / len(diffs)))
868
+
869
def draw_face_oval(self, image, face):
    """Draw a green ellipse around the face bounding box on a copy of the image.

    Args:
        image: BGR numpy array.
        face: List of [x, y] pixel coordinates.
    Returns:
        Annotated BGR numpy array (copy; original is not modified). For an
        empty face the copy is returned without any drawing.
    """
    out = image.copy()
    # No landmarks means no bounding box to draw; skip instead of failing.
    if not face:
        return out

    x, y, w, h = self.get_face_bounding_box(face)
    cx, cy = x + w // 2, y + h // 2
    # Half-axes are floored at 1 so a degenerate box still yields a valid ellipse.
    cv2.ellipse(
        out, (cx, cy), (max(1, w // 2), max(1, h // 2)), 0, 0, 360, (0, 255, 0), 2
    )
    return out
885
+
886
def get_attention_level(self, face, blend):
    """Composite attention score (0-1) based on gaze direction and eye openness.

    Full gaze toward camera = 1.0; looking away = 0.3; closed eyes subtract
    0.5. The result is clamped at 0.

    Args:
        face: List of 478 [x, y] pixel coordinates.
        blend: Dict of blendshape coefficients (reserved for future extension; unused).
    Returns:
        float: Attention score in [0, 1]; 0.0 when the face has fewer than
        478 landmarks.
    """
    # No usable landmarks means no measurable attention. Make that explicit
    # rather than depending on how the eye-closure helper treats missing data.
    if len(face) < 478:
        return 0.0

    gaze_score = 1.0 if self.is_looking_at_camera(face) else 0.3
    eye_penalty = 0.5 if self.is_eyes_closed(face) else 0.0
    return float(max(0.0, gaze_score - eye_penalty))
900
+
901
def get_lip_separation(self, face):
    """Return the pixel distance between the UPPER_LIP_CENTER and LOWER_LIP_CENTER landmarks.

    Args:
        face: List of [x, y] pixel coordinates.
    Returns:
        float: Lip separation in pixels; 0.0 if face has fewer than 15 landmarks.
    """
    if len(face) < 15:
        return 0.0
    gap = self.euclidean_distance(
        face[self.UPPER_LIP_CENTER], face[self.LOWER_LIP_CENTER]
    )
    return float(gap)