openvisionkit 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openvisionkit/__init__.py +1 -0
- openvisionkit/_version.py +24 -0
- openvisionkit/capture/draw_object.py +296 -0
- openvisionkit/capture/image_template.py +61 -0
- openvisionkit/capture/screen_capture.py +13 -0
- openvisionkit/capture/video_recorder.py +128 -0
- openvisionkit/capture/video_template.py +336 -0
- openvisionkit/lib/classifier.py +186 -0
- openvisionkit/lib/face_detector.py +587 -0
- openvisionkit/lib/face_mesh_detector.py +913 -0
- openvisionkit/lib/form_detector.py +465 -0
- openvisionkit/lib/form_roi_annotator.py +679 -0
- openvisionkit/lib/form_roi_detector.py +1078 -0
- openvisionkit/lib/fps_counter.py +38 -0
- openvisionkit/lib/hair_segmentation.py +298 -0
- openvisionkit/lib/hand_detector.py +1230 -0
- openvisionkit/lib/image_detector.py +1095 -0
- openvisionkit/lib/object_detector.py +401 -0
- openvisionkit/lib/pose_detector.py +919 -0
- openvisionkit/lib/selfie_segmentation.py +528 -0
- openvisionkit/lib/text_detector.py +1229 -0
- openvisionkit/utility/live_plot.py +141 -0
- openvisionkit/utility/vision_utilis.py +871 -0
- openvisionkit-0.4.0.dist-info/METADATA +1018 -0
- openvisionkit-0.4.0.dist-info/RECORD +26 -0
- openvisionkit-0.4.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,913 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import cv2
|
|
5
|
+
import mediapipe as mp
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
# Correct imports for Tasks API
|
|
9
|
+
from mediapipe.tasks.python import vision
|
|
10
|
+
from mediapipe.tasks.python.core.base_options import BaseOptions
|
|
11
|
+
from mediapipe.tasks.python.vision.core.vision_task_running_mode import (
|
|
12
|
+
VisionTaskRunningMode,
|
|
13
|
+
)
|
|
14
|
+
from mediapipe.tasks.python.vision.face_landmarker import (
|
|
15
|
+
FaceLandmarker,
|
|
16
|
+
FaceLandmarkerOptions,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
_MODEL_DIR = Path(__file__).parent / "models"
|
|
20
|
+
_DEFAULT_MODEL = str(_MODEL_DIR / "face_landmarker_v2_with_blendshapes.task")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FaceMeshDetector:
|
|
24
|
+
# ====================== LANDMARK INDICES (MediaPipe Face Landmarker v2 - 478 points) ======================
|
|
25
|
+
# Iris centers (added by the iris model)
|
|
26
|
+
LEFT_IRIS_CENTER = 468
|
|
27
|
+
RIGHT_IRIS_CENTER = 473
|
|
28
|
+
LEFT_IRIS = [474, 475, 476, 477]
|
|
29
|
+
RIGHT_IRIS = [469, 470, 471, 472]
|
|
30
|
+
|
|
31
|
+
FOREHEAD_CENTER = 10
|
|
32
|
+
|
|
33
|
+
# Eye corners
|
|
34
|
+
LEFT_EYE_OUTER = 33
|
|
35
|
+
LEFT_EYE_INNER = 133
|
|
36
|
+
RIGHT_EYE_OUTER = 362
|
|
37
|
+
RIGHT_EYE_INNER = 263
|
|
38
|
+
|
|
39
|
+
# Mouth landmarks for openness ratio
|
|
40
|
+
UPPER_LIP_CENTER = 13
|
|
41
|
+
LOWER_LIP_CENTER = 14 # Common pair used in many projects
|
|
42
|
+
MOUTH_LEFT = 61
|
|
43
|
+
MOUTH_RIGHT = 291
|
|
44
|
+
LIP_LEFT_CORNERS = 61
|
|
45
|
+
LIP_RIGHT_CORNERS = 291
|
|
46
|
+
LIP_CORNERS = [LIP_LEFT_CORNERS, LIP_RIGHT_CORNERS]
|
|
47
|
+
LIP_CENTER_TOP = 13
|
|
48
|
+
LIP_CENTER_BOTTOM = 14
|
|
49
|
+
LIP_CENTER = [LIP_CENTER_TOP, LIP_CENTER_BOTTOM]
|
|
50
|
+
|
|
51
|
+
# Face width landmarks (for normalization)
|
|
52
|
+
LEFT_CHEEK = 234
|
|
53
|
+
RIGHT_CHEEK = 454
|
|
54
|
+
# Nose tip (for head pose estimation)
|
|
55
|
+
NOSE_TIP = 1
|
|
56
|
+
# ====================== END OF LANDMARK INDICES ======================
|
|
57
|
+
|
|
58
|
+
LEFT_EYE = [
|
|
59
|
+
33,
|
|
60
|
+
7,
|
|
61
|
+
163,
|
|
62
|
+
144,
|
|
63
|
+
145,
|
|
64
|
+
153,
|
|
65
|
+
154,
|
|
66
|
+
155,
|
|
67
|
+
133,
|
|
68
|
+
173,
|
|
69
|
+
157,
|
|
70
|
+
158,
|
|
71
|
+
159,
|
|
72
|
+
160,
|
|
73
|
+
161,
|
|
74
|
+
246,
|
|
75
|
+
]
|
|
76
|
+
|
|
77
|
+
RIGHT_EYE = [
|
|
78
|
+
362,
|
|
79
|
+
382,
|
|
80
|
+
381,
|
|
81
|
+
380,
|
|
82
|
+
374,
|
|
83
|
+
373,
|
|
84
|
+
390,
|
|
85
|
+
249,
|
|
86
|
+
263,
|
|
87
|
+
466,
|
|
88
|
+
388,
|
|
89
|
+
387,
|
|
90
|
+
386,
|
|
91
|
+
385,
|
|
92
|
+
384,
|
|
93
|
+
398,
|
|
94
|
+
]
|
|
95
|
+
|
|
96
|
+
LEFT_EYE_BLINK = [33, 160, 158, 133, 153, 144]
|
|
97
|
+
|
|
98
|
+
RIGHT_EYE_BLINK = [362, 385, 387, 263, 373, 380]
|
|
99
|
+
|
|
100
|
+
_SYMMETRY_PAIRS = [
|
|
101
|
+
(33, 263),
|
|
102
|
+
(160, 387),
|
|
103
|
+
(158, 385),
|
|
104
|
+
(133, 362),
|
|
105
|
+
(144, 374),
|
|
106
|
+
(145, 375),
|
|
107
|
+
(153, 380),
|
|
108
|
+
(154, 381),
|
|
109
|
+
(61, 291),
|
|
110
|
+
(185, 409),
|
|
111
|
+
(40, 270),
|
|
112
|
+
(37, 267),
|
|
113
|
+
]
|
|
114
|
+
|
|
115
|
+
"""FaceMeshDetector class uses MediaPipe's FaceLandmarker to detect facial landmarks and draw them on the input image.
|
|
116
|
+
|
|
117
|
+
478 facial landmarks (the 468-point face mesh plus 10 iris points) are detected per face, and the class also extracts blendshape coefficients for facial expressions, head pose transformation matrices, and bounding boxes for each detected face.
|
|
118
|
+
**Extended features (beyond original landmarks drawing):**
|
|
119
|
+
- Configurable options in constructor (num_faces, confidence thresholds, running mode, blendshapes, matrices).
|
|
120
|
+
- Returns face blendshapes (52 facial expression coefficients per detected face).
|
|
121
|
+
- Returns facial transformation matrices (4x4 head-pose matrices per detected face).
|
|
122
|
+
- Computes and returns axis-aligned bounding boxes for each detected face (true "face detection" feature).
|
|
123
|
+
- Supports IMAGE mode by default (VIDEO / LIVE_STREAM can be enabled via constructor; detect_for_video would require additional timestamp handling).
|
|
124
|
+
|
|
125
|
+
The class can be used for face tracking, expression recognition, AR effects, head-pose estimation, etc.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
model_path (str): The path to the face landmarker model file.
|
|
129
|
+
Default is 'models/face_landmarker_v2_with_blendshapes.task' inside the directory that contains this module.
|
|
130
|
+
"""
|
|
131
|
+
|
|
132
|
+
def __init__(
    self,
    model_path=_DEFAULT_MODEL,
    num_faces: int = 2,
    min_face_detection_confidence: float = 0.5,
    min_face_presence_confidence: float = 0.5,
    min_tracking_confidence: float = 0.5,
    output_face_blendshapes: bool = True,
    output_facial_transformation_matrixes: bool = True,
    running_mode: VisionTaskRunningMode = vision.RunningMode.IMAGE,
):
    """Create the MediaPipe FaceLandmarker and keep handles to the drawing helpers.

    Every argument is forwarded unchanged to ``FaceLandmarkerOptions``.

    Args:
        model_path (str): Path to the .task model file.
        num_faces (int): Maximum number of faces to detect.
        min_face_detection_confidence (float): Minimum confidence for face detection.
        min_face_presence_confidence (float): Minimum confidence that a face is present.
        min_tracking_confidence (float): Minimum confidence for tracking.
        output_face_blendshapes (bool): Whether blendshape scores are requested.
        output_facial_transformation_matrixes (bool): Whether head-pose matrices are requested.
        running_mode (RunningMode): Running mode handed to the landmarker; IMAGE by default.
    """
    model_options = BaseOptions(model_asset_path=model_path)
    landmarker_settings = {
        "num_faces": num_faces,
        "min_face_detection_confidence": min_face_detection_confidence,
        "min_face_presence_confidence": min_face_presence_confidence,
        "min_tracking_confidence": min_tracking_confidence,
        "output_face_blendshapes": output_face_blendshapes,
        "output_facial_transformation_matrixes": output_facial_transformation_matrixes,
        "running_mode": running_mode,
    }
    landmarker_options = FaceLandmarkerOptions(
        base_options=model_options, **landmarker_settings
    )

    # The detector is built once here and reused by face_mesh_detection().
    self.face_detector = FaceLandmarker.create_from_options(landmarker_options)

    vision_tasks = mp.tasks.vision
    self.drawing_utils = vision_tasks.drawing_utils
    self.drawing_styles = vision_tasks.drawing_styles
|
|
168
|
+
|
|
169
|
+
def euclidean_distance(self, p1, p2):
    """Return the planar distance between two points.

    Only the first two components of each point are read, so any extra
    components (for example a depth value) are ignored.

    Args:
        p1: Indexable with x at position 0 and y at position 1.
        p2: Indexable with x at position 0 and y at position 1.

    Returns:
        The distance as computed by ``np.hypot``.
    """
    delta_x = p1[0] - p2[0]
    delta_y = p1[1] - p2[1]
    return np.hypot(delta_x, delta_y)
|
|
181
|
+
|
|
182
|
+
def get_landmark_point(self, landmark, width, height):
    """Scale a landmark's ``x``/``y`` attributes by the given size and truncate to ints.

    Args:
        landmark: Object exposing ``x`` and ``y`` attributes.
        width: Factor applied to ``landmark.x``.
        height: Factor applied to ``landmark.y``.

    Returns:
        tuple(int, int): ``(x, y)`` after scaling and ``int()`` truncation.
    """
    pixel_x = int(landmark.x * width)
    pixel_y = int(landmark.y * height)
    return pixel_x, pixel_y
|
|
184
|
+
|
|
185
|
+
def iris_center(self, face_landmarks, iris_indices, width, height):
|
|
186
|
+
points = []
|
|
187
|
+
|
|
188
|
+
for idx in iris_indices:
|
|
189
|
+
lm = face_landmarks.landmark[idx]
|
|
190
|
+
x, y = self.get_landmark_point(lm, width, height)
|
|
191
|
+
points.append((x, y))
|
|
192
|
+
|
|
193
|
+
points = np.array(points)
|
|
194
|
+
cx, cy = points.mean(axis=0).astype(int)
|
|
195
|
+
|
|
196
|
+
return int(cx), int(cy)
|
|
197
|
+
|
|
198
|
+
def distance_between_points(self, p1, p2):
    """Return the Euclidean distance between two points via ``math.dist``.

    Args:
        p1: Sequence of coordinates for the first point.
        p2: Sequence of coordinates for the second point.

    Returns:
        float: Distance between ``p1`` and ``p2``.
    """
    separation = math.dist(p1, p2)
    return separation
|
|
200
|
+
|
|
201
|
+
def get_mouth_openness_ratio(self, face):
|
|
202
|
+
"""Mouth openness ratio (height / width). 0 = closed, ~0.5+ = wide open."""
|
|
203
|
+
if len(face) < 478:
|
|
204
|
+
return 0.0
|
|
205
|
+
upper = face[self.UPPER_LIP_CENTER]
|
|
206
|
+
lower = face[self.LOWER_LIP_CENTER]
|
|
207
|
+
left = face[self.MOUTH_LEFT]
|
|
208
|
+
right = face[self.MOUTH_RIGHT]
|
|
209
|
+
|
|
210
|
+
mouth_height = self.euclidean_distance(upper, lower)
|
|
211
|
+
mouth_width = self.euclidean_distance(left, right)
|
|
212
|
+
return mouth_height / mouth_width if mouth_width > 0 else 0.0
|
|
213
|
+
|
|
214
|
+
def get_eye_gaze_direction(self, face, is_left_eye=True):
|
|
215
|
+
"""
|
|
216
|
+
Simple gaze direction (Left / Center / Right) using iris vs eye corners.
|
|
217
|
+
Returns: 'Left', 'Center', or 'Right' (horizontal gaze only).
|
|
218
|
+
|
|
219
|
+
Args:
|
|
220
|
+
- face: List of 478 [x, y] landmarks for a detected face.
|
|
221
|
+
- is_left_eye: Whether to analyze the left eye (True) or right eye (False).
|
|
222
|
+
|
|
223
|
+
Returns:
|
|
224
|
+
- str: 'Left', 'Center', or 'Right' indicating gaze direction.
|
|
225
|
+
|
|
226
|
+
Note: This is a very basic heuristic and may not be highly accurate. For more robust gaze estimation, consider using a dedicated gaze tracking model.
|
|
227
|
+
"""
|
|
228
|
+
if len(face) < 478:
|
|
229
|
+
return "Unknown"
|
|
230
|
+
|
|
231
|
+
if is_left_eye:
|
|
232
|
+
iris_center = face[self.LEFT_IRIS_CENTER]
|
|
233
|
+
eye_outer = face[self.LEFT_EYE_OUTER]
|
|
234
|
+
eye_inner = face[self.LEFT_EYE_INNER]
|
|
235
|
+
else:
|
|
236
|
+
iris_center = face[self.RIGHT_IRIS_CENTER]
|
|
237
|
+
eye_outer = face[self.RIGHT_EYE_OUTER]
|
|
238
|
+
eye_inner = face[self.RIGHT_EYE_INNER]
|
|
239
|
+
|
|
240
|
+
eye_center_x = (eye_outer[0] + eye_inner[0]) / 2
|
|
241
|
+
eye_width = abs(eye_outer[0] - eye_inner[0])
|
|
242
|
+
if eye_width == 0:
|
|
243
|
+
return "Center"
|
|
244
|
+
|
|
245
|
+
delta_x = iris_center[0] - eye_center_x
|
|
246
|
+
ratio = delta_x / eye_width
|
|
247
|
+
|
|
248
|
+
if ratio < -0.18:
|
|
249
|
+
return "Left"
|
|
250
|
+
elif ratio > 0.18:
|
|
251
|
+
return "Right"
|
|
252
|
+
else:
|
|
253
|
+
return "Center"
|
|
254
|
+
|
|
255
|
+
def get_inter_pupillary_distance(self, face, normalized=False):
|
|
256
|
+
"""
|
|
257
|
+
Approximate eye-to-eye (pupil) distance in pixels.
|
|
258
|
+
If normalized=True → divided by face width (useful for real-world scaling).
|
|
259
|
+
|
|
260
|
+
Args:
|
|
261
|
+
- face: List of 478 [x, y] landmarks for a detected face.
|
|
262
|
+
- normalized: Whether to return distance normalized by face width.
|
|
263
|
+
|
|
264
|
+
Returns:
|
|
265
|
+
- float: Inter-pupillary distance in pixels (or normalized ratio if specified).
|
|
266
|
+
"""
|
|
267
|
+
if len(face) < 478:
|
|
268
|
+
return 0.0
|
|
269
|
+
|
|
270
|
+
left_iris = face[self.LEFT_IRIS_CENTER]
|
|
271
|
+
right_iris = face[self.RIGHT_IRIS_CENTER]
|
|
272
|
+
ipd = self.euclidean_distance(left_iris, right_iris)
|
|
273
|
+
|
|
274
|
+
if normalized:
|
|
275
|
+
face_width = self.euclidean_distance(
|
|
276
|
+
face[self.LEFT_CHEEK], face[self.RIGHT_CHEEK]
|
|
277
|
+
)
|
|
278
|
+
return ipd / face_width if face_width > 0 else 0.0
|
|
279
|
+
return ipd
|
|
280
|
+
|
|
281
|
+
def overlay_ar_filter(self, frame, face, filter_img, filter_type="glasses"):
|
|
282
|
+
"""Basic AR overlay (sunglasses example).
|
|
283
|
+
Returns the frame with filter drawn on top.
|
|
284
|
+
filter_img must be a PNG with alpha channel (RGBA).
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
frame (numpy.ndarray): The input image/frame in BGR format.
|
|
288
|
+
face (list): List of 478 [x, y] landmarks for a detected face.
|
|
289
|
+
filter_img (numpy.ndarray): The AR filter image with alpha channel (RGBA).
|
|
290
|
+
filter_type (str): Type of filter to apply (e.g. "glasses"). Currently only "glasses" is implemented.
|
|
291
|
+
|
|
292
|
+
Returns:
|
|
293
|
+
numpy.ndarray: The output image/frame with the AR filter overlaid.
|
|
294
|
+
"""
|
|
295
|
+
if len(face) < 478 or filter_img is None:
|
|
296
|
+
return frame
|
|
297
|
+
|
|
298
|
+
if filter_type == "glasses":
|
|
299
|
+
# Use eye corners to position and scale glasses
|
|
300
|
+
left_outer = face[self.LEFT_EYE_OUTER]
|
|
301
|
+
right_outer = face[self.RIGHT_EYE_OUTER]
|
|
302
|
+
|
|
303
|
+
# Center between eyes
|
|
304
|
+
center_x = int((left_outer[0] + right_outer[0]) / 2)
|
|
305
|
+
center_y = int((left_outer[1] + right_outer[1]) / 2)
|
|
306
|
+
|
|
307
|
+
# Scale based on eye-to-eye distance
|
|
308
|
+
eye_dist = self.euclidean_distance(left_outer, right_outer)
|
|
309
|
+
scale_factor = int(
|
|
310
|
+
eye_dist * 1.8
|
|
311
|
+
) # adjust multiplier for your filter image
|
|
312
|
+
|
|
313
|
+
# Resize filter
|
|
314
|
+
filter_resized = cv2.resize(filter_img, (scale_factor, scale_factor))
|
|
315
|
+
|
|
316
|
+
# Position (top-left of filter)
|
|
317
|
+
x = int(center_x - scale_factor / 2)
|
|
318
|
+
y = int(center_y - scale_factor * 0.45) # slightly above eyes
|
|
319
|
+
|
|
320
|
+
# Alpha blending (assuming filter_img has alpha channel)
|
|
321
|
+
h, w = filter_resized.shape[:2]
|
|
322
|
+
if y < 0 or x < 0 or y + h > frame.shape[0] or x + w > frame.shape[1]:
|
|
323
|
+
return frame # out of bounds
|
|
324
|
+
|
|
325
|
+
overlay = filter_resized[:, :, :3]
|
|
326
|
+
mask = filter_resized[:, :, 3:] / 255.0
|
|
327
|
+
|
|
328
|
+
roi = frame[y : y + h, x : x + w]
|
|
329
|
+
blended = (1.0 - mask) * roi + mask * overlay
|
|
330
|
+
frame[y : y + h, x : x + w] = blended.astype(np.uint8)
|
|
331
|
+
|
|
332
|
+
return frame
|
|
333
|
+
|
|
334
|
+
def face_mesh_detection(self, img, drawLandMarks=True):
    """Run the face landmarker on a BGR image and post-process the result.

    The image is converted from BGR to RGB, wrapped in an ``mp.Image`` and
    passed to the detector's ``detect`` call; the result is then handed to
    ``landmarks_on_image`` together with the original image.

    Args:
        img (numpy.ndarray): The input image in BGR format.
        drawLandMarks (bool): Whether to draw the detected landmarks. Default is True.

    Returns:
        tuple: (annotated_image, faces, blendshapes, transformation_matrices, bboxes)
        exactly as returned by ``landmarks_on_image``.
    """
    rgb_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    detection = self.face_detector.detect(
        mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_pixels)
    )
    return self.landmarks_on_image(img, detection, drawLandMarks)
|
|
355
|
+
|
|
356
|
+
def landmarks_on_image(self, image, detection_result, drawLandMarks=True):
|
|
357
|
+
"""
|
|
358
|
+
Draws facial landmarks on the input image and extracts extended detection features.
|
|
359
|
+
|
|
360
|
+
Args:
|
|
361
|
+
image (numpy.ndarray): The input image on which to draw the landmarks.
|
|
362
|
+
detection_result: The FaceLandmarkerResult from detection.
|
|
363
|
+
drawLandMarks (bool): Whether to draw the detected landmarks on the image. Default is True.
|
|
364
|
+
|
|
365
|
+
Returns:
|
|
366
|
+
tuple: (annotated_image, faces, blendshapes, transformation_matrices, bboxes)
|
|
367
|
+
- annotated_image (numpy.ndarray): Annotated image.
|
|
368
|
+
- faces (list[list[list[int]]]): Landmark pixel coordinates per face.
|
|
369
|
+
- blendshapes (list[dict]): Blendshape scores per face (e.g. {'eyeBlinkLeft': 0.92, ...}).
|
|
370
|
+
- transformation_matrices (list[np.ndarray]): 4x4 head-pose matrices per face.
|
|
371
|
+
- bboxes (list[list[int]]): Bounding boxes [min_x, min_y, max_x, max_y] per face.
|
|
372
|
+
"""
|
|
373
|
+
annotated_image = image.copy()
|
|
374
|
+
faces = []
|
|
375
|
+
blendshapes = []
|
|
376
|
+
transformation_matrices = []
|
|
377
|
+
bboxes = []
|
|
378
|
+
|
|
379
|
+
if not detection_result.face_landmarks:
|
|
380
|
+
return annotated_image, faces, blendshapes, transformation_matrices, bboxes
|
|
381
|
+
|
|
382
|
+
h, w, _ = annotated_image.shape
|
|
383
|
+
face_landmarks_list = detection_result.face_landmarks
|
|
384
|
+
for idx, face_landmarks in enumerate(face_landmarks_list):
|
|
385
|
+
# Draw mesh (same as original)
|
|
386
|
+
if drawLandMarks:
|
|
387
|
+
self.drawing_utils.draw_landmarks(
|
|
388
|
+
image=annotated_image,
|
|
389
|
+
landmark_list=face_landmarks,
|
|
390
|
+
connections=vision.FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION,
|
|
391
|
+
landmark_drawing_spec=None,
|
|
392
|
+
connection_drawing_spec=self.drawing_styles.get_default_face_mesh_tesselation_style(),
|
|
393
|
+
)
|
|
394
|
+
|
|
395
|
+
self.drawing_utils.draw_landmarks(
|
|
396
|
+
image=annotated_image,
|
|
397
|
+
landmark_list=face_landmarks,
|
|
398
|
+
connections=vision.FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS,
|
|
399
|
+
landmark_drawing_spec=None,
|
|
400
|
+
connection_drawing_spec=self.drawing_styles.get_default_face_mesh_contours_style(),
|
|
401
|
+
)
|
|
402
|
+
|
|
403
|
+
self.drawing_utils.draw_landmarks(
|
|
404
|
+
image=annotated_image,
|
|
405
|
+
landmark_list=face_landmarks,
|
|
406
|
+
connections=vision.FaceLandmarksConnections.FACE_LANDMARKS_LEFT_IRIS,
|
|
407
|
+
landmark_drawing_spec=None,
|
|
408
|
+
connection_drawing_spec=self.drawing_styles.get_default_face_mesh_iris_connections_style(),
|
|
409
|
+
)
|
|
410
|
+
|
|
411
|
+
self.drawing_utils.draw_landmarks(
|
|
412
|
+
image=annotated_image,
|
|
413
|
+
landmark_list=face_landmarks,
|
|
414
|
+
connections=vision.FaceLandmarksConnections.FACE_LANDMARKS_RIGHT_IRIS,
|
|
415
|
+
landmark_drawing_spec=None,
|
|
416
|
+
connection_drawing_spec=self.drawing_styles.get_default_face_mesh_iris_connections_style(),
|
|
417
|
+
)
|
|
418
|
+
|
|
419
|
+
# Extract landmark coordinates + compute bounding box
|
|
420
|
+
face = []
|
|
421
|
+
min_x, min_y = w, h
|
|
422
|
+
max_x, max_y = 0, 0
|
|
423
|
+
# print(face_landmarks)
|
|
424
|
+
# NormalizedLandmark(x=0.5348694324493408, y=0.34117743372917175, z=-0.0013497794279828668, visibility=None, presence=None, name=None)
|
|
425
|
+
|
|
426
|
+
for lm in face_landmarks:
|
|
427
|
+
x, y = int(lm.x * w), int(lm.y * h)
|
|
428
|
+
face.append([x, y])
|
|
429
|
+
min_x = min(min_x, x)
|
|
430
|
+
min_y = min(min_y, y)
|
|
431
|
+
max_x = max(max_x, x)
|
|
432
|
+
max_y = max(max_y, y)
|
|
433
|
+
|
|
434
|
+
if drawLandMarks:
|
|
435
|
+
cv2.circle(annotated_image, (x, y), 1, (0, 255, 0), -1)
|
|
436
|
+
|
|
437
|
+
faces.append(face)
|
|
438
|
+
bboxes.append([min_x, min_y, max_x, max_y])
|
|
439
|
+
|
|
440
|
+
# Extract blendshapes (52 facial expression coefficients) for each detected face. The code checks if the detection result contains face blendshapes and if the index is within bounds. If so, it iterates through the classifications in the blendshapes proto and constructs a dictionary mapping blendshape category names to their corresponding scores. This dictionary is then appended to the blendshapes list, which will contain the blendshape information for each detected face.
|
|
441
|
+
if detection_result.face_blendshapes and idx < len(
|
|
442
|
+
detection_result.face_blendshapes
|
|
443
|
+
):
|
|
444
|
+
face_blend = {}
|
|
445
|
+
# Standard MediaPipe Tasks API access for blendshapes
|
|
446
|
+
blendshapes_proto = detection_result.face_blendshapes[idx]
|
|
447
|
+
# print(blendshapes_proto)
|
|
448
|
+
# Category(index=7, score=3.274757887083979e-07, display_name=None, category_name='cheekSquintLeft')
|
|
449
|
+
for classification in blendshapes_proto:
|
|
450
|
+
face_blend[classification.category_name] = classification.score
|
|
451
|
+
blendshapes.append(face_blend)
|
|
452
|
+
else:
|
|
453
|
+
blendshapes.append({})
|
|
454
|
+
|
|
455
|
+
# Extract facial transformation matrix (head pose) for each detected face. The code checks if the detection result contains facial transformation matrices and if the index is within bounds. If so, it retrieves the matrix, reshapes it into a 4x4 numpy array, and appends it to the transformation_matrices list. This list will contain the head pose information for each detected face. If the matrix is not available, it appends None to maintain the list structure.
|
|
456
|
+
if detection_result.facial_transformation_matrixes and idx < len(
|
|
457
|
+
detection_result.facial_transformation_matrixes
|
|
458
|
+
):
|
|
459
|
+
matrix = detection_result.facial_transformation_matrixes[idx]
|
|
460
|
+
transformation_matrices.append(np.array(matrix).reshape(4, 4))
|
|
461
|
+
else:
|
|
462
|
+
transformation_matrices.append(None)
|
|
463
|
+
|
|
464
|
+
return annotated_image, faces, blendshapes, transformation_matrices, bboxes
|
|
465
|
+
|
|
466
|
+
def distance_between_landmarks(
|
|
467
|
+
self, p1, p2, img=None, draw=True, color=(255, 0, 255), thickness=3, radius=8
|
|
468
|
+
):
|
|
469
|
+
"""
|
|
470
|
+
Find the Euclidean distance between two landmarks and optionally draw on the image.
|
|
471
|
+
|
|
472
|
+
Args:
|
|
473
|
+
p1 (tuple[int, int]): (x, y) coordinates of the first point
|
|
474
|
+
p2 (tuple[int, int]): (x, y) coordinates of the second point
|
|
475
|
+
img (numpy.ndarray, optional): Image on which to draw. If None, no drawing
|
|
476
|
+
draw (bool): Whether to draw the points and line on the image.
|
|
477
|
+
color (tuple): Color for drawing (BGR). Default is magenta (255, 0, 255).
|
|
478
|
+
thickness (int): Thickness of the line. Default is 3.
|
|
479
|
+
radius (int): Radius of the circles at the points. Default is 8.
|
|
480
|
+
|
|
481
|
+
Returns:
|
|
482
|
+
float: The Euclidean distance between the two points.
|
|
483
|
+
tuple: (x1, y1, x2, y2, cx, cy) coordinates of the two points and their center.
|
|
484
|
+
numpy.ndarray (optional): Annotated image if img is provided and draw=True.
|
|
485
|
+
"""
|
|
486
|
+
x1, y1 = p1
|
|
487
|
+
x2, y2 = p2
|
|
488
|
+
|
|
489
|
+
cx = (x1 + x2) // 2
|
|
490
|
+
cy = (y1 + y2) // 2
|
|
491
|
+
|
|
492
|
+
length = self.euclidean_distance((x2, y2), (x1, y1)) # Euclidean distance
|
|
493
|
+
|
|
494
|
+
if img is not None and draw:
|
|
495
|
+
# Draw circles at both points
|
|
496
|
+
cv2.circle(img, (x1, y1), radius, color, cv2.FILLED)
|
|
497
|
+
cv2.circle(img, (x2, y2), radius, color, cv2.FILLED)
|
|
498
|
+
|
|
499
|
+
# Draw connecting line
|
|
500
|
+
cv2.line(img, (x1, y1), (x2, y2), color, thickness)
|
|
501
|
+
|
|
502
|
+
# Draw center point
|
|
503
|
+
cv2.circle(img, (cx, cy), radius // 2, (0, 255, 0), cv2.FILLED)
|
|
504
|
+
|
|
505
|
+
# Optional: Show distance value near the line
|
|
506
|
+
cv2.putText(
|
|
507
|
+
img,
|
|
508
|
+
f"{length:.1f}px",
|
|
509
|
+
(cx + 10, cy - 10),
|
|
510
|
+
cv2.FONT_HERSHEY_SIMPLEX,
|
|
511
|
+
0.6,
|
|
512
|
+
(255, 255, 255),
|
|
513
|
+
2,
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
return length, (x1, y1, x2, y2, cx, cy), img
|
|
517
|
+
|
|
518
|
+
return length, (x1, y1, x2, y2, cx, cy)
|
|
519
|
+
|
|
520
|
+
def get_head_pose_angles(self, matrix):
    """Derive three Euler-style angles (degrees) from a transformation matrix.

    Only the upper-left 3x3 block ``R`` is read:
        yaw   = atan2(R[1, 0], R[0, 0])
        pitch = atan2(-R[2, 0], sqrt(R[2, 1]^2 + R[2, 2]^2))
        roll  = atan2(R[2, 1], R[2, 2])

    NOTE(review): which physical head motion each value corresponds to
    depends on the axis convention of the matrix supplied by the detector;
    confirm against the MediaPipe face-geometry documentation before
    relying on the names.

    Args:
        matrix (np.ndarray): 4x4 facial transformation matrix, or None.

    Returns:
        tuple: (yaw, pitch, roll) in degrees; (0.0, 0.0, 0.0) when ``matrix`` is None.
    """
    if matrix is None:
        return 0.0, 0.0, 0.0

    rotation = matrix[:3, :3]
    r00, r10 = rotation[0, 0], rotation[1, 0]
    r20, r21, r22 = rotation[2, 0], rotation[2, 1], rotation[2, 2]

    yaw = np.arctan2(r10, r00) * 180 / np.pi
    pitch = np.arctan2(-r20, np.sqrt(r21 ** 2 + r22 ** 2)) * 180 / np.pi
    roll = np.arctan2(r21, r22) * 180 / np.pi

    return yaw, pitch, roll
|
|
545
|
+
|
|
546
|
+
def get_emotion(self, blend):
    """Map blendshape scores to a coarse emotion label using fixed thresholds.

    Related blendshape scores are summed into five signals (smile, frown,
    raised brows, eye blink, open mouth); missing keys count as 0. The
    rules are checked in order and the first match wins.

    Args:
        blend (dict): Blendshape name -> score for one face.

    Returns:
        str: The matching label, "😐 Neutral" when no rule matches, or
        "Unknown" when ``blend`` is empty.
    """
    if not blend:
        return "Unknown"

    def combined(*names):
        # Sum of the named scores, treating absent names as 0.
        return sum(blend.get(name, 0) for name in names)

    smile = combined("mouthSmileLeft", "mouthSmileRight")
    frown = combined("mouthFrownLeft", "mouthFrownRight")
    brow_up = combined("browInnerUp", "browOuterUpLeft", "browOuterUpRight")
    eye_blink = combined("eyeBlinkLeft", "eyeBlinkRight")
    mouth_open = combined("mouthLowerDownLeft", "mouthLowerDownRight", "jawOpen")

    # Ordered (condition, label) rules; earlier entries take priority.
    rules = (
        (smile > 0.45 and eye_blink < 0.4, "😊 Happy"),
        (frown > 0.4, "😠 Angry"),
        (eye_blink > 0.75, "😲 Surprised"),
        (mouth_open > 0.45, "😮 Shocked"),
        (brow_up > 0.5, "🤨 Confused"),
    )
    for matched, label in rules:
        if matched:
            return label
    return "😐 Neutral"
|
|
585
|
+
|
|
586
|
+
# ─────────────────────────── NEW METHODS ───────────────────────────
|
|
587
|
+
|
|
588
|
+
def get_eye_aspect_ratio(self, face, eye="left"):
|
|
589
|
+
"""Compute the Eye Aspect Ratio (EAR) — the standard blink-detection metric.
|
|
590
|
+
EAR = (vertical_dist_1 + vertical_dist_2) / (2 * horizontal_dist).
|
|
591
|
+
EAR drops sharply when the eye closes.
|
|
592
|
+
|
|
593
|
+
Args:
|
|
594
|
+
face: List of 478 [x, y] pixel coordinates for one detected face.
|
|
595
|
+
eye: 'left' or 'right'.
|
|
596
|
+
Returns:
|
|
597
|
+
float: EAR value. Typical open-eye range 0.25–0.35; blink < 0.22.
|
|
598
|
+
"""
|
|
599
|
+
if len(face) < 478:
|
|
600
|
+
return 0.0
|
|
601
|
+
pts = self.LEFT_EYE_BLINK if eye == "left" else self.RIGHT_EYE_BLINK
|
|
602
|
+
p1, p2, p3, p4, p5, p6 = (face[i] for i in pts)
|
|
603
|
+
vertical_1 = self.euclidean_distance(p2, p6)
|
|
604
|
+
vertical_2 = self.euclidean_distance(p3, p5)
|
|
605
|
+
horizontal = self.euclidean_distance(p1, p4)
|
|
606
|
+
if horizontal == 0:
|
|
607
|
+
return 0.0
|
|
608
|
+
return (vertical_1 + vertical_2) / (2.0 * horizontal)
|
|
609
|
+
|
|
610
|
+
def is_blinking(self, face, eye="left", ear_threshold=0.22):
|
|
611
|
+
"""Return True if the specified eye is currently closed (blinking).
|
|
612
|
+
|
|
613
|
+
Args:
|
|
614
|
+
face: List of 478 [x, y] pixel coordinates.
|
|
615
|
+
eye: 'left' or 'right'.
|
|
616
|
+
ear_threshold: EAR below which the eye is considered closed.
|
|
617
|
+
Returns:
|
|
618
|
+
bool
|
|
619
|
+
"""
|
|
620
|
+
return self.get_eye_aspect_ratio(face, eye) < ear_threshold
|
|
621
|
+
|
|
622
|
+
def is_mouth_open(self, face, ratio_threshold=0.15):
|
|
623
|
+
"""Return True if the mouth is open past the given ratio threshold.
|
|
624
|
+
|
|
625
|
+
Args:
|
|
626
|
+
face: List of 478 [x, y] pixel coordinates.
|
|
627
|
+
ratio_threshold: Mouth height/width ratio; 0 = closed, 0.5+ = wide open.
|
|
628
|
+
Returns:
|
|
629
|
+
bool
|
|
630
|
+
"""
|
|
631
|
+
return self.get_mouth_openness_ratio(face) > ratio_threshold
|
|
632
|
+
|
|
633
|
+
def get_forehead_center(self, face):
|
|
634
|
+
"""Return pixel position of forehead center (landmark 10).
|
|
635
|
+
Useful for placing AR elements (crowns, hats) above the head.
|
|
636
|
+
|
|
637
|
+
Args:
|
|
638
|
+
face: List of 478 [x, y] pixel coordinates.
|
|
639
|
+
Returns:
|
|
640
|
+
tuple(int, int) or None
|
|
641
|
+
"""
|
|
642
|
+
if len(face) <= self.FOREHEAD_CENTER:
|
|
643
|
+
return None
|
|
644
|
+
return tuple(face[self.FOREHEAD_CENTER])
|
|
645
|
+
|
|
646
|
+
def get_face_width(self, face):
|
|
647
|
+
"""Return cheek-to-cheek pixel distance — a stable proxy for face size.
|
|
648
|
+
Useful for camera-to-face distance estimation.
|
|
649
|
+
|
|
650
|
+
Args:
|
|
651
|
+
face: List of 478 [x, y] pixel coordinates.
|
|
652
|
+
Returns:
|
|
653
|
+
float: pixel distance between LEFT_CHEEK and RIGHT_CHEEK landmarks.
|
|
654
|
+
"""
|
|
655
|
+
if len(face) < 478:
|
|
656
|
+
return 0.0
|
|
657
|
+
return self.euclidean_distance(face[self.LEFT_CHEEK], face[self.RIGHT_CHEEK])
|
|
658
|
+
|
|
659
|
+
def draw_head_axes(self, image, matrix, origin=None, scale=60):
    """Draw three labelled orientation arrows derived from the matrix's rotation block.

    Each basis vector of length ``scale`` is rotated by the upper-left 3x3
    block of ``matrix``; its x/y components give the arrow tip relative to
    ``origin`` (y is negated before being added). Arrow colours are
    (0, 0, 255) for X, (0, 255, 0) for Y and (255, 0, 0) for Z. Drawing is
    done on a copy of the image.

    Args:
        image: Image array to annotate.
        matrix: 4x4 head-pose matrix, or None.
        origin: (x, y) pixel anchor for the arrows. Defaults to the image centre.
        scale: Arrow length before rotation, in pixels.

    Returns:
        The annotated copy, or the original ``image`` object when ``matrix`` is None.
    """
    if matrix is None:
        return image

    canvas = image.copy()
    height, width = canvas.shape[:2]
    if origin is None:
        origin = (width // 2, height // 2)

    rotation = matrix[:3, :3]
    basis = np.float32([[scale, 0, 0], [0, scale, 0], [0, 0, scale]])
    axis_styles = (
        ("X", (0, 0, 255)),
        ("Y", (0, 255, 0)),
        ("Z", (255, 0, 0)),
    )

    for row, (label, color) in enumerate(axis_styles):
        direction = rotation @ basis[row]
        tip = (int(origin[0] + direction[0]), int(origin[1] - direction[1]))
        cv2.arrowedLine(canvas, origin, tip, color, 2, tipLength=0.3)
        cv2.putText(
            canvas, label, tip, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA
        )

    return canvas
|
|
689
|
+
|
|
690
|
+
def count_faces(self, faces):
    """Return the number of entries in the faces list.

    Args:
        faces: The faces list returned by face_mesh_detection().

    Returns:
        int: ``len(faces)``.
    """
    face_total = len(faces)
    return face_total
|
|
699
|
+
|
|
700
|
+
def get_all_emotions(self, blendshapes):
    """Classify the emotion of every face in a single call.

    Args:
        blendshapes: The blendshapes list from face_mesh_detection(); one
            entry per face.
    Returns:
        list: The get_emotion() result for each entry, in input order.
    """
    emotions = []
    for face_blend in blendshapes:
        emotions.append(self.get_emotion(face_blend))
    return emotions
|
|
709
|
+
|
|
710
|
+
def get_all_gaze_directions(self, faces):
    """Collect the left-eye gaze direction of every face.

    Args:
        faces: The faces list from face_mesh_detection().
    Returns:
        list: One get_eye_gaze_direction() result per face (queried with
        ``is_left_eye=True``), in input order.
    """
    directions = []
    for landmarks in faces:
        directions.append(self.get_eye_gaze_direction(landmarks, is_left_eye=True))
    return directions
|
|
719
|
+
|
|
720
|
+
def get_nose_tip(self, face):
    """Look up the nose-tip landmark (index ``self.NOSE_TIP``).

    Args:
        face: List of [x, y] pixel coordinates.
    Returns:
        tuple or None: The landmark's coordinates as a tuple, or None when
        ``face`` is too short to contain that index.
    """
    tip_index = self.NOSE_TIP
    if tip_index < len(face):
        return tuple(face[tip_index])
    return None
|
|
732
|
+
|
|
733
|
+
def is_looking_at_camera(self, face, gaze_tolerance=0.18):
    """Tell whether both eyes report a "Center" gaze.

    Args:
        face: List of 478 [x, y] pixel coordinates.
        gaze_tolerance: Intended iris-offset tolerance for a 'Center' gaze.
            NOTE(review): this method never reads the value, so passing it
            has no effect here; confirm whether it should be forwarded to
            get_eye_gaze_direction().
    Returns:
        bool: True only when get_eye_gaze_direction() yields "Center" for
        the left and the right eye.
    """
    # Both eyes are always queried, left first, before any comparison.
    left_dir, right_dir = (
        self.get_eye_gaze_direction(face, is_left_eye=use_left)
        for use_left in (True, False)
    )
    centered = "Center"
    return left_dir == centered and right_dir == centered
|
|
745
|
+
|
|
746
|
+
# ─────────────────────────── EXPRESSION DETECTION (Task 3) ───────────────────────────
|
|
747
|
+
|
|
748
|
+
def is_smiling(self, blend, threshold=0.4):
    """Detect a smile from the two mouth-smile blendshape scores.

    Args:
        blend: Dict of blendshape coefficients from face_mesh_detection().
            Missing keys count as 0.0.
        threshold: The mean of mouthSmileLeft and mouthSmileRight must be
            strictly greater than this value.
    Returns:
        bool
    """
    smile_total = blend.get("mouthSmileLeft", 0.0) + blend.get("mouthSmileRight", 0.0)
    mean_smile = smile_total / 2.0
    return mean_smile > threshold
|
|
760
|
+
|
|
761
|
+
def is_yawning(self, face, ratio_threshold=0.5):
    """Detect a yawn from the mouth openness ratio.

    Args:
        face: List of 478 [x, y] pixel coordinates.
        ratio_threshold: get_mouth_openness_ratio() must be strictly
            greater than this value for a yawn to be reported.
    Returns:
        bool
    """
    openness = self.get_mouth_openness_ratio(face)
    return openness > ratio_threshold
|
|
771
|
+
|
|
772
|
+
def is_surprised(self, blend, face, brow_threshold=0.3, mouth_threshold=0.3):
    """Surprise heuristic: raised eyebrows combined with an open mouth.

    Args:
        blend: Dict of blendshape coefficients.
        face: List of 478 [x, y] pixel coordinates.
        brow_threshold: get_eyebrow_raise() must exceed this value.
        mouth_threshold: get_mouth_openness_ratio() must exceed this value.
    Returns:
        bool
    """
    brows_raised = self.get_eyebrow_raise(blend) > brow_threshold
    # The mouth ratio is only computed once the brow test has passed.
    return brows_raised and self.get_mouth_openness_ratio(face) > mouth_threshold
|
|
787
|
+
|
|
788
|
+
def get_eyebrow_raise(self, blend):
    """Read the browInnerUp blendshape score as an eyebrow-raise measure.

    Args:
        blend: Dict of blendshape coefficients.
    Returns:
        float: The "browInnerUp" value converted to float; 0.0 when the key
        is absent.
    """
    raw_score = blend.get("browInnerUp", 0.0)
    return float(raw_score)
|
|
797
|
+
|
|
798
|
+
def is_eyes_closed(self, face, ear_threshold=0.22):
    """Report whether both eyes are closed according to their aspect ratios.

    Args:
        face: List of 478 [x, y] pixel coordinates.
        ear_threshold: An eye counts as closed when its
            get_eye_aspect_ratio() value is strictly below this.
    Returns:
        bool: True only when the left and the right eye are both closed.
    """
    # Both ratios are computed up front, left eye first.
    ratios = [self.get_eye_aspect_ratio(face, eye=side) for side in ("left", "right")]
    left_closed = ratios[0] < ear_threshold
    return left_closed and ratios[1] < ear_threshold
|
|
810
|
+
|
|
811
|
+
def is_drowsy(self, face, ear_threshold=0.22):
    """Flag potential drowsiness, defined here as both eyes being closed.

    This is a thin wrapper: the decision is whatever is_eyes_closed()
    returns for the same threshold.

    Args:
        face: List of 478 [x, y] pixel coordinates.
        ear_threshold: EAR below which an eye is considered closed.
    Returns:
        bool
    """
    eyes_shut = self.is_eyes_closed(face, ear_threshold=ear_threshold)
    return eyes_shut
|
|
822
|
+
|
|
823
|
+
# ─────────────────────────── GEOMETRY & COMPOSITE (Task 4) ───────────────────────────
|
|
824
|
+
|
|
825
|
+
def get_face_bounding_box(self, face):
    """Return the axis-aligned bounding box of the landmarks as (x, y, w, h).

    Args:
        face: List of [x, y] pixel coordinates (any number of landmarks,
            including none).
    Returns:
        tuple(int, int, int, int): (x, y, width, height) where (x, y) is
        the top-left corner. Coordinates are truncated with ``int``.
        An empty or missing ``face`` yields (0, 0, 0, 0) instead of raising.
    """
    # len() rather than truthiness so array-like inputs are handled too.
    if face is None or len(face) == 0:
        return (0, 0, 0, 0)
    xs = [point[0] for point in face]
    ys = [point[1] for point in face]
    left, top = int(min(xs)), int(min(ys))
    return (left, top, int(max(xs)) - left, int(max(ys)) - top)
|
|
840
|
+
|
|
841
|
+
def get_face_symmetry_score(self, face):
    """Estimate facial symmetry (0-1) by mirroring landmark pairs across the vertical midline.

    The midline is the mean x of all landmarks. For every (left, right)
    index pair in ``self._SYMMETRY_PAIRS`` the left point is mirrored
    across it and compared with the right point. Horizontal error is
    normalised by the face width and vertical error by the face height, so
    the score does not depend on where the face sits in the frame.
    1.0 = perfectly symmetric, 0.0 = highly asymmetric.

    Args:
        face: List of 478 [x, y] pixel coordinates.
    Returns:
        float: Symmetry score in [0, 1]; 0.0 when ``face`` is empty or no
        pair index falls inside it.
    """
    if face is None or len(face) == 0:
        return 0.0
    xs = [point[0] for point in face]
    ys = [point[1] for point in face]
    cx = sum(xs) / len(xs)
    # Normalise by the landmark extent (floored at 1 px to avoid dividing
    # by zero). Dividing dx by the centroid position instead would make the
    # score change when the same face is merely translated.
    x_range = max(max(xs) - min(xs), 1)
    y_range = max(max(ys) - min(ys), 1)
    landmark_count = len(face)
    diffs = []
    for l_idx, r_idx in self._SYMMETRY_PAIRS:
        if l_idx >= landmark_count or r_idx >= landmark_count:
            continue  # pair not available for this (shorter) landmark list
        lx, ly = face[l_idx][0], face[l_idx][1]
        rx, ry = face[r_idx][0], face[r_idx][1]
        mirrored_lx = 2 * cx - lx
        dx = abs(mirrored_lx - rx) / x_range
        dy = abs(ly - ry) / y_range
        diffs.append((dx + dy) / 2)
    if not diffs:
        return 0.0
    return float(max(0.0, 1.0 - sum(diffs) / len(diffs)))
|
|
868
|
+
|
|
869
|
+
def draw_face_oval(self, image, face):
    """Draw a green ellipse inscribed in the face bounding box on a copy of the image.

    Args:
        image: BGR numpy array.
        face: List of [x, y] pixel coordinates.
    Returns:
        Annotated BGR numpy array (copy; original is not modified). When
        ``face`` is empty or None the copy is returned without any drawing.
    """
    out = image.copy()
    # Nothing to outline: avoid asking for the bounding box of zero points.
    if face is None or len(face) == 0:
        return out
    x, y, w, h = self.get_face_bounding_box(face)
    center = (x + w // 2, y + h // 2)
    # Semi-axes are floored at 1 px so a degenerate box still draws.
    half_axes = (max(1, w // 2), max(1, h // 2))
    cv2.ellipse(out, center, half_axes, 0, 0, 360, (0, 255, 0), 2)
    return out
|
|
885
|
+
|
|
886
|
+
def get_attention_level(self, face, blend):
    """Combine gaze direction and eye openness into a 0-1 attention score.

    The base score is 1.0 when is_looking_at_camera() is true and 0.3
    otherwise; 0.5 is subtracted when is_eyes_closed() is true, and the
    result is floored at 0.0.

    Args:
        face: List of 478 [x, y] pixel coordinates.
        blend: Dict of blendshape coefficients (reserved for future
            extension; not read by this method).
    Returns:
        float: Attention score clamped to [0, 1].
    """
    # Both checks always run, gaze first, using their default thresholds.
    facing_camera = self.is_looking_at_camera(face)
    eyes_shut = self.is_eyes_closed(face)
    if facing_camera:
        score = 1.0
    else:
        score = 0.3
    if eyes_shut:
        score = score - 0.5
    return float(max(0.0, score))
|
|
900
|
+
|
|
901
|
+
def get_lip_separation(self, face):
    """Return pixel distance between the upper and lower lip center landmarks.

    Args:
        face: List of 478 [x, y] pixel coordinates.
    Returns:
        float: Lip separation in pixels; 0.0 if ``face`` is None or too
        short to contain both ``self.UPPER_LIP_CENTER`` and
        ``self.LOWER_LIP_CENTER``.
    """
    upper_idx = self.UPPER_LIP_CENTER
    lower_idx = self.LOWER_LIP_CENTER
    # Derive the guard from the indices actually used rather than a fixed
    # landmark count, so it stays correct if the constants change.
    if face is None or len(face) <= max(upper_idx, lower_idx):
        return 0.0
    return float(self.euclidean_distance(face[upper_idx], face[lower_idx]))
|