openvisionkit 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,919 @@
1
+ import math
2
+ import time
3
+ from pathlib import Path
4
+
5
+ import cv2
6
+ import mediapipe as mp
7
+ import numpy as np
8
+ from mediapipe.tasks import python
9
+ from mediapipe.tasks.python import vision
10
+ from mediapipe.tasks.python.vision.core.vision_task_running_mode import (
11
+ VisionTaskRunningMode,
12
+ )
13
+ from mediapipe.tasks.python.vision.pose_landmarker import PoseLandmarkerResult
14
+
15
# Directory holding the model assets that ship next to this module.
_MODEL_DIR = Path(__file__).parent.joinpath("models")
# Default pose-landmarker model file, kept as a plain string path.
_DEFAULT_MODEL = str(_MODEL_DIR.joinpath("pose_landmarker.task"))
17
+
18
+
19
+ class PoseDetector:
20
def __init__(
    self,
    model_path: str = _DEFAULT_MODEL,
    running_mode: VisionTaskRunningMode = vision.RunningMode.VIDEO,
    num_poses: int = 1,
    min_pose_detection_confidence: float = 0.5,
    min_pose_presence_confidence: float = 0.5,
    min_tracking_confidence: float = 0.5,
    output_segmentation_masks: bool = False,
):
    """Create the MediaPipe pose landmarker and initialise the workout-counter state.

    Args:
        model_path: Path to the .task model file.
        running_mode: Running mode handed to the landmarker. ``detect()`` uses
            the single-image call for IMAGE and the video call for anything else.
        num_poses: Maximum number of people to detect.
        min_pose_detection_confidence: Detection threshold.
        min_pose_presence_confidence: Pose presence threshold.
        min_tracking_confidence: Tracking threshold.
        output_segmentation_masks: Ask the landmarker to also produce
            segmentation masks (consumed by ``draw_segmentation_mask``).
    """
    landmarker_options = vision.PoseLandmarkerOptions(
        base_options=python.BaseOptions(model_asset_path=model_path),
        running_mode=running_mode,
        num_poses=num_poses,
        min_pose_detection_confidence=min_pose_detection_confidence,
        min_pose_presence_confidence=min_pose_presence_confidence,
        min_tracking_confidence=min_tracking_confidence,
        output_segmentation_masks=output_segmentation_masks,
    )
    self.running_mode = running_mode
    self.pose_detector = vision.PoseLandmarker.create_from_options(
        landmarker_options
    )

    vision_tasks = mp.tasks.vision
    self.mp_drawing_utils = vision_tasks.drawing_utils
    self.mp_drawing_styles = vision_tasks.drawing_styles

    # Frame counter from which detect() derives timestamps when none is given.
    self.frame_count = 0

    # ---- workout counter state ----
    self.rep_count = 0
    # "up" = arm extended (0%), "down" = curled (100%)
    self.stage = "up"
    # NOTE(review): min_angle / max_angle are only assigned (here and in
    # reset_workout); no method visible in this file reads them.
    self.min_angle = 160.0
    self.max_angle = 30.0
    # Most recent joint angle, written by calculate_angle().
    self.angle = 0.0

    # NOTE(review): the two thresholds and min_rep_time below look like
    # rep-validation settings, but nothing visible in this file reads them.
    self.rep_min_threshold = 40
    self.rep_max_threshold = 140
    self.rep_start_time = None
    self.min_rep_time = 0.5  # seconds
    self.session_start = time.time()
    self.rep_times = []
72
+
73
def detect(
    self,
    img: cv2.typing.MatLike,
    draw_landmarks: bool = True,
    timestamp_ms: int | None = None,
    to_draw_landmarks: bool = True,
) -> tuple[cv2.typing.MatLike, PoseLandmarkerResult]:
    """Run pose detection on one frame and optionally draw the skeleton.

    The frame is converted from BGR to RGB before it is wrapped in an
    ``mp.Image``. In IMAGE mode the single-image detector call is used; in
    every other mode the video call is used with ``timestamp_ms``. When no
    timestamp is supplied, the internal frame counter is advanced and
    multiplied by 33 ms (roughly 30 FPS).

    Args:
        img: Input frame in BGR channel order (as read by OpenCV).
        draw_landmarks: Drawing switch; the skeleton is drawn only when this
            AND ``to_draw_landmarks`` are both truthy.
        timestamp_ms: Optional frame timestamp in milliseconds for non-IMAGE modes.
        to_draw_landmarks: Second drawing switch (see ``draw_landmarks``).

    Returns:
        ``(annotated_image, detection_result)``. ``annotated_image`` is always
        a copy of ``img``; the input frame itself is never drawn on.
    """
    mp_image = mp.Image(
        image_format=mp.ImageFormat.SRGB,
        data=cv2.cvtColor(img, cv2.COLOR_BGR2RGB),
    )

    if self.running_mode != vision.RunningMode.IMAGE:
        if timestamp_ms is None:
            # Synthesise a steadily increasing timestamp from the frame counter.
            self.frame_count += 1
            timestamp_ms = self.frame_count * 33
        detection_result = self.pose_detector.detect_for_video(
            mp_image, timestamp_ms
        )
    else:
        detection_result = self.pose_detector.detect(mp_image)

    annotated_image = img.copy()
    wants_drawing = (
        draw_landmarks and detection_result.pose_landmarks and to_draw_landmarks
    )
    if not wants_drawing:
        return annotated_image, detection_result

    connections = vision.PoseLandmarksConnections.POSE_LANDMARKS
    for pose in detection_result.pose_landmarks:
        self.mp_drawing_utils.draw_landmarks(annotated_image, pose, connections)

    return annotated_image, detection_result
125
+
126
def get_all_postion(
    self,
    img: cv2.typing.MatLike,
    detection_result: PoseLandmarkerResult,
    to_draw_landmarks: bool = True,
) -> list | None:
    """Collect every landmark of every detected pose in pixel coordinates.

    Landmarks of all poses are appended to one flat list, so ``"id"`` is the
    landmark's index within its own pose and repeats when several poses are
    present. The list is also stored on ``self.list_of_landmarks``.

    Args:
        img: Image the landmarks refer to. Only its height and width are
            read; both 2-D (grayscale) and 3-D (colour) arrays are accepted.
            When ``to_draw_landmarks`` is true, circles are drawn on it in place.
        detection_result: Result returned by ``detect()``. A falsy value
            (e.g. ``None``) yields an empty list.
        to_draw_landmarks: Draw a filled green circle at each landmark.

    Returns:
        A list of dicts with keys ``id``, ``x``, ``y`` (pixels), ``z``
        (unscaled, as reported), ``center``, ``visibility``, ``presence`` and
        ``name``. The list is empty when nothing was detected.
    """
    self.list_of_landmarks = []
    if not detection_result:
        return self.list_of_landmarks

    # shape[:2] works for (H, W) and (H, W, C) alike; unpacking three values
    # from .shape raised ValueError on grayscale frames.
    h, w = img.shape[:2]
    for landmarks in detection_result.pose_landmarks or []:
        for idx, landmark in enumerate(landmarks):
            cx, cy = int(landmark.x * w), int(landmark.y * h)
            self.list_of_landmarks.append(
                {
                    "id": idx,
                    "x": cx,
                    "y": cy,
                    "z": landmark.z,
                    "center": (cx, cy),
                    "visibility": landmark.visibility,
                    "presence": landmark.presence,
                    "name": landmark.name,
                }
            )
            if to_draw_landmarks:
                cv2.circle(img, (cx, cy), 5, (0, 255, 0), cv2.FILLED)
    return self.list_of_landmarks
164
+
165
def get_landmark(
    self,
    detection_result: PoseLandmarkerResult,
    pose_index: int = 0,
    landmark_id: int = 0,
) -> dict | None:
    """Return one landmark of one detected pose, as reported by the detector.

    The coordinates are passed through unchanged (they are NOT scaled to
    pixels here; use ``get_all_postion`` for pixel coordinates).

    Args:
        detection_result: Result returned by ``detect()``.
        pose_index: Index of the detected pose (0 = first pose).
        landmark_id: Index of the landmark inside that pose
            (e.g. 0 for nose, 11 for left shoulder).

    Returns:
        A dict with keys ``x``, ``y``, ``z``, ``visibility``, ``presence`` and
        ``name``, or ``None`` when the pose or the landmark does not exist.
    """
    poses = detection_result.pose_landmarks
    if not poses or len(poses) <= pose_index:
        return None
    try:
        landmark = poses[pose_index][landmark_id]
    except IndexError:
        # An unknown landmark id means "not detected", as the contract promises.
        return None
    return {
        "x": landmark.x,
        "y": landmark.y,
        "z": landmark.z,
        "visibility": landmark.visibility,
        "presence": landmark.presence,
        "name": landmark.name,
    }
196
+
197
def get_world_landmark(
    self,
    detection_result: PoseLandmarkerResult,
    pose_index: int = 0,
    landmark_id: int = 0,
) -> dict | None:
    """Return one world landmark (3D coordinates) of one detected pose.

    Values are read from ``detection_result.pose_world_landmarks`` and passed
    through unchanged. NOTE(review): the original documentation describes
    them as metres with the origin at the centre of the hips; confirm against
    the MediaPipe documentation.

    Args:
        detection_result: Result returned by ``detect()``.
        pose_index: Index of the detected pose (0 = first pose).
        landmark_id: Index of the landmark inside that pose
            (e.g. 0 for nose, 11 for left shoulder).

    Returns:
        A dict with keys ``x``, ``y``, ``z`` and ``name``, or ``None`` when the
        pose or the landmark does not exist.
    """
    world_poses = detection_result.pose_world_landmarks
    if not world_poses or len(world_poses) <= pose_index:
        return None
    try:
        landmark = world_poses[pose_index][landmark_id]
    except IndexError:
        # An unknown landmark id means "not detected", as the contract promises.
        return None
    return {
        "x": landmark.x,
        "y": landmark.y,
        "z": landmark.z,
        "name": landmark.name,
    }
226
+
227
def calculate_angle(
    self,
    image: cv2.typing.MatLike,
    detection_result: PoseLandmarkerResult,
    p1: int,
    p2: int,
    p3: int,
    pose_index: int = 0,
    to_draw_landmarks: bool = True,
) -> tuple[cv2.typing.MatLike, float]:
    """Measure the angle (degrees) at joint ``p2`` formed by ``p1``-``p2``-``p3``.

    Example: left elbow = ``calculate_angle(image, result, 11, 13, 15)``.

    The landmarks are converted to integer pixel positions first, so the angle
    is measured between pixel-snapped points. The result is stored in
    ``self.angle`` as well as returned.

    Args:
        image: Frame the landmarks refer to; used for its height/width and,
            when drawing is enabled, drawn on in place.
        detection_result: Result returned by ``detect()``.
        p1, p2, p3: Landmark ids of the three points (e.g. 11 = left
            shoulder, 13 = left elbow, 15 = left wrist).
        pose_index: Index of the detected pose to use (0 = first pose).
        to_draw_landmarks: Draw the two limb segments, a marker on each point
            and the integer angle next to the joint.

    Returns:
        ``(image, angle)``. When the requested pose does not exist the image
        is returned untouched together with ``0.0`` and ``self.angle`` is not
        modified.
    """
    poses = detection_result.pose_landmarks
    if not poses or len(poses) <= pose_index:
        return image, 0.0

    pose = poses[pose_index]
    height, width = image.shape[:2]
    # dtype=int truncates the scaled coordinates to whole pixels.
    pixels = np.array(
        [[pose[idx].x * width, pose[idx].y * height] for idx in (p1, p2, p3)],
        dtype=int,
    )

    first, joint, last = pixels.astype(float)
    to_first = first - joint
    to_last = last - joint
    # The small constant keeps the division defined when two points coincide.
    denominator = np.linalg.norm(to_first) * np.linalg.norm(to_last) + 1e-6
    cosine = np.clip(np.dot(to_first, to_last) / denominator, -1, 1)
    self.angle = np.degrees(np.arccos(cosine))

    if not to_draw_landmarks:
        return image, self.angle

    (x1, y1), (x2, y2), (x3, y3) = pixels

    # Limb segments meeting at the joint.
    for end_point in ((x1, y1), (x3, y3)):
        cv2.line(image, end_point, (x2, y2), (255, 255, 255), 3)

    # Filled dot plus an outer ring on each of the three points.
    for centre in ((x1, y1), (x2, y2), (x3, y3)):
        cv2.circle(image, centre, 10, (0, 0, 255), cv2.FILLED)
        cv2.circle(image, centre, 15, (0, 0, 255), 2)

    # Angle label next to the joint (p2).
    cv2.putText(
        image,
        str(int(self.angle)),
        (x2 - 50, y2 + 50),
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        (0, 0, 255),
        2,
    )
    return image, self.angle
298
+
299
def detect_exercise(self, img, detection_result, to_draw_exercise=True) -> str:
    """Guess which exercise the first detected pose is performing.

    A handful of joint angles and landmark heights (normalised coordinates,
    y grows downwards) feed a simple additive score per exercise; the
    highest-scoring label wins, with ties going to the label listed first.

    Args:
        img: Frame to write the label on (drawn in place when enabled).
        detection_result: Result returned by ``detect()``.
        to_draw_exercise: Write the winning label in the top-left corner.

    Returns:
        ``"No Person"`` when nothing was detected, ``"Straight Pose"`` when the
        best score is below 2 (nothing is drawn in either case), otherwise one
        of ``"Bicep Curl"``, ``"Shoulder Press"``, ``"Squat"``, ``"Push-Up"``,
        ``"Lunge"`` or ``"Standing"``.
    """
    if not detection_result.pose_landmarks:
        return "No Person"

    lm = detection_result.pose_landmarks[0]

    def joint_angle(first, middle, last):
        """Angle in degrees at landmark ``middle`` between the other two."""
        vertex = np.array([lm[middle].x, lm[middle].y])
        ba = np.array([lm[first].x, lm[first].y]) - vertex
        bc = np.array([lm[last].x, lm[last].y]) - vertex
        cosine = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-6)
        return np.degrees(np.arccos(np.clip(cosine, -1, 1)))

    # The most-bent elbow / knee of the two sides drives the decision.
    elbow = min(joint_angle(11, 13, 15), joint_angle(12, 14, 16))
    knee = min(joint_angle(23, 25, 27), joint_angle(24, 26, 28))

    shoulder_y = (lm[11].y + lm[12].y) / 2
    wrist_y = min(lm[15].y, lm[16].y)
    hip_y = (lm[23].y + lm[24].y) / 2
    knee_y = (lm[25].y + lm[26].y) / 2

    # Insertion order matters: max() keeps the first label on a tie.
    scores = {
        "Bicep Curl": 2 * int(30 < elbow < 140) + int(wrist_y > shoulder_y),
        "Shoulder Press": 2 * int(wrist_y < shoulder_y) + int(elbow > 120),
        "Squat": 2 * int(knee < 130) + int(hip_y > knee_y),
        "Push-Up": int(elbow < 110) + 2 * int(abs(shoulder_y - hip_y) < 0.08),
        "Lunge": 2 * int(abs(lm[25].y - lm[26].y) > 0.1) + int(knee < 120),
        "Standing": 3 * int(elbow > 150 and knee > 160),
    }

    exercise = max(scores, key=scores.get)

    # Too little evidence for any exercise.
    if scores[exercise] < 2:
        return "Straight Pose"

    if to_draw_exercise:
        cv2.putText(
            img, exercise, (30, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2
        )
    return exercise
400
+
401
def select_active_arm(self, detection_result: PoseLandmarkerResult):
    """Pick the arm whose shoulder, elbow and wrist are more visible.

    Typical use: ``p1, p2, p3 = pose_detector.select_active_arm(result)``
    followed by ``calculate_angle(image, result, p1, p2, p3)``.

    Args:
        detection_result: Result returned by ``detect()``.

    Returns:
        ``(12, 14, 16)`` (right arm) when the summed visibility of the right
        arm is strictly greater, otherwise ``(11, 13, 15)`` (left arm). When no
        pose was detected the left-arm triple is returned so that callers can
        still unpack the result instead of hitting an ``IndexError``.
    """
    left_arm = (11, 13, 15)
    right_arm = (12, 14, 16)
    if not detection_result.pose_landmarks:
        return left_arm

    lm = detection_result.pose_landmarks[0]
    left_vis = sum(lm[i].visibility for i in left_arm)
    right_vis = sum(lm[i].visibility for i in right_arm)
    return right_arm if right_vis > left_vis else left_arm
410
+
411
def get_workout_stats(self, frame, to_draw_landmarks=True):
    """Summarise the current session and optionally print it on the frame.

    Args:
        frame: Image to write the four statistic lines on (drawn in place).
        to_draw_landmarks: When true, render the statistics onto ``frame``.

    Returns:
        dict with ``reps`` (counted repetitions), ``avg_time`` (mean of the
        recorded rep durations, 0 when none), ``duration`` (seconds since
        ``session_start``) and ``calories`` (``rep_count * 0.5``, a fixed
        per-rep estimate).
    """
    duration = time.time() - self.session_start
    recorded = self.rep_times
    avg_time = sum(recorded) / len(recorded) if recorded else 0
    calories = self.rep_count * 0.5

    if to_draw_landmarks:
        # (text, baseline y, BGR colour); font face 0, scale 1, thickness 2.
        rows = (
            (f"Reps: {self.rep_count}", 130, (0, 255, 0)),
            (f"Avg: {avg_time:.2f}s", 160, (255, 255, 0)),
            (f"Time: {int(duration)}s", 190, (255, 255, 255)),
            (f"Cal: {calories:.1f}", 220, (0, 165, 255)),
        )
        for text, baseline_y, colour in rows:
            cv2.putText(frame, text, (30, baseline_y), 0, 1, colour, 2)

    return {
        "reps": self.rep_count,
        "avg_time": avg_time,
        "duration": duration,
        "calories": calories,
    }
434
+
435
def calculate_workout_percentage(self) -> tuple[float, float, int]:
    """Turn the latest joint angle into a curl percentage and count repetitions.

    Reads ``self.angle`` (set by ``calculate_angle``) and tracks the smallest
    and largest angle seen during the current repetition in ``self.rep_min`` /
    ``self.rep_max``. The percentage follows the convention used throughout
    this class: largest angle (arm extended) = 0 %, smallest angle (arm
    curled) = 100 %.

    State machine:
        * ``"up"`` -> ``"down"`` when the percentage reaches 80 %: the rep is
          counted and its timer is started.
        * ``"down"`` -> ``"up"`` when the percentage falls to 20 %: the rep
          duration is appended to ``self.rep_times`` and the tracked range is
          reset to the current angle.

    Returns:
        ``(angle, percent, rep_count)``. ``percent`` is ``0.0`` while the
        tracked range is still narrower than 20 degrees.
    """
    angle = self.angle

    # The per-rep range is created lazily so reset_workout() can simply delete it.
    if not hasattr(self, "rep_min"):
        self.rep_min = angle
        self.rep_max = angle

    # Adapt the range to the user's actual movement.
    self.rep_min = min(self.rep_min, angle)
    self.rep_max = max(self.rep_max, angle)

    # A very narrow range gives a meaningless percentage.
    if abs(self.rep_max - self.rep_min) < 20:
        return angle, 0.0, self.rep_count

    # np.interp needs ascending x values, so "high angle -> low percent" has to
    # be expressed through descending outputs. The previous [0, 100] mapping
    # reported 0 % for a fully curled arm. interp already clamps to the end
    # values, so no extra clipping is needed.
    percent = float(np.interp(angle, [self.rep_min, self.rep_max], [100, 0]))

    if percent >= 80 and self.stage == "up":
        self.stage = "down"
        self.rep_start_time = time.time()
        self.rep_count += 1

    if percent <= 20 and self.stage == "down":
        rep_time = time.time() - self.rep_start_time if self.rep_start_time else 1
        self.stage = "up"
        self.rep_min, self.rep_max = angle, angle
        self.rep_times.append(rep_time)

    return angle, percent, self.rep_count
470
+
471
def draw_landmarks_on_image(
    self,
    image: cv2.typing.MatLike,
    detection_result: PoseLandmarkerResult,
    list_of_landmarks: list[int] | None = None,
) -> cv2.typing.MatLike:
    """Draw selected landmarks of the first detected pose as filled green circles.

    Args:
        image: Image to annotate; it is copied, never modified.
        detection_result: Result returned by ``detect()``.
        list_of_landmarks: Landmark ids to draw. ``None`` draws every landmark
            of the pose; an empty list draws nothing.

    Returns:
        A copy of ``image`` with the requested landmarks drawn. When no pose
        was detected the copy is returned unchanged.
    """
    annotated_image = image.copy()
    if not detection_result.pose_landmarks:
        return annotated_image

    pose = detection_result.pose_landmarks[0]
    # None means "all landmarks", as documented; it used to draw nothing.
    wanted = range(len(pose)) if list_of_landmarks is None else list_of_landmarks

    height, width = image.shape[:2]
    for landmark_id in wanted:
        landmark = pose[landmark_id]
        cv2.circle(
            annotated_image,
            (int(landmark.x * width), int(landmark.y * height)),
            5,
            (0, 255, 0),
            cv2.FILLED,
        )
    return annotated_image
508
+
509
def draw_segmentation_mask(
    self,
    image: cv2.typing.MatLike,
    detection_result: PoseLandmarkerResult,
    alpha: float = 0.6,
    color: tuple[int, int, int] = (0, 255, 0),
) -> cv2.typing.MatLike:
    """Tint the area covered by the first segmentation mask.

    Masks are only present when the detector was created with
    ``output_segmentation_masks=True``.

    Args:
        image: Image to blend the tint into.
        detection_result: Result returned by ``detect()``.
        alpha: Weight of the tinted overlay in the blend; the original image
            gets weight ``1 - alpha``.
        color: Tint colour, one value per image channel (the default is green
            in OpenCV's BGR order).

    Returns:
        The blended image, or ``image`` itself (same object) when the result
        carries no segmentation masks.
    """
    masks = detection_result.segmentation_masks
    if not masks:
        return image

    # Pixels whose mask value exceeds 0.5 are tinted.
    person = masks[0].numpy_view() > 0.5

    # A trailing axis lets an (H, W) mask broadcast against (H, W, C) pixels.
    if person.ndim == 2:
        person = person.reshape(person.shape + (1,))
    person = person.astype(bool)

    tint = np.array(color, dtype=np.uint8)
    overlay = np.where(person, tint, image)

    return cv2.addWeighted(image, 1 - alpha, overlay, alpha, 0)
547
+
548
+ # ─────────────────────────── NEW METHODS ───────────────────────────
549
+
550
def reset_workout(self):
    """Put the rep counter back into its initial state.

    Call between sets, people, or exercise changes so the same instance can
    be reused. The session clock restarts and the lazily created per-rep
    range (``rep_min`` / ``rep_max``) is removed so the next call to
    ``calculate_workout_percentage`` starts a fresh range.
    """
    self.rep_count = 0
    self.stage = "up"
    self.min_angle = 160.0
    self.max_angle = 30.0
    self.angle = 0.0
    self.rep_start_time = None
    self.session_start = time.time()
    self.rep_times = []

    # These attributes only exist once a percentage has been calculated.
    for lazy_attr in ("rep_min", "rep_max"):
        if hasattr(self, lazy_attr):
            delattr(self, lazy_attr)
566
+
567
def get_body_center(self, image, detection_result):
    """Return the pixel midpoint of the hips, a stable body anchor.

    Falls back to the shoulder midpoint unless BOTH hips are visible
    (visibility above 0.5); previously only the left hip was checked even
    though the midpoint averages both hips.

    Args:
        image: Image array; only its height and width are used.
        detection_result: PoseLandmarkerResult from detect().
    Returns:
        tuple(int, int) or None if no pose detected.
    """
    if not detection_result.pose_landmarks:
        return None
    h, w = image.shape[:2]
    lm = detection_result.pose_landmarks[0]
    # Hips: left=23, right=24; Shoulders: left=11, right=12
    hips_visible = lm[23].visibility > 0.5 and lm[24].visibility > 0.5
    left_id, right_id = (23, 24) if hips_visible else (11, 12)
    cx = int((lm[left_id].x + lm[right_id].x) / 2 * w)
    cy = int((lm[left_id].y + lm[right_id].y) / 2 * h)
    return (cx, cy)
586
+
587
def is_standing(self, detection_result):
    """Tell whether both legs of the first pose are nearly straight.

    Args:
        detection_result: PoseLandmarkerResult from detect().
    Returns:
        Truthy when the hip-knee-ankle angle exceeds 160 degrees on both
        sides; False when no pose was detected.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return False
    lm = poses[0]

    def knee_angle(hip, knee, ankle):
        """Angle in degrees at ``knee`` between the thigh and the shin."""
        joint = lm[knee]
        thigh = np.array([lm[hip].x - joint.x, lm[hip].y - joint.y])
        shin = np.array([lm[ankle].x - joint.x, lm[ankle].y - joint.y])
        lengths = np.linalg.norm(thigh) * np.linalg.norm(shin) + 1e-6
        cosine = np.clip(np.dot(thigh, shin) / lengths, -1, 1)
        return np.degrees(np.arccos(cosine))

    # Left leg: 23-25-27, right leg: 24-26-28.
    return knee_angle(23, 25, 27) > 160 and knee_angle(24, 26, 28) > 160
608
+
609
def is_sitting(self, detection_result):
    """Tell whether at least one knee of the first pose is bent like a seated leg.

    Args:
        detection_result: PoseLandmarkerResult from detect().
    Returns:
        Truthy when the hip-knee-ankle angle of either leg lies strictly
        between 70 and 130 degrees; False when no pose was detected.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return False
    lm = poses[0]

    def knee_angle(hip, knee, ankle):
        """Angle in degrees at ``knee`` between the thigh and the shin."""
        joint = lm[knee]
        thigh = np.array([lm[hip].x - joint.x, lm[hip].y - joint.y])
        shin = np.array([lm[ankle].x - joint.x, lm[ankle].y - joint.y])
        lengths = np.linalg.norm(thigh) * np.linalg.norm(shin) + 1e-6
        cosine = np.clip(np.dot(thigh, shin) / lengths, -1, 1)
        return np.degrees(np.arccos(cosine))

    # Left leg: 23-25-27, right leg: 24-26-28.
    left_knee = knee_angle(23, 25, 27)
    right_knee = knee_angle(24, 26, 28)
    return (70 < left_knee < 130) or (70 < right_knee < 130)
630
+
631
def get_body_orientation(self, detection_result):
    """Estimate whether the first pose faces the camera, shows its side, or its back.

    The decision combines the shoulder-width to hip-width ratio (x extents in
    normalised coordinates) with the visibility of the nose landmark:

        * nose visibility > 0.7 and ratio > 0.6 -> 'front'
        * otherwise ratio < 0.3                 -> 'side'
        * otherwise nose visibility < 0.3       -> 'back'
        * anything else                         -> 'front'

    Args:
        detection_result: PoseLandmarkerResult from detect().
    Returns:
        str: 'front' | 'side' | 'back', or 'unknown' when no pose was detected.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return "unknown"
    lm = poses[0]

    shoulder_width = abs(lm[11].x - lm[12].x)
    hip_width = abs(lm[23].x - lm[24].x)
    # The epsilon avoids dividing by zero when both hips share the same x.
    ratio = shoulder_width / (hip_width + 1e-6)
    nose_vis = lm[0].visibility

    clearly_frontal = nose_vis > 0.7 and ratio > 0.6
    if not clearly_frontal:
        if ratio < 0.3:
            return "side"
        if nose_vis < 0.3:
            return "back"
    # Ambiguous cases default to the frontal label.
    return "front"
654
+
655
def count_visible_keypoints(self, detection_result, visibility_threshold=0.5):
    """Count the landmarks of the first pose whose visibility reaches the threshold.

    Args:
        detection_result: PoseLandmarkerResult from detect().
        visibility_threshold: Minimum visibility score (inclusive) for a
            landmark to be counted.
    Returns:
        int: number of qualifying landmarks, 0 when no pose was detected.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return 0
    qualifying = [
        point for point in poses[0] if point.visibility >= visibility_threshold
    ]
    return len(qualifying)
671
+
672
def get_shoulder_angle(self, detection_result):
    """Return the tilt angle (degrees) of the shoulder line relative to horizontal.

    Positive = right shoulder (12) lower in the image than the left shoulder
    (11), negative = right shoulder higher; image y grows downwards. The
    result lies in [-90, 90] regardless of which way the person faces.
    Useful for posture analysis.

    Args:
        detection_result: PoseLandmarkerResult from detect().
    Returns:
        float: angle in degrees, or 0.0 if not detected.
    """
    if not detection_result.pose_landmarks:
        return 0.0
    lm = detection_result.pose_landmarks[0]
    # Only the horizontal extent matters for a tilt. Using the signed dx made
    # level shoulders report about 180 degrees whenever the right shoulder had
    # the smaller x (which MediaPipe reports for a camera-facing subject).
    dx = abs(lm[12].x - lm[11].x)
    dy = lm[12].y - lm[11].y
    return float(np.degrees(np.arctan2(dy, dx)))
688
+
689
def get_all_visible_landmarks(
    self, detection_result: PoseLandmarkerResult, visibility_threshold: float = 0.5
) -> list[dict]:
    """List every sufficiently visible landmark across all detected poses.

    A landmark qualifies when its visibility score is at least
    ``visibility_threshold``. Coordinates are passed through exactly as the
    detector reports them (they are not scaled to pixels here).

    Args:
        detection_result: Result returned by ``detect()``.
        visibility_threshold: Minimum visibility score (inclusive).

    Returns:
        A list of dicts with keys ``pose`` (pose index), ``id`` (landmark
        index within that pose), ``x``, ``y``, ``visibility`` and ``name``.
        The list is empty when no pose was detected.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return []

    return [
        {
            "pose": pose_idx,
            "id": landmark_idx,
            "x": point.x,
            "y": point.y,
            "visibility": point.visibility,
            "name": point.name,
        }
        for pose_idx, pose in enumerate(poses)
        for landmark_idx, point in enumerate(pose)
        if point.visibility >= visibility_threshold
    ]
721
+
722
+ # ─────────────────────── POSTURE ANALYSIS METHODS ───────────────────────
723
+
724
def get_spine_angle(self, detection_result) -> float:
    """Return how far the spine of the first pose leans away from vertical.

    The spine is approximated by the line between the shoulder midpoint and
    the hip midpoint (normalised coordinates). Only magnitudes are used, so
    the result does not say which way the body leans. Near 0 = upright;
    larger value = leaning.

    Args:
        detection_result: PoseLandmarkerResult from detect().
    Returns:
        float: angle in degrees, or 0.0 if no pose detected.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return 0.0
    lms = poses[0]

    left_shoulder, right_shoulder = lms[11], lms[12]
    left_hip, right_hip = lms[23], lms[24]

    horizontal = (left_shoulder.x + right_shoulder.x) / 2 - (left_hip.x + right_hip.x) / 2
    vertical = (left_shoulder.y + right_shoulder.y) / 2 - (left_hip.y + right_hip.y) / 2

    lean = math.atan2(abs(horizontal), abs(vertical) + 1e-6)
    return float(math.degrees(lean))
743
+
744
def get_torso_tilt(self, detection_result) -> float:
    """Return the tilt of the shoulder line relative to horizontal (degrees).

    Positive = right shoulder (12) lower in the image; negative = right
    shoulder higher (image y grows downwards). The result lies within
    [-90, 90] regardless of which way the person faces.

    Args:
        detection_result: PoseLandmarkerResult from detect().
    Returns:
        float: angle in degrees, or 0.0 if no pose detected.
    """
    if not detection_result.pose_landmarks:
        return 0.0
    lms = detection_result.pose_landmarks[0]
    # Only the horizontal extent matters for a tilt. With the signed dx, level
    # shoulders reported about 180 degrees whenever the right shoulder had the
    # smaller x (which MediaPipe reports for a camera-facing subject), which in
    # turn made is_hunching() fire on every such frame.
    dx = abs(lms[12].x - lms[11].x)
    dy = lms[12].y - lms[11].y
    return float(math.degrees(math.atan2(dy, dx + 1e-6)))
759
+
760
def is_hunching(self, detection_result, threshold: float = 20) -> bool:
    """Flag a strongly tilted shoulder line.

    This is a simple proxy for an asymmetric shoulder hunch: the magnitude of
    ``get_torso_tilt()`` is compared against ``threshold``.

    Args:
        detection_result: PoseLandmarkerResult from detect().
        threshold: Tilt (degrees) that must be exceeded for the flag to be set.
    Returns:
        bool: True when the absolute tilt is strictly greater than ``threshold``.
    """
    tilt = self.get_torso_tilt(detection_result)
    return abs(tilt) > threshold
771
+
772
def get_symmetry_score(self, detection_result) -> float:
    """Score the left/right symmetry of the first pose in [0, 1].

    Each left landmark is mirrored across the vertical line through the
    shoulder midpoint and compared with its right counterpart; the mean
    offset of all pairs, scaled by 10, is subtracted from 1 and floored at 0.
    Pairs in which either landmark has visibility below 0.5 are ignored.

    Args:
        detection_result: PoseLandmarkerResult from detect().
    Returns:
        float: 1.0 = perfectly symmetric, 0.0 = highly asymmetric; also 0.0
        when no pose was detected or no pair was visible.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return 0.0
    lms = poses[0]

    # (left, right): shoulders, elbows, wrists, hips, knees, ankles.
    mirrored_pairs = ((11, 12), (13, 14), (15, 16), (23, 24), (25, 26), (27, 28))
    axis_x = (lms[11].x + lms[12].x) / 2

    offsets = [
        (
            abs((2 * axis_x - lms[left].x) - lms[right].x)
            + abs(lms[left].y - lms[right].y)
        )
        / 2
        for left, right in mirrored_pairs
        if not (lms[left].visibility < 0.5 or lms[right].visibility < 0.5)
    ]
    if not offsets:
        return 0.0
    return float(max(0.0, 1.0 - sum(offsets) / len(offsets) * 10))
799
+
800
+ # ──────────────────── ACTION AND SPATIAL DETECTION ──────────────────────
801
+
802
def is_arms_raised(self, detection_result, threshold: float = 0.2) -> bool:
    """Tell whether both wrists of the first pose are clearly above the shoulders.

    Heights are compared in normalised y, where a smaller y is higher in the
    frame. Each wrist (15 / 16) must sit at least ``threshold`` above the
    shoulder on the same side (11 / 12).

    Args:
        detection_result: PoseLandmarkerResult from detect().
        threshold: Minimum normalised y-distance between shoulder and wrist.
    Returns:
        bool: True when both arms satisfy the condition; False when no pose
        was detected.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return False
    lms = poses[0]

    # A tiny tolerance keeps an exact-threshold lift from failing on rounding.
    required = threshold - 1e-9
    left_lift = lms[11].y - lms[15].y
    right_lift = lms[12].y - lms[16].y
    return left_lift >= required and right_lift >= required
819
+
820
def detect_fall(self, detection_result) -> bool:
    """Flag a possible fall: the head is lower in the frame than the hips.

    Compares the nose landmark (0) with the midpoint of the hips (23 / 24) in
    normalised y, where a larger y is lower in the image.

    Args:
        detection_result: PoseLandmarkerResult from detect().
    Returns:
        bool: True when the nose is strictly below the hip midpoint; False
        when no pose was detected.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return False
    lms = poses[0]
    nose_y = lms[0].y
    hip_mid_y = (lms[23].y + lms[24].y) / 2
    return nose_y > hip_mid_y
834
+
835
def is_arms_crossed(self, detection_result) -> bool:
    """Return True when each wrist lies across the body midline from its own shoulder.

    The midline is the vertical line through the shoulder midpoint. Comparing
    every wrist with the shoulder on the same arm makes the check independent
    of which way the person faces. The previous test assumed the left side
    always has the smaller x, which reported crossed arms for an uncrossed
    subject whose left side appears at larger x (what MediaPipe reports when
    the subject faces the camera). Results are unchanged for poses that match
    the old assumption.

    Args:
        detection_result: PoseLandmarkerResult from detect().
    Returns:
        bool: True when both wrists (15, 16) are on the opposite side of the
        midline from their shoulders (11, 12); False otherwise or when no pose
        was detected.
    """
    if not detection_result.pose_landmarks:
        return False
    lms = detection_result.pose_landmarks[0]
    mid_x = (lms[11].x + lms[12].x) / 2

    # A negative product means wrist and shoulder sit on opposite sides.
    left_crossed = (lms[15].x - mid_x) * (lms[11].x - mid_x) < 0
    right_crossed = (lms[16].x - mid_x) * (lms[12].x - mid_x) < 0
    return bool(left_crossed and right_crossed)
849
+
850
def get_knee_angle(self, detection_result, side: str = "left") -> float:
    """Return the knee angle (degrees) of the first pose for one side.

    Measured at the knee between the hip and the ankle in normalised
    coordinates: about 180 = straight leg, about 90 = seated.

    Args:
        detection_result: PoseLandmarkerResult from detect().
        side: "left" selects landmarks 23-25-27; any other value selects the
            right leg (24-26-28).
    Returns:
        float: angle in degrees, or 0.0 if no pose detected.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return 0.0
    lms = poses[0]

    hip_id, knee_id, ankle_id = (23, 25, 27) if side == "left" else (24, 26, 28)
    joint = np.array([lms[knee_id].x, lms[knee_id].y])
    to_hip = np.array([lms[hip_id].x, lms[hip_id].y]) - joint
    to_ankle = np.array([lms[ankle_id].x, lms[ankle_id].y]) - joint

    # The epsilon keeps the division defined when two landmarks coincide.
    lengths = np.linalg.norm(to_hip) * np.linalg.norm(to_ankle) + 1e-6
    cosine = np.clip(np.dot(to_hip, to_ankle) / lengths, -1.0, 1.0)
    return float(math.degrees(math.acos(cosine)))
873
+
874
def get_hip_angle(self, detection_result, side: str = "left") -> float:
    """Return the hip angle (degrees) of the first pose for one side.

    Measured at the hip between the shoulder and the knee in normalised
    coordinates: about 180 = standing upright; smaller = bent.

    Args:
        detection_result: PoseLandmarkerResult from detect().
        side: "left" selects landmarks 11-23-25; any other value selects the
            right side (12-24-26).
    Returns:
        float: angle in degrees, or 0.0 if no pose detected.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return 0.0
    lms = poses[0]

    shoulder_id, hip_id, knee_id = (11, 23, 25) if side == "left" else (12, 24, 26)
    joint = np.array([lms[hip_id].x, lms[hip_id].y])
    to_shoulder = np.array([lms[shoulder_id].x, lms[shoulder_id].y]) - joint
    to_knee = np.array([lms[knee_id].x, lms[knee_id].y]) - joint

    # The epsilon keeps the division defined when two landmarks coincide.
    lengths = np.linalg.norm(to_shoulder) * np.linalg.norm(to_knee) + 1e-6
    cosine = np.clip(np.dot(to_shoulder, to_knee) / lengths, -1.0, 1.0)
    return float(math.degrees(math.acos(cosine)))
897
+
898
def get_body_bounding_box(self, detection_result, image) -> tuple:
    """Return the pixel bounding box around the visible landmarks of the first pose.

    Only landmarks with visibility strictly above 0.5 contribute. Corner
    coordinates are truncated to integers before width and height are derived.

    Args:
        detection_result: PoseLandmarkerResult from detect().
        image: Image array; only its height and width are used.
    Returns:
        tuple(int, int, int, int): (x, y, w, h), or (0, 0, 0, 0) when no pose
        was detected or no landmark is visible enough.
    """
    poses = detection_result.pose_landmarks
    if not poses:
        return (0, 0, 0, 0)

    img_h, img_w = image.shape[:2]
    points = [
        (point.x * img_w, point.y * img_h)
        for point in poses[0]
        if point.visibility > 0.5
    ]
    if not points:
        return (0, 0, 0, 0)

    xs, ys = zip(*points)
    left, top = int(min(xs)), int(min(ys))
    right, bottom = int(max(xs)), int(max(ys))
    return (left, top, right - left, bottom - top)