kinemotion 0.66.8__py3-none-any.whl → 0.68.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of kinemotion might be problematic.

kinemotion/core/pose.py CHANGED
@@ -1,44 +1,125 @@
- """Pose tracking using MediaPipe Pose."""
+ """Pose tracking using MediaPipe Tasks API.
+
+ The MediaPipe Solutions API was removed in version 0.10.31.
+ This module now uses the Tasks API (PoseLandmarker).
+
+ Key differences from Solution API:
+ - Tasks API uses index-based landmark access (0-32) instead of enums
+ - Running modes: IMAGE, VIDEO, LIVE_STREAM
+ - No smooth_landmarks option (built into VIDEO mode)
+ - Has min_pose_presence_confidence parameter (no Solution API equivalent)
+
+ Configuration strategies for matching Solution API behavior:
+ - "video": Standard VIDEO mode with temporal smoothing
+ - "video_low_presence": VIDEO mode with lower min_pose_presence_confidence (0.2)
+ - "video_very_low_presence": VIDEO mode with very low min_pose_presence_confidence (0.1)
+ - "image": IMAGE mode (no temporal smoothing, relies on our smoothing)
+ """
+
+ from __future__ import annotations

  import cv2
  import mediapipe as mp
  import numpy as np

+ from .pose_landmarks import KINEMOTION_LANDMARKS, LANDMARK_INDICES
  from .timing import NULL_TIMER, Timer

+ # Running modes
+ _RUNNING_MODES = {
+     "image": mp.tasks.vision.RunningMode.IMAGE,  # type: ignore[attr-defined]
+     "video": mp.tasks.vision.RunningMode.VIDEO,  # type: ignore[attr-defined]
+ }
+
+ # Strategy configurations
+ _STRATEGY_CONFIGS: dict[str, dict[str, float | str]] = {
+     "video": {
+         "min_pose_presence_confidence": 0.5,
+         "running_mode": "video",
+     },
+     "video_low_presence": {
+         "min_pose_presence_confidence": 0.2,
+         "running_mode": "video",
+     },
+     "video_very_low_presence": {
+         "min_pose_presence_confidence": 0.1,
+         "running_mode": "video",
+     },
+     "image": {
+         "min_pose_presence_confidence": 0.5,
+         "running_mode": "image",
+     },
+ }
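
The strategy table above is the whole configuration surface: each entry fixes a presence threshold and a running mode. A minimal usage sketch (assuming the module's public import path; the model file is fetched via `.model_downloader` on first use):

```python
from kinemotion.core.pose import MediaPipePoseTracker

# Unknown strategy names fall back to "video_low_presence" via the
# _STRATEGY_CONFIGS.get(...) lookup in __init__, so this never raises KeyError.
tracker = MediaPipePoseTracker(model_type="lite", strategy="video_very_low_presence")
```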
+
+
+ class MediaPipePoseTracker:
+     """Tracks human pose landmarks in video frames using MediaPipe Tasks API.

- class PoseTracker:
-     """Tracks human pose landmarks in video frames using MediaPipe."""
+     Args:
+         min_detection_confidence: Minimum confidence for pose detection (0.0-1.0)
+         min_tracking_confidence: Minimum confidence for pose tracking (0.0-1.0)
+         model_type: Model variant ("lite", "full", "heavy")
+         strategy: Configuration strategy ("video", "video_low_presence", "image")
+         timer: Optional Timer for measuring operations
+
+     Note: The Solution API's smooth_landmarks parameter cannot be replicated
+     exactly. VIDEO mode has built-in temporal smoothing that cannot be disabled.
+     """

-     def __init__(
+     def __init__(  # noqa: PLR0913
          self,
          min_detection_confidence: float = 0.5,
          min_tracking_confidence: float = 0.5,
+         model_type: str = "lite",
+         strategy: str = "video_low_presence",
          timer: Timer | None = None,
      ) -> None:
-         """
-         Initialize the pose tracker.
-
-         Args:
-             min_detection_confidence: Minimum confidence for pose detection
-             min_tracking_confidence: Minimum confidence for pose tracking
-             timer: Optional Timer for measuring operations
-         """
+         """Initialize the pose tracker."""
          self.timer = timer or NULL_TIMER
-         self.mp_pose = mp.solutions.pose  # type: ignore[attr-defined]
-         self.pose = self.mp_pose.Pose(
-             static_image_mode=False,  # Use tracking mode for better performance
-             min_detection_confidence=min_detection_confidence,
+         self.mp_pose = mp.tasks.vision  # type: ignore[attr-defined]
+         self.model_type = model_type
+         self.strategy = strategy
+
+         # Get strategy configuration
+         config = _STRATEGY_CONFIGS.get(strategy, _STRATEGY_CONFIGS["video_low_presence"])
+         min_pose_presence = config["min_pose_presence_confidence"]
+         running_mode_name = str(config["running_mode"])
+         running_mode = _RUNNING_MODES[running_mode_name]
+
+         # Get model path
+         from .model_downloader import get_model_path
+
+         model_path = str(get_model_path(model_type))
+
+         # Create base options
+         base_options = mp.tasks.BaseOptions(model_asset_path=model_path)  # type: ignore[attr-defined]
+
+         # Create pose landmarker options
+         options = mp.tasks.vision.PoseLandmarkerOptions(  # type: ignore[attr-defined]
+             base_options=base_options,
+             running_mode=running_mode,
+             min_pose_detection_confidence=min_detection_confidence,
+             min_pose_presence_confidence=min_pose_presence,
              min_tracking_confidence=min_tracking_confidence,
-             model_complexity=1,
+             output_segmentation_masks=False,
          )

-     def process_frame(self, frame: np.ndarray) -> dict[str, tuple[float, float, float]] | None:
-         """
-         Process a single frame and extract pose landmarks.
+         # Create the landmarker
+         with self.timer.measure("model_load"):
+             self.landmarker = self.mp_pose.PoseLandmarker.create_from_options(options)
+
+         self.running_mode = running_mode
+
+     def process_frame(
+         self,
+         frame: np.ndarray,
+         timestamp_ms: int = 0,
+     ) -> dict[str, tuple[float, float, float]] | None:
+         """Process a single frame and extract pose landmarks.

          Args:
              frame: BGR image frame
+             timestamp_ms: Frame timestamp in milliseconds (required for VIDEO mode)

          Returns:
              Dictionary mapping landmark names to (x, y, visibility) tuples,
@@ -48,44 +129,619 @@ class PoseTracker:
          with self.timer.measure("frame_conversion"):
              rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

+         # Create MediaPipe Image
+         with self.timer.measure("image_creation"):
+             mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)  # type: ignore[attr-defined]
+
          # Process the frame
          with self.timer.measure("mediapipe_inference"):
-             results = self.pose.process(rgb_frame)
+             if self.running_mode == mp.tasks.vision.RunningMode.VIDEO:  # type: ignore[attr-defined]
+                 results = self.landmarker.detect_for_video(mp_image, timestamp_ms)
+             else:  # IMAGE mode
+                 results = self.landmarker.detect(mp_image)

          if not results.pose_landmarks:
              return None

-         # Extract key landmarks for feet tracking and CoM estimation
+         # Extract landmarks (first pose only)
          with self.timer.measure("landmark_extraction"):
-             landmarks = {}
-             landmark_names = {
-                 # Feet landmarks
-                 self.mp_pose.PoseLandmark.LEFT_ANKLE: "left_ankle",
-                 self.mp_pose.PoseLandmark.RIGHT_ANKLE: "right_ankle",
-                 self.mp_pose.PoseLandmark.LEFT_HEEL: "left_heel",
-                 self.mp_pose.PoseLandmark.RIGHT_HEEL: "right_heel",
-                 self.mp_pose.PoseLandmark.LEFT_FOOT_INDEX: "left_foot_index",
-                 self.mp_pose.PoseLandmark.RIGHT_FOOT_INDEX: "right_foot_index",
-                 # Torso landmarks for CoM estimation
-                 self.mp_pose.PoseLandmark.LEFT_HIP: "left_hip",
-                 self.mp_pose.PoseLandmark.RIGHT_HIP: "right_hip",
-                 self.mp_pose.PoseLandmark.LEFT_SHOULDER: "left_shoulder",
-                 self.mp_pose.PoseLandmark.RIGHT_SHOULDER: "right_shoulder",
-                 # Additional landmarks for better CoM estimation
-                 self.mp_pose.PoseLandmark.NOSE: "nose",
-                 self.mp_pose.PoseLandmark.LEFT_KNEE: "left_knee",
-                 self.mp_pose.PoseLandmark.RIGHT_KNEE: "right_knee",
-             }
-
-             for landmark_id, name in landmark_names.items():
-                 lm = results.pose_landmarks.landmark[landmark_id]
-                 landmarks[name] = (lm.x, lm.y, lm.visibility)
+             landmarks = _extract_landmarks_from_results(results.pose_landmarks[0])

          return landmarks

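In VIDEO mode the Tasks API requires monotonically increasing timestamps, so callers now have to thread `timestamp_ms` through their read loop. A sketch of a caller, assuming a hypothetical input file and the tracker constructed earlier:

```python
import cv2

cap = cv2.VideoCapture("jump.mp4")  # hypothetical input file
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
frame_idx = 0
while True:
    ok, frame = cap.read()
    if not ok:
        break
    # Derive a strictly increasing timestamp from the frame index.
    landmarks = tracker.process_frame(frame, timestamp_ms=int(frame_idx * 1000 / fps))
    frame_idx += 1
cap.release()
```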
      def close(self) -> None:
-         """Release resources."""
-         self.pose.close()
+         """Release resources.
+
+         Note: Tasks API landmarker doesn't have explicit close method.
+         Resources are released when the object is garbage collected.
+         """
+         pass
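
Because `close()` is now a no-op, existing cleanup wrappers keep working unchanged. A sketch using `contextlib.closing` (the frame here is a placeholder array, not real input):

```python
from contextlib import closing

import numpy as np

frame = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder BGR frame

# closing() calls tracker.close() on exit; harmless here, but keeps call
# sites uniform if a future backend does hold releasable resources.
with closing(MediaPipePoseTracker()) as tracker:
    landmarks = tracker.process_frame(frame, timestamp_ms=0)
```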
+
+
+ class PoseTrackerFactory:
+     """Factory for creating pose trackers with automatic backend selection.
+
+     Supports multiple backends with auto-detection:
+     - RTMPose CUDA: NVIDIA GPU acceleration (fastest, 133 FPS)
+     - RTMPose CoreML: Apple Silicon acceleration (42 FPS)
+     - RTMPose CPU: Optimized CPU implementation (40-68 FPS)
+     - MediaPipe: Fallback baseline (48 FPS)
+
+     Usage:
+         # Auto-detect best backend
+         tracker = PoseTrackerFactory.create()
+
+         # Force specific backend
+         tracker = PoseTrackerFactory.create(backend='rtmpose-cuda')
+
+         # Check available backends
+         available = PoseTrackerFactory.get_available_backends()
+     """
+
+     # Backend class mappings
+     _BACKENDS: dict[str, type] = {}
+
+     @classmethod
+     def create(
+         cls,
+         backend: str = "auto",
+         mode: str = "lightweight",
+         **kwargs: object,
+     ):
+         """Create a pose tracker with the specified backend.
+
+         Args:
+             backend: Backend selection:
+                 - 'auto': Auto-detect best available backend
+                 - 'mediapipe': MediaPipe Tasks API (baseline)
+                 - 'rtmpose-cpu': RTMPose optimized CPU
+                 - 'rtmpose-cuda': RTMPose with CUDA (NVIDIA GPU)
+                 - 'rtmpose-coreml': RTMPose with CoreML (Apple Silicon)
+             mode: RTMPose performance mode ('lightweight', 'balanced', 'performance')
+                 Only used for RTMPose backends
+             **kwargs: Additional arguments passed to tracker constructor
+
+         Returns:
+             Configured pose tracker instance
+
+         Raises:
+             ValueError: If backend is not available or recognized
+         """
+         # Auto-detect backend
+         if backend == "auto":
+             backend = cls._detect_best_backend()
+             backend = cls._check_backend_available(backend)
+
+         # Check environment variable override
+         import os
+
+         env_backend = os.environ.get("POSE_TRACKER_BACKEND")
+         if env_backend:
+             backend = cls._normalize_backend_name(env_backend)
+
+         # Verify backend is available
+         backend = cls._check_backend_available(backend)
+
+         # Get tracker class
+         tracker_class = cls._get_tracker_class(backend)
+
+         # Create tracker with appropriate arguments
+         return cls._create_tracker(tracker_class, backend, mode, kwargs)
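
Note the resolution order in `create()`: the `POSE_TRACKER_BACKEND` environment variable is consulted after both auto-detection and the explicit `backend` argument, so it overrides both. A sketch:

```python
import os

# Hypothetical override: even an explicit backend= argument is replaced,
# because create() reads the environment variable after it. The result is
# still subject to _check_backend_available()'s fallback chain.
os.environ["POSE_TRACKER_BACKEND"] = "rtmpose-cpu"
tracker = PoseTrackerFactory.create(backend="mediapipe")  # resolves to rtmpose-cpu
```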
+
+     @classmethod
+     def _detect_best_backend(cls) -> str:
+         """Detect the best available backend.
+
+         Priority order:
+         1. CUDA (NVIDIA GPU) - fastest
+         2. CoreML (Apple Silicon) - good performance
+         3. RTMPose CPU - optimized CPU
+         4. MediaPipe - baseline fallback
+
+         Returns:
+             Backend name string
+         """
+         # Check for CUDA (NVIDIA GPU)
+         try:
+             import torch
+
+             if torch.cuda.is_available():
+                 return "rtmpose-cuda"
+         except ImportError:
+             pass
+
+         # Check for CoreML (Apple Silicon)
+         import sys
+
+         if sys.platform == "darwin":
+             return "rtmpose-coreml"
+
+         # Check for RTMPose CPU
+         try:
+             from kinemotion.core.rtmpose_cpu import (
+                 OptimizedCPUTracker as _RTMPoseCPU,  # type: ignore
+             )
+
+             _ = _RTMPoseCPU  # Mark as intentionally used for availability check
+
+             return "rtmpose-cpu"
+         except ImportError:
+             pass
+
+         # Fallback to MediaPipe
+         return "mediapipe"
+
+     @classmethod
+     def _check_backend_available(cls, backend: str) -> str:
+         """Check if a backend is available and return a fallback if not.
+
+         Args:
+             backend: Requested backend name
+
+         Returns:
+             Available backend name (may be different from requested)
+
+         Raises:
+             ValueError: If no backend is available
+         """
+         normalized = cls._normalize_backend_name(backend)
+
+         # Check if specific backend can be imported
+         if normalized == "rtmpose-cuda":
+             try:
+                 import torch  # noqa: F401
+
+                 if not torch.cuda.is_available():
+                     # CUDA not available, fall back to CPU
+                     return cls._check_backend_available("rtmpose-cpu")
+                 # CUDA is available, use rtmpose-cuda
+                 return normalized
+             except ImportError:
+                 return cls._check_backend_available("rtmpose-cpu")
+
+         if normalized == "rtmpose-coreml":
+             import sys
+
+             if sys.platform != "darwin":
+                 # Not macOS, fall back to CPU
+                 return cls._check_backend_available("rtmpose-cpu")
+             # On macOS, CoreML is usable as requested
+             return normalized
+
+         if normalized == "rtmpose-cpu":
+             try:
+                 from kinemotion.core.rtmpose_cpu import (
+                     OptimizedCPUTracker as _RTMPoseCPU,
+                 )  # type: ignore
+
+                 _ = _RTMPoseCPU  # Mark as intentionally used for availability check
+
+                 return normalized
+             except ImportError:
+                 # RTMPose not available, fall back to MediaPipe
+                 return "mediapipe"
+
+         if normalized == "mediapipe":
+             try:
+                 import mediapipe as _mp  # noqa: F401
+
+                 _ = _mp  # Mark as intentionally used for availability check
+                 return normalized
+             except ImportError as err:
+                 raise ValueError(
+                     "No pose tracking backend available. Please install mediapipe or rtmlib."
+                 ) from err
+
+         raise ValueError(f"Unknown backend: {backend}")
+
+     @classmethod
+     def _normalize_backend_name(cls, backend: str) -> str:
+         """Normalize backend name to canonical form.
+
+         Args:
+             backend: User-provided backend name
+
+         Returns:
+             Canonical backend name
+         """
+         # Normalize various aliases to canonical names
+         aliases = {
+             "mp": "mediapipe",
+             "mediapipe": "mediapipe",
+             "rtmpose": "rtmpose-cpu",
+             "rtmpose-cpu": "rtmpose-cpu",
+             "rtmpose_cpu": "rtmpose-cpu",
+             "cpu": "rtmpose-cpu",
+             "cuda": "rtmpose-cuda",
+             "rtmpose-cuda": "rtmpose-cuda",
+             "rtmpose_cuda": "rtmpose-cuda",
+             "gpu": "rtmpose-cuda",
+             "mps": "rtmpose-coreml",
+             "coreml": "rtmpose-coreml",
+             "rtmpose-coreml": "rtmpose-coreml",
+             "rtmpose_coreml": "rtmpose-coreml",
+         }
+         return aliases.get(backend.lower(), backend)
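
The alias table makes user-facing names forgiving; unknown strings pass through unchanged and only fail later in `_check_backend_available()`. For instance:

```python
# Expected alias resolutions, per the table above.
assert PoseTrackerFactory._normalize_backend_name("GPU") == "rtmpose-cuda"
assert PoseTrackerFactory._normalize_backend_name("mps") == "rtmpose-coreml"
assert PoseTrackerFactory._normalize_backend_name("no-such-backend") == "no-such-backend"
```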
+
+     @classmethod
+     def _get_tracker_class(cls, backend: str):
+         """Get the tracker class for a backend.
+
+         Args:
+             backend: Canonical backend name
+
+         Returns:
+             Tracker class
+
+         Raises:
+             ValueError: If backend is not recognized
+         """
+         if backend == "mediapipe":
+             return MediaPipePoseTracker
+
+         if backend == "rtmpose-cpu":
+             try:
+                 from kinemotion.core.rtmpose_cpu import OptimizedCPUTracker
+
+                 return OptimizedCPUTracker
+             except ImportError as e:
+                 raise ValueError(f"RTMPose CPU backend requested but not available: {e}") from e
+
+         if backend in ("rtmpose-cuda", "rtmpose-coreml"):
+             try:
+                 from kinemotion.core.rtmpose_wrapper import RTMPoseWrapper
+
+                 return RTMPoseWrapper
+             except ImportError as e:
+                 raise ValueError(
+                     f"RTMPose wrapper backend requested but not available: {e}"
+                 ) from e
+
+         raise ValueError(f"Unknown backend: {backend}")
+
+     @classmethod
+     def _create_tracker(
+         cls,
+         tracker_class: type,
+         backend: str,
+         mode: str,
+         kwargs: dict[str, object],
+     ):
+         """Create a tracker instance with appropriate arguments.
+
+         Args:
+             tracker_class: Tracker class to instantiate
+             backend: Backend name (for parameter mapping)
+             mode: RTMPose mode (only used for RTMPose backends)
+             kwargs: Additional arguments from user
+
+         Returns:
+             Tracker instance
+         """
+         # MediaPipe-specific arguments
+         if backend == "mediapipe":
+             # Remove RTMPose-specific arguments
+             rtmpose_keys = {"mode", "backend", "device", "pose_input_size"}
+             filtered_kwargs = {k: v for k, v in kwargs.items() if k not in rtmpose_keys}
+             return tracker_class(**filtered_kwargs)
+
+         # OptimizedCPUTracker (CPU-only, doesn't accept device parameter)
+         if backend == "rtmpose-cpu":
+             # Remove RTMPoseWrapper-specific and MediaPipe-specific arguments
+             unsupported_keys = {
+                 "backend",
+                 "device",
+                 "min_detection_confidence",
+                 "min_tracking_confidence",
+             }
+             filtered_kwargs = {k: v for k, v in kwargs.items() if k not in unsupported_keys}
+             filtered_kwargs.setdefault("mode", mode)
+             return tracker_class(**filtered_kwargs)
+
+         # RTMPoseWrapper (CUDA/CoreML, requires device parameter)
+         # Remove MediaPipe-specific arguments
+         mediapipe_keys = {"min_detection_confidence", "min_tracking_confidence"}
+         filtered_kwargs = {k: v for k, v in kwargs.items() if k not in mediapipe_keys}
+
+         device = backend.split("-")[-1]  # Extract 'cuda', 'cpu', 'coreml'
+         if device == "coreml":
+             device = "mps"  # RTMLib uses 'mps' for Apple Silicon
+
+         filtered_kwargs.setdefault("device", device)
+         filtered_kwargs.setdefault("mode", mode)
+
+         return tracker_class(**filtered_kwargs)
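
The device string handed to `RTMPoseWrapper` is derived from the backend name itself, with one remap for rtmlib's naming. A sketch of just that mapping:

```python
# Mirrors the logic in _create_tracker(): the suffix after the last "-"
# becomes the device, and "coreml" is renamed to "mps" for rtmlib.
for backend in ("rtmpose-cuda", "rtmpose-coreml"):
    device = backend.split("-")[-1]
    if device == "coreml":
        device = "mps"
    print(backend, "->", device)  # rtmpose-cuda -> cuda, rtmpose-coreml -> mps
```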
+
+     @classmethod
+     def get_available_backends(cls) -> list[str]:
+         """Get list of available backends on current system.
+
+         Returns:
+             List of available backend names
+         """
+         available = []
+
+         # Always have MediaPipe as fallback
+         try:
+             import mediapipe as _mp  # noqa: F401
+
+             _ = _mp  # Mark as intentionally used for availability check
+             available.append("mediapipe")
+         except ImportError:
+             pass
+
+         # Check RTMPose CPU
+         try:
+             from kinemotion.core.rtmpose_cpu import (
+                 OptimizedCPUTracker as _RTMPoseCPU,
+             )  # type: ignore
+
+             _ = _RTMPoseCPU  # Mark as intentionally used for availability check
+
+             available.append("rtmpose-cpu")
+         except ImportError:
+             pass
+
+         # Check CUDA
+         try:
+             import torch
+
+             if torch.cuda.is_available():
+                 from kinemotion.core.rtmpose_wrapper import (
+                     RTMPoseWrapper as _RTMPoseWrapper,
+                 )  # type: ignore
+
+                 _ = _RTMPoseWrapper  # Mark as intentionally used for availability check
+
+                 available.append("rtmpose-cuda")
+         except ImportError:
+             pass
+
+         # Check CoreML (Apple Silicon)
+         import sys
+
+         if sys.platform == "darwin":
+             try:
+                 from kinemotion.core.rtmpose_wrapper import (
+                     RTMPoseWrapper as _RTMPoseWrapperMPS,
+                 )  # type: ignore
+
+                 _ = _RTMPoseWrapperMPS  # Mark as intentionally used for availability check
+
+                 available.append("rtmpose-coreml")
+             except ImportError:
+                 pass
+
+         return available
+
+     @classmethod
+     def get_backend_info(cls, backend: str) -> dict[str, str]:
+         """Get information about a backend.
+
+         Args:
+             backend: Backend name
+
+         Returns:
+             Dictionary with backend information
+         """
+         info = {
+             "mediapipe": {
+                 "name": "MediaPipe",
+                 "description": "Baseline pose tracking using MediaPipe Tasks API",
+                 "performance": "~48 FPS",
+                 "accuracy": "Baseline (reference)",
+                 "requirements": "mediapipe package",
+             },
+             "rtmpose-cpu": {
+                 "name": "RTMPose CPU",
+                 "description": "Optimized CPU implementation with ONNX Runtime",
+                 "performance": "~40-68 FPS (134% of MediaPipe)",
+                 "accuracy": "9-12px mean difference (1-5% metric accuracy)",
+                 "requirements": "rtmlib package",
+             },
+             "rtmpose-cuda": {
+                 "name": "RTMPose CUDA",
+                 "description": "NVIDIA GPU acceleration with CUDA",
+                 "performance": "~133 FPS (271% of MediaPipe)",
+                 "accuracy": "9-12px mean difference (1-5% metric accuracy)",
+                 "requirements": "rtmlib + CUDA-capable GPU",
+             },
+             "rtmpose-coreml": {
+                 "name": "RTMPose CoreML",
+                 "description": "Apple Silicon acceleration with CoreML",
+                 "performance": "~42 FPS (94% of MediaPipe)",
+                 "accuracy": "9-12px mean difference (1-5% metric accuracy)",
+                 "requirements": "rtmlib + Apple Silicon",
+             },
+         }
+
+         normalized = cls._normalize_backend_name(backend)
+         return info.get(normalized, {})
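
Together, `get_available_backends()` and `get_backend_info()` support a simple capability report. A usage sketch:

```python
for name in PoseTrackerFactory.get_available_backends():
    info = PoseTrackerFactory.get_backend_info(name)
    # Every entry in the info table above carries these keys.
    print(f"{info['name']}: {info['performance']} ({info['requirements']})")
```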
+
+
+ def get_tracker_info(tracker: object) -> str:
+     """Get detailed information about a pose tracker instance.
+
+     Args:
+         tracker: Pose tracker instance
+
+     Returns:
+         Formatted string with tracker details
+     """
+     tracker_class = type(tracker).__name__
+     module = type(tracker).__module__
+
+     info = f"{tracker_class} (from {module})"
+
+     # Add backend-specific details
+     if tracker_class == "MediaPipePoseTracker":
+         info += " [MediaPipe Tasks API]"
+     elif tracker_class == "OptimizedCPUTracker":
+         # Check if ONNX Runtime has CUDA
+         try:
+             import onnxruntime as ort
+
+             providers = ort.get_available_providers()
+             if "CUDAExecutionProvider" in providers:
+                 # Check what providers the session is actually using
+                 det_session = getattr(tracker, "det_session", None)
+                 if det_session is not None:
+                     active_providers = det_session.get_providers()
+                     if "CUDAExecutionProvider" in active_providers:
+                         info += " [ONNX Runtime: CUDA]"
+                     else:
+                         info += " [ONNX Runtime: CPU]"
+                 else:
+                     info += " [ONNX Runtime]"
+             else:
+                 info += " [ONNX Runtime: CPU]"
+         except ImportError:
+             info += " [ONNX Runtime]"
+     elif tracker_class == "RTMPoseWrapper":
+         device = getattr(tracker, "device", None)
+         if device:
+             if device == "cuda":
+                 try:
+                     import torch
+
+                     if torch.cuda.is_available():
+                         device_name = torch.cuda.get_device_name(0)
+                         info += f" [PyTorch CUDA: {device_name}]"
+                     else:
+                         info += " [PyTorch: CPU fallback]"
+                 except ImportError:
+                     info += " [PyTorch CUDA]"
+             elif device == "mps":
+                 info += " [PyTorch: Apple Silicon GPU]"
+             else:
+                 info += f" [PyTorch: {device}]"
+         else:
+             info += " [PyTorch]"
+
+     return info
+
+
+ def _extract_landmarks_from_results(
+     pose_landmarks: mp.tasks.vision.components.containers.NormalizedLandmark,  # type: ignore[valid-type]
+ ) -> dict[str, tuple[float, float, float]]:
+     """Extract kinemotion landmarks from pose landmarker result.
+
+     Args:
+         pose_landmarks: MediaPipe pose landmarks (list of 33 landmarks)
+
+     Returns:
+         Dictionary mapping landmark names to (x, y, visibility) tuples
+     """
+     landmarks: dict[str, tuple[float, float, float]] = {}
+
+     for name in KINEMOTION_LANDMARKS:
+         idx = LANDMARK_INDICES[name]
+         if idx < len(pose_landmarks):
+             lm = pose_landmarks[idx]
+             # Tasks API uses presence in addition to visibility
+             # Use visibility for consistency with Solution API
+             visibility = getattr(lm, "visibility", 1.0)
+             landmarks[name] = (lm.x, lm.y, visibility)
+
+     return landmarks
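
`pose_landmarks.py` is not shown in this diff; for orientation, the extraction above only assumes two tables of roughly the following shape (illustrative values only, consistent with the Tasks API indices in the legacy shim below):

```python
# Hypothetical sketch of the imported tables, NOT the actual module contents.
LANDMARK_INDICES: dict[str, int] = {
    "nose": 0,
    "left_shoulder": 11,
    "right_shoulder": 12,
    "left_hip": 23,
    "right_hip": 24,
    "left_ankle": 27,
    "right_ankle": 28,
}
KINEMOTION_LANDMARKS: list[str] = list(LANDMARK_INDICES)
```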
+
+
+ # Legacy compatibility aliases for Solution API enum values
+ class _LegacyPoseLandmark:
+     """Compatibility shim for Solution API enum values."""
+
+     LEFT_ANKLE = 27
+     RIGHT_ANKLE = 28
+     LEFT_HEEL = 29
+     RIGHT_HEEL = 30
+     LEFT_FOOT_INDEX = 31
+     RIGHT_FOOT_INDEX = 32
+     LEFT_HIP = 23
+     RIGHT_HIP = 24
+     LEFT_SHOULDER = 11
+     RIGHT_SHOULDER = 12
+     NOSE = 0
+     LEFT_KNEE = 25
+     RIGHT_KNEE = 26
+
+
+ PoseLandmark = _LegacyPoseLandmark
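
The shim keeps enum-style call sites working: attribute access yields the plain integers the Tasks API expects as list indices. For example:

```python
# Old-style access still works, now returning ints usable as list indices.
idx = PoseLandmark.LEFT_ANKLE  # 27
# Assuming `result` is a PoseLandmarkerResult with at least one detected pose:
# ankle = result.pose_landmarks[0][idx]
```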
+
+
+ def compute_center_of_mass(
+     landmarks: dict[str, tuple[float, float, float]],
+     visibility_threshold: float = 0.5,
+ ) -> tuple[float, float, float]:
+     """
+     Compute approximate center of mass (CoM) from body landmarks.
+
+     Uses biomechanical segment weights based on Dempster's body segment parameters:
+     - Head: 8% of body mass (represented by nose)
+     - Trunk (shoulders to hips): 50% of body mass
+     - Thighs: 2 × 10% = 20% of body mass
+     - Legs (knees to ankles): 2 × 5% = 10% of body mass
+     - Feet: 2 × 1.5% = 3% of body mass
+
+     The CoM is estimated as a weighted average of these segments, with
+     weights corresponding to their proportion of total body mass.
+
+     Args:
+         landmarks: Dictionary of landmark positions (x, y, visibility)
+         visibility_threshold: Minimum visibility to include landmark in calculation
+
+     Returns:
+         (x, y, visibility) tuple for estimated CoM position
+         visibility = average visibility of all segments used
+     """
+     segments: list = []
+     weights: list = []
+     visibilities: list = []
+
+     # Add body segments
+     _add_head_segment(segments, weights, visibilities, landmarks, visibility_threshold)
+     _add_trunk_segment(segments, weights, visibilities, landmarks, visibility_threshold)
+
+     # Add bilateral limb segments
+     for side in ["left", "right"]:
+         _add_limb_segment(
+             segments,
+             weights,
+             visibilities,
+             landmarks,
+             side,
+             "hip",
+             "knee",
+             0.10,
+             visibility_threshold,
+         )
+         _add_limb_segment(
+             segments,
+             weights,
+             visibilities,
+             landmarks,
+             side,
+             "knee",
+             "ankle",
+             0.05,
+             visibility_threshold,
+         )
+         _add_foot_segment(segments, weights, visibilities, landmarks, side, visibility_threshold)
+
+     # Fallback if no segments found
+     if not segments:
+         if "left_hip" in landmarks and "right_hip" in landmarks:
+             lh_x, lh_y, lh_vis = landmarks["left_hip"]
+             rh_x, rh_y, rh_vis = landmarks["right_hip"]
+             return ((lh_x + rh_x) / 2, (lh_y + rh_y) / 2, (lh_vis + rh_vis) / 2)
+         return (0.5, 0.5, 0.0)
+
+     # Normalize weights and compute weighted average
+     total_weight = sum(weights)
+     normalized_weights = [w / total_weight for w in weights]
+
+     com_x = float(sum(p[0] * w for p, w in zip(segments, normalized_weights, strict=True)))
+     com_y = float(sum(p[1] * w for p, w in zip(segments, normalized_weights, strict=True)))
+     com_visibility = float(np.mean(visibilities)) if visibilities else 0.0
+
+     return (com_x, com_y, com_visibility)
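
A worked instance of the weighting: if only the trunk (0.50) and head (0.08) segments clear the visibility threshold, the weights renormalize to roughly 0.862 and 0.138 before averaging:

```python
segments = [(0.5, 0.6), (0.5, 0.3)]  # trunk midpoint, head (nose)
weights = [0.50, 0.08]

total = sum(weights)  # 0.58
com_x = sum(x * (w / total) for (x, _), w in zip(segments, weights))
com_y = sum(y * (w / total) for (_, y), w in zip(segments, weights))
print(round(com_x, 3), round(com_y, 3))  # 0.5 0.559
```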


  def _add_head_segment(
@@ -180,81 +836,3 @@ def _add_foot_segment(
      segments.append((foot_x, foot_y))
      weights.append(0.015)
      visibilities.append(foot_vis)
-
-
- def compute_center_of_mass(
-     landmarks: dict[str, tuple[float, float, float]],
-     visibility_threshold: float = 0.5,
- ) -> tuple[float, float, float]:
-     """
-     Compute approximate center of mass (CoM) from body landmarks.
-
-     Uses biomechanical segment weights based on Dempster's body segment parameters:
-     - Head: 8% of body mass (represented by nose)
-     - Trunk (shoulders to hips): 50% of body mass
-     - Thighs: 2 × 10% = 20% of body mass
-     - Legs (knees to ankles): 2 × 5% = 10% of body mass
-     - Feet: 2 × 1.5% = 3% of body mass
-
-     The CoM is estimated as a weighted average of these segments, with
-     weights corresponding to their proportion of total body mass.
-
-     Args:
-         landmarks: Dictionary of landmark positions (x, y, visibility)
-         visibility_threshold: Minimum visibility to include landmark in calculation
-
-     Returns:
-         (x, y, visibility) tuple for estimated CoM position
-         visibility = average visibility of all segments used
-     """
-     segments: list = []
-     weights: list = []
-     visibilities: list = []
-
-     # Add body segments
-     _add_head_segment(segments, weights, visibilities, landmarks, visibility_threshold)
-     _add_trunk_segment(segments, weights, visibilities, landmarks, visibility_threshold)
-
-     # Add bilateral limb segments
-     for side in ["left", "right"]:
-         _add_limb_segment(
-             segments,
-             weights,
-             visibilities,
-             landmarks,
-             side,
-             "hip",
-             "knee",
-             0.10,
-             visibility_threshold,
-         )
-         _add_limb_segment(
-             segments,
-             weights,
-             visibilities,
-             landmarks,
-             side,
-             "knee",
-             "ankle",
-             0.05,
-             visibility_threshold,
-         )
-         _add_foot_segment(segments, weights, visibilities, landmarks, side, visibility_threshold)
-
-     # Fallback if no segments found
-     if not segments:
-         if "left_hip" in landmarks and "right_hip" in landmarks:
-             lh_x, lh_y, lh_vis = landmarks["left_hip"]
-             rh_x, rh_y, rh_vis = landmarks["right_hip"]
-             return ((lh_x + rh_x) / 2, (lh_y + rh_y) / 2, (lh_vis + rh_vis) / 2)
-         return (0.5, 0.5, 0.0)
-
-     # Normalize weights and compute weighted average
-     total_weight = sum(weights)
-     normalized_weights = [w / total_weight for w in weights]
-
-     com_x = float(sum(p[0] * w for p, w in zip(segments, normalized_weights, strict=True)))
-     com_y = float(sum(p[1] * w for p, w in zip(segments, normalized_weights, strict=True)))
-     com_visibility = float(np.mean(visibilities)) if visibilities else 0.0
-
-     return (com_x, com_y, com_visibility)