kinemotion 0.66.8__py3-none-any.whl → 0.68.0__py3-none-any.whl
This diff shows the changes between publicly released versions of the package, as they appear in their public registry; it is provided for informational purposes only.
Potentially problematic release.
- kinemotion/cli.py +4 -3
- kinemotion/cmj/api.py +45 -17
- kinemotion/cmj/cli.py +21 -0
- kinemotion/core/__init__.py +15 -2
- kinemotion/core/debug_overlay_utils.py +2 -2
- kinemotion/core/model_downloader.py +172 -0
- kinemotion/core/pipeline_utils.py +4 -4
- kinemotion/core/pose.py +703 -125
- kinemotion/core/pose_landmarks.py +67 -0
- kinemotion/core/rtmpose_cpu.py +626 -0
- kinemotion/core/rtmpose_wrapper.py +190 -0
- kinemotion/core/timing.py +4 -2
- kinemotion/core/video_io.py +30 -0
- kinemotion/dropjump/api.py +40 -16
- kinemotion/dropjump/cli.py +27 -1
- kinemotion/models/__init__.py +0 -0
- kinemotion/models/pose_landmarker_lite.task +0 -0
- kinemotion/models/rtmpose-s_simcc-body7_pt-body7-halpe26_700e-256x192-7f134165_20230605.onnx +0 -0
- kinemotion/models/yolox_tiny_8xb8-300e_humanart-6f3252f9.onnx +0 -0
- {kinemotion-0.66.8.dist-info → kinemotion-0.68.0.dist-info}/METADATA +7 -2
- {kinemotion-0.66.8.dist-info → kinemotion-0.68.0.dist-info}/RECORD +24 -16
- {kinemotion-0.66.8.dist-info → kinemotion-0.68.0.dist-info}/WHEEL +0 -0
- {kinemotion-0.66.8.dist-info → kinemotion-0.68.0.dist-info}/entry_points.txt +0 -0
- {kinemotion-0.66.8.dist-info → kinemotion-0.68.0.dist-info}/licenses/LICENSE +0 -0
kinemotion/core/pose.py
CHANGED
@@ -1,44 +1,125 @@
-"""Pose tracking using MediaPipe
+"""Pose tracking using MediaPipe Tasks API.
+
+The MediaPipe Solutions API was removed in version 0.10.31.
+This module now uses the Tasks API (PoseLandmarker).
+
+Key differences from Solution API:
+- Tasks API uses index-based landmark access (0-32) instead of enums
+- Running modes: IMAGE, VIDEO, LIVE_STREAM
+- No smooth_landmarks option (built into VIDEO mode)
+- Has min_pose_presence_confidence parameter (no Solution API equivalent)
+
+Configuration strategies for matching Solution API behavior:
+- "video": Standard VIDEO mode with temporal smoothing
+- "video_low_presence": VIDEO mode with lower min_pose_presence_confidence (0.2)
+- "video_very_low_presence": VIDEO mode with very low min_pose_presence_confidence (0.1)
+- "image": IMAGE mode (no temporal smoothing, relies on our smoothing)
+"""
+
+from __future__ import annotations
 
 import cv2
 import mediapipe as mp
 import numpy as np
 
+from .pose_landmarks import KINEMOTION_LANDMARKS, LANDMARK_INDICES
 from .timing import NULL_TIMER, Timer
 
+# Running modes
+_RUNNING_MODES = {
+    "image": mp.tasks.vision.RunningMode.IMAGE,  # type: ignore[attr-defined]
+    "video": mp.tasks.vision.RunningMode.VIDEO,  # type: ignore[attr-defined]
+}
+
+# Strategy configurations
+_STRATEGY_CONFIGS: dict[str, dict[str, float | str]] = {
+    "video": {
+        "min_pose_presence_confidence": 0.5,
+        "running_mode": "video",
+    },
+    "video_low_presence": {
+        "min_pose_presence_confidence": 0.2,
+        "running_mode": "video",
+    },
+    "video_very_low_presence": {
+        "min_pose_presence_confidence": 0.1,
+        "running_mode": "video",
+    },
+    "image": {
+        "min_pose_presence_confidence": 0.5,
+        "running_mode": "image",
+    },
+}
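The strategy table above is the whole configuration surface: each name resolves to a presence threshold plus a running mode. A minimal sketch of that lookup, mirroring what the new __init__ below does (the fallback to "video_low_presence" comes from the diff; the snippet itself is illustrative, not part of the release):

    from kinemotion.core.pose import _STRATEGY_CONFIGS

    # Unknown strategy names fall back to "video_low_presence" in __init__.
    config = _STRATEGY_CONFIGS.get("video_very_low_presence", _STRATEGY_CONFIGS["video_low_presence"])
    assert config["min_pose_presence_confidence"] == 0.1
    assert config["running_mode"] == "video"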
+
+
+class MediaPipePoseTracker:
+    """Tracks human pose landmarks in video frames using MediaPipe Tasks API.
 
-
-
+    Args:
+        min_detection_confidence: Minimum confidence for pose detection (0.0-1.0)
+        min_tracking_confidence: Minimum confidence for pose tracking (0.0-1.0)
+        model_type: Model variant ("lite", "full", "heavy")
+        strategy: Configuration strategy ("video", "video_low_presence", "image")
+        timer: Optional Timer for measuring operations
+
+    Note: The Solution API's smooth_landmarks parameter cannot be replicated
+    exactly. VIDEO mode has built-in temporal smoothing that cannot be disabled.
+    """
 
-    def __init__(
+    def __init__(  # noqa: PLR0913
         self,
         min_detection_confidence: float = 0.5,
         min_tracking_confidence: float = 0.5,
+        model_type: str = "lite",
+        strategy: str = "video_low_presence",
         timer: Timer | None = None,
     ) -> None:
-        """
-        Initialize the pose tracker.
-
-        Args:
-            min_detection_confidence: Minimum confidence for pose detection
-            min_tracking_confidence: Minimum confidence for pose tracking
-            timer: Optional Timer for measuring operations
-        """
+        """Initialize the pose tracker."""
         self.timer = timer or NULL_TIMER
-        self.mp_pose = mp.
-        self.
-
-
+        self.mp_pose = mp.tasks.vision  # type: ignore[attr-defined]
+        self.model_type = model_type
+        self.strategy = strategy
+
+        # Get strategy configuration
+        config = _STRATEGY_CONFIGS.get(strategy, _STRATEGY_CONFIGS["video_low_presence"])
+        min_pose_presence = config["min_pose_presence_confidence"]
+        running_mode_name = str(config["running_mode"])
+        running_mode = _RUNNING_MODES[running_mode_name]
+
+        # Get model path
+        from .model_downloader import get_model_path
+
+        model_path = str(get_model_path(model_type))
+
+        # Create base options
+        base_options = mp.tasks.BaseOptions(model_asset_path=model_path)  # type: ignore[attr-defined]
+
+        # Create pose landmarker options
+        options = mp.tasks.vision.PoseLandmarkerOptions(  # type: ignore[attr-defined]
+            base_options=base_options,
+            running_mode=running_mode,
+            min_pose_detection_confidence=min_detection_confidence,
+            min_pose_presence_confidence=min_pose_presence,
             min_tracking_confidence=min_tracking_confidence,
-
+            output_segmentation_masks=False,
         )
 
-
-        ""
-
+        # Create the landmarker
+        with self.timer.measure("model_load"):
+            self.landmarker = self.mp_pose.PoseLandmarker.create_from_options(options)
+
+        self.running_mode = running_mode
+
+    def process_frame(
+        self,
+        frame: np.ndarray,
+        timestamp_ms: int = 0,
+    ) -> dict[str, tuple[float, float, float]] | None:
+        """Process a single frame and extract pose landmarks.
 
         Args:
             frame: BGR image frame
+            timestamp_ms: Frame timestamp in milliseconds (required for VIDEO mode)
 
         Returns:
             Dictionary mapping landmark names to (x, y, visibility) tuples,
@@ -48,44 +129,619 @@ class PoseTracker:
         with self.timer.measure("frame_conversion"):
             rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
 
+        # Create MediaPipe Image
+        with self.timer.measure("image_creation"):
+            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)  # type: ignore[attr-defined]
+
         # Process the frame
         with self.timer.measure("mediapipe_inference"):
-
+            if self.running_mode == mp.tasks.vision.RunningMode.VIDEO:  # type: ignore[attr-defined]
+                results = self.landmarker.detect_for_video(mp_image, timestamp_ms)
+            else:  # IMAGE mode
+                results = self.landmarker.detect(mp_image)
 
         if not results.pose_landmarks:
             return None
 
-        # Extract
+        # Extract landmarks (first pose only)
         with self.timer.measure("landmark_extraction"):
-            landmarks =
-            landmark_names = {
-                # Feet landmarks
-                self.mp_pose.PoseLandmark.LEFT_ANKLE: "left_ankle",
-                self.mp_pose.PoseLandmark.RIGHT_ANKLE: "right_ankle",
-                self.mp_pose.PoseLandmark.LEFT_HEEL: "left_heel",
-                self.mp_pose.PoseLandmark.RIGHT_HEEL: "right_heel",
-                self.mp_pose.PoseLandmark.LEFT_FOOT_INDEX: "left_foot_index",
-                self.mp_pose.PoseLandmark.RIGHT_FOOT_INDEX: "right_foot_index",
-                # Torso landmarks for CoM estimation
-                self.mp_pose.PoseLandmark.LEFT_HIP: "left_hip",
-                self.mp_pose.PoseLandmark.RIGHT_HIP: "right_hip",
-                self.mp_pose.PoseLandmark.LEFT_SHOULDER: "left_shoulder",
-                self.mp_pose.PoseLandmark.RIGHT_SHOULDER: "right_shoulder",
-                # Additional landmarks for better CoM estimation
-                self.mp_pose.PoseLandmark.NOSE: "nose",
-                self.mp_pose.PoseLandmark.LEFT_KNEE: "left_knee",
-                self.mp_pose.PoseLandmark.RIGHT_KNEE: "right_knee",
-            }
-
-            for landmark_id, name in landmark_names.items():
-                lm = results.pose_landmarks.landmark[landmark_id]
-                landmarks[name] = (lm.x, lm.y, lm.visibility)
+            landmarks = _extract_landmarks_from_results(results.pose_landmarks[0])
 
         return landmarks
 
     def close(self) -> None:
-        """Release resources.
-
+        """Release resources.
+
+        Note: Tasks API landmarker doesn't have explicit close method.
+        Resources are released when the object is garbage collected.
+        """
+        pass
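Taken together, the new tracker is driven frame by frame with monotonically increasing timestamps (the Tasks API VIDEO mode requires them). A usage sketch, assuming a video decoded with OpenCV; the file name and loop are illustrative, not from the package:

    import cv2

    from kinemotion.core.pose import MediaPipePoseTracker

    cap = cv2.VideoCapture("input.mp4")  # hypothetical input file
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    tracker = MediaPipePoseTracker(strategy="video_low_presence")

    frame_idx = 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break
        # VIDEO mode needs monotonically increasing timestamps in milliseconds
        landmarks = tracker.process_frame(frame, timestamp_ms=int(frame_idx * 1000 / fps))
        if landmarks is not None:
            x, y, visibility = landmarks.get("left_ankle", (0.0, 0.0, 0.0))
        frame_idx += 1

    cap.release()
    tracker.close()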
+
+
+class PoseTrackerFactory:
+    """Factory for creating pose trackers with automatic backend selection.
+
+    Supports multiple backends with auto-detection:
+    - RTMPose CUDA: NVIDIA GPU acceleration (fastest, 133 FPS)
+    - RTMPose CoreML: Apple Silicon acceleration (42 FPS)
+    - RTMPose CPU: Optimized CPU implementation (40-68 FPS)
+    - MediaPipe: Fallback baseline (48 FPS)
+
+    Usage:
+        # Auto-detect best backend
+        tracker = PoseTrackerFactory.create()
+
+        # Force specific backend
+        tracker = PoseTrackerFactory.create(backend='rtmpose-cuda')
+
+        # Check available backends
+        available = PoseTrackerFactory.get_available_backends()
+    """
+
+    # Backend class mappings
+    _BACKENDS: dict[str, type] = {}
+
+    @classmethod
+    def create(
+        cls,
+        backend: str = "auto",
+        mode: str = "lightweight",
+        **kwargs: object,
+    ):
+        """Create a pose tracker with the specified backend.
+
+        Args:
+            backend: Backend selection:
+                - 'auto': Auto-detect best available backend
+                - 'mediapipe': MediaPipe Tasks API (baseline)
+                - 'rtmpose-cpu': RTMPose optimized CPU
+                - 'rtmpose-cuda': RTMPose with CUDA (NVIDIA GPU)
+                - 'rtmpose-coreml': RTMPose with CoreML (Apple Silicon)
+            mode: RTMPose performance mode ('lightweight', 'balanced', 'performance')
+                Only used for RTMPose backends
+            **kwargs: Additional arguments passed to tracker constructor
+
+        Returns:
+            Configured pose tracker instance
+
+        Raises:
+            ValueError: If backend is not available or recognized
+        """
+        # Auto-detect backend
+        if backend == "auto":
+            backend = cls._detect_best_backend()
+            backend = cls._check_backend_available(backend)
+
+        # Check environment variable override
+        import os
+
+        env_backend = os.environ.get("POSE_TRACKER_BACKEND")
+        if env_backend:
+            backend = cls._normalize_backend_name(env_backend)
+
+        # Verify backend is available
+        backend = cls._check_backend_available(backend)
+
+        # Get tracker class
+        tracker_class = cls._get_tracker_class(backend)
+
+        # Create tracker with appropriate arguments
+        return cls._create_tracker(tracker_class, backend, mode, kwargs)
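One consequence of the ordering in create(): the POSE_TRACKER_BACKEND environment variable is read after both auto-detection and the explicit argument, so it wins. An illustrative sketch, assuming mediapipe is installed:

    import os

    from kinemotion.core.pose import PoseTrackerFactory

    os.environ["POSE_TRACKER_BACKEND"] = "mediapipe"
    # The env var overrides the explicit request below; _check_backend_available
    # then confirms mediapipe can actually be imported.
    tracker = PoseTrackerFactory.create(backend="rtmpose-cpu")
    print(type(tracker).__name__)  # MediaPipePoseTracker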
+
+    @classmethod
+    def _detect_best_backend(cls) -> str:
+        """Detect the best available backend.
+
+        Priority order:
+        1. CUDA (NVIDIA GPU) - fastest
+        2. CoreML (Apple Silicon) - good performance
+        3. RTMPose CPU - optimized CPU
+        4. MediaPipe - baseline fallback
+
+        Returns:
+            Backend name string
+        """
+        # Check for CUDA (NVIDIA GPU)
+        try:
+            import torch
+
+            if torch.cuda.is_available():
+                return "rtmpose-cuda"
+        except ImportError:
+            pass
+
+        # Check for CoreML (Apple Silicon)
+        import sys
+
+        if sys.platform == "darwin":
+            return "rtmpose-coreml"
+
+        # Check for RTMPose CPU
+        try:
+            from kinemotion.core.rtmpose_cpu import (
+                OptimizedCPUTracker as _RTMPoseCPU,  # type: ignore
+            )
+
+            _ = _RTMPoseCPU  # Mark as intentionally used for availability check
+
+            return "rtmpose-cpu"
+        except ImportError:
+            pass
+
+        # Fallback to MediaPipe
+        return "mediapipe"
+
+    @classmethod
+    def _check_backend_available(cls, backend: str) -> str:
+        """Check if a backend is available and return a fallback if not.
+
+        Args:
+            backend: Requested backend name
+
+        Returns:
+            Available backend name (may be different from requested)
+
+        Raises:
+            ValueError: If no backend is available
+        """
+        normalized = cls._normalize_backend_name(backend)
+
+        # Check if specific backend can be imported
+        if normalized == "rtmpose-cuda":
+            try:
+                import torch  # noqa: F401
+
+                if not torch.cuda.is_available():
+                    # CUDA not available, fall back to CPU
+                    return cls._check_backend_available("rtmpose-cpu")
+                # CUDA is available, use rtmpose-cuda
+                return normalized
+            except ImportError:
+                return cls._check_backend_available("rtmpose-cpu")
+
+        if normalized == "rtmpose-coreml":
+            import sys
+
+            if sys.platform != "darwin":
+                # Not macOS, fall back to CPU
+                return cls._check_backend_available("rtmpose-cpu")
+
+        if normalized == "rtmpose-cpu":
+            try:
+                from kinemotion.core.rtmpose_cpu import (
+                    OptimizedCPUTracker as _RTMPoseCPU,
+                )  # type: ignore
+
+                _ = _RTMPoseCPU  # Mark as intentionally used for availability check
+
+                return normalized
+            except ImportError:
+                # RTMPose not available, fall back to MediaPipe
+                return "mediapipe"
+
+        if normalized == "mediapipe":
+            try:
+                import mediapipe as _mp  # noqa: F401
+
+                _ = _mp  # Mark as intentionally used for availability check
+                return normalized
+            except ImportError as err:
+                raise ValueError(
+                    "No pose tracking backend available. Please install mediapipe or rtmlib."
+                ) from err
+
+        raise ValueError(f"Unknown backend: {backend}")
+
+    @classmethod
+    def _normalize_backend_name(cls, backend: str) -> str:
+        """Normalize backend name to canonical form.
+
+        Args:
+            backend: User-provided backend name
+
+        Returns:
+            Canonical backend name
+        """
+        # Normalize various aliases to canonical names
+        aliases = {
+            "mp": "mediapipe",
+            "mediapipe": "mediapipe",
+            "rtmpose": "rtmpose-cpu",
+            "rtmpose-cpu": "rtmpose-cpu",
+            "rtmpose_cpu": "rtmpose-cpu",
+            "cpu": "rtmpose-cpu",
+            "cuda": "rtmpose-cuda",
+            "rtmpose-cuda": "rtmpose-cuda",
+            "rtmpose_cuda": "rtmpose-cuda",
+            "gpu": "rtmpose-cuda",
+            "mps": "rtmpose-coreml",
+            "coreml": "rtmpose-coreml",
+            "rtmpose-coreml": "rtmpose-coreml",
+            "rtmpose_coreml": "rtmpose-coreml",
+        }
+        return aliases.get(backend.lower(), backend)
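The alias table collapses user-facing spellings to three canonical names; unknown strings pass through unchanged and are rejected later in _check_backend_available. Illustrative calls (accessing a private method purely for demonstration):

    from kinemotion.core.pose import PoseTrackerFactory

    norm = PoseTrackerFactory._normalize_backend_name
    assert norm("GPU") == "rtmpose-cuda"       # lowercased, then alias lookup
    assert norm("mps") == "rtmpose-coreml"
    assert norm("rtmpose_cpu") == "rtmpose-cpu"
    assert norm("tpu") == "tpu"                # unknown: passed through, fails later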
+
+    @classmethod
+    def _get_tracker_class(cls, backend: str):
+        """Get the tracker class for a backend.
+
+        Args:
+            backend: Canonical backend name
+
+        Returns:
+            Tracker class
+
+        Raises:
+            ValueError: If backend is not recognized
+        """
+        if backend == "mediapipe":
+            return MediaPipePoseTracker
+
+        if backend == "rtmpose-cpu":
+            try:
+                from kinemotion.core.rtmpose_cpu import OptimizedCPUTracker
+
+                return OptimizedCPUTracker
+            except ImportError as e:
+                raise ValueError(f"RTMPose CPU backend requested but not available: {e}") from e
+
+        if backend in ("rtmpose-cuda", "rtmpose-coreml"):
+            try:
+                from kinemotion.core.rtmpose_wrapper import RTMPoseWrapper
+
+                return RTMPoseWrapper
+            except ImportError as e:
+                raise ValueError(
+                    f"RTMPose wrapper backend requested but not available: {e}"
+                ) from e
+
+        raise ValueError(f"Unknown backend: {backend}")
+
+    @classmethod
+    def _create_tracker(
+        cls,
+        tracker_class: type,
+        backend: str,
+        mode: str,
+        kwargs: dict[str, object],
+    ):
+        """Create a tracker instance with appropriate arguments.
+
+        Args:
+            tracker_class: Tracker class to instantiate
+            backend: Backend name (for parameter mapping)
+            mode: RTMPose mode (only used for RTMPose backends)
+            kwargs: Additional arguments from user
+
+        Returns:
+            Tracker instance
+        """
+        # MediaPipe-specific arguments
+        if backend == "mediapipe":
+            # Remove RTMPose-specific arguments
+            rttmpose_keys = {"mode", "backend", "device", "pose_input_size"}
+            filtered_kwargs = {k: v for k, v in kwargs.items() if k not in rttmpose_keys}
+            return tracker_class(**filtered_kwargs)
+
+        # OptimizedCPUTracker (CPU-only, doesn't accept device parameter)
+        if backend == "rtmpose-cpu":
+            # Remove RTMPoseWrapper-specific and MediaPipe-specific arguments
+            unsupported_keys = {
+                "backend",
+                "device",
+                "min_detection_confidence",
+                "min_tracking_confidence",
+            }
+            filtered_kwargs = {k: v for k, v in kwargs.items() if k not in unsupported_keys}
+            filtered_kwargs.setdefault("mode", mode)
+            return tracker_class(**filtered_kwargs)
+
+        # RTMPoseWrapper (CUDA/CoreML, requires device parameter)
+        # Remove MediaPipe-specific arguments
+        mediapipe_keys = {"min_detection_confidence", "min_tracking_confidence"}
+        filtered_kwargs = {k: v for k, v in kwargs.items() if k not in mediapipe_keys}
+
+        device = backend.split("-")[-1]  # Extract 'cuda', 'cpu', 'coreml'
+        if device == "coreml":
+            device = "mps"  # RTMLib uses 'mps' for Apple Silicon
+
+        filtered_kwargs.setdefault("device", device)
+        filtered_kwargs.setdefault("mode", mode)
+
+        return tracker_class(**filtered_kwargs)
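The device string handed to RTMPoseWrapper is derived purely from the backend name, with one rename for Apple Silicon. A standalone sketch of that mapping (an illustrative helper, not part of the released module):

    def backend_to_device(backend: str) -> str:
        # Mirrors the logic in _create_tracker above.
        device = backend.split("-")[-1]  # "rtmpose-cuda" -> "cuda"
        return "mps" if device == "coreml" else device

    assert backend_to_device("rtmpose-cuda") == "cuda"
    assert backend_to_device("rtmpose-coreml") == "mps"  # rtmlib's name for Apple Silicon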
+
+    @classmethod
+    def get_available_backends(cls) -> list[str]:
+        """Get list of available backends on current system.
+
+        Returns:
+            List of available backend names
+        """
+        available = []
+
+        # Always have MediaPipe as fallback
+        try:
+            import mediapipe as _mp  # noqa: F401
+
+            _ = _mp  # Mark as intentionally used for availability check
+            available.append("mediapipe")
+        except ImportError:
+            pass
+
+        # Check RTMPose CPU
+        try:
+            from kinemotion.core.rtmpose_cpu import (
+                OptimizedCPUTracker as _RTMPoseCPU,
+            )  # type: ignore
+
+            _ = _RTMPoseCPU  # Mark as intentionally used for availability check
+
+            available.append("rtmpose-cpu")
+        except ImportError:
+            pass
+
+        # Check CUDA
+        try:
+            import torch
+
+            if torch.cuda.is_available():
+                from kinemotion.core.rtmpose_wrapper import (
+                    RTMPoseWrapper as _RTMPoseWrapper,
+                )  # type: ignore
+
+                _ = _RTMPoseWrapper  # Mark as intentionally used for availability check
+
+                available.append("rtmpose-cuda")
+        except ImportError:
+            pass
+
+        # Check CoreML (Apple Silicon)
+        import sys
+
+        if sys.platform == "darwin":
+            try:
+                from kinemotion.core.rtmpose_wrapper import (
+                    RTMPoseWrapper as _RTMPoseWrapperMPS,
+                )  # type: ignore
+
+                _ = _RTMPoseWrapperMPS  # Mark as intentionally used for availability check
+
+                available.append("rtmpose-coreml")
+            except ImportError:
+                pass
+
+        return available
+
+    @classmethod
+    def get_backend_info(cls, backend: str) -> dict[str, str]:
+        """Get information about a backend.
+
+        Args:
+            backend: Backend name
+
+        Returns:
+            Dictionary with backend information
+        """
+        info = {
+            "mediapipe": {
+                "name": "MediaPipe",
+                "description": "Baseline pose tracking using MediaPipe Tasks API",
+                "performance": "~48 FPS",
+                "accuracy": "Baseline (reference)",
+                "requirements": "mediapipe package",
+            },
+            "rtmpose-cpu": {
+                "name": "RTMPose CPU",
+                "description": "Optimized CPU implementation with ONNX Runtime",
+                "performance": "~40-68 FPS (134% of MediaPipe)",
+                "accuracy": "9-12px mean difference (1-5% metric accuracy)",
+                "requirements": "rtmlib package",
+            },
+            "rtmpose-cuda": {
+                "name": "RTMPose CUDA",
+                "description": "NVIDIA GPU acceleration with CUDA",
+                "performance": "~133 FPS (271% of MediaPipe)",
+                "accuracy": "9-12px mean difference (1-5% metric accuracy)",
+                "requirements": "rtmlib + CUDA-capable GPU",
+            },
+            "rtmpose-coreml": {
+                "name": "RTMPose CoreML",
+                "description": "Apple Silicon acceleration with CoreML",
+                "performance": "~42 FPS (94% of MediaPipe)",
+                "accuracy": "9-12px mean difference (1-5% metric accuracy)",
+                "requirements": "rtmlib + Apple Silicon",
+            },
+        }
+
+        normalized = cls._normalize_backend_name(backend)
+        return info.get(normalized, {})
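Because get_backend_info normalizes its argument first, the aliases work here too. An illustrative query:

    from kinemotion.core.pose import PoseTrackerFactory

    info = PoseTrackerFactory.get_backend_info("gpu")  # alias for "rtmpose-cuda"
    print(info["name"], "-", info["performance"])      # RTMPose CUDA - ~133 FPS (271% of MediaPipe)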
+
+
+def get_tracker_info(tracker: object) -> str:
+    """Get detailed information about a pose tracker instance.
+
+    Args:
+        tracker: Pose tracker instance
+
+    Returns:
+        Formatted string with tracker details
+    """
+    tracker_class = type(tracker).__name__
+    module = type(tracker).__module__
+
+    info = f"{tracker_class} (from {module})"
+
+    # Add backend-specific details
+    if tracker_class == "MediaPipePoseTracker":
+        info += " [MediaPipe Tasks API]"
+    elif tracker_class == "OptimizedCPUTracker":
+        # Check if ONNX Runtime has CUDA
+        try:
+            import onnxruntime as ort
+
+            providers = ort.get_available_providers()
+            if "CUDAExecutionProvider" in providers:
+                # Check what providers the session is actually using
+                det_session = getattr(tracker, "det_session", None)
+                if det_session is not None:
+                    active_providers = det_session.get_providers()
+                    if "CUDAExecutionProvider" in active_providers:
+                        info += " [ONNX Runtime: CUDA]"
+                    else:
+                        info += " [ONNX Runtime: CPU]"
+                else:
+                    info += " [ONNX Runtime]"
+            else:
+                info += " [ONNX Runtime: CPU]"
+        except ImportError:
+            info += " [ONNX Runtime]"
+    elif tracker_class == "RTMPoseWrapper":
+        device = getattr(tracker, "device", None)
+        if device:
+            if device == "cuda":
+                try:
+                    import torch
+
+                    if torch.cuda.is_available():
+                        device_name = torch.cuda.get_device_name(0)
+                        info += f" [PyTorch CUDA: {device_name}]"
+                    else:
+                        info += " [PyTorch: CPU fallback]"
+                except ImportError:
+                    info += " [PyTorch CUDA]"
+            elif device == "mps":
+                info += " [PyTorch: Apple Silicon GPU]"
+            else:
+                info += f" [PyTorch: {device}]"
+        else:
+            info += " [PyTorch]"
+
+    return info
+
+
+def _extract_landmarks_from_results(
+    pose_landmarks: mp.tasks.vision.components.containers.NormalizedLandmark,  # type: ignore[valid-type]
+) -> dict[str, tuple[float, float, float]]:
+    """Extract kinemotion landmarks from pose landmarker result.
+
+    Args:
+        pose_landmarks: MediaPipe pose landmarks (list of 33 landmarks)
+
+    Returns:
+        Dictionary mapping landmark names to (x, y, visibility) tuples
+    """
+    landmarks: dict[str, tuple[float, float, float]] = {}
+
+    for name in KINEMOTION_LANDMARKS:
+        idx = LANDMARK_INDICES[name]
+        if idx < len(pose_landmarks):
+            lm = pose_landmarks[idx]
+            # Tasks API uses presence in addition to visibility
+            # Use visibility for consistency with Solution API
+            visibility = getattr(lm, "visibility", 1.0)
+            landmarks[name] = (lm.x, lm.y, visibility)
+
+    return landmarks
+
+
+# Legacy compatibility aliases for Solution API enum values
+class _LegacyPoseLandmark:
+    """Compatibility shim for Solution API enum values."""
+
+    LEFT_ANKLE = 27
+    RIGHT_ANKLE = 28
+    LEFT_HEEL = 29
+    RIGHT_HEEL = 30
+    LEFT_FOOT_INDEX = 31
+    RIGHT_FOOT_INDEX = 32
+    LEFT_HIP = 23
+    RIGHT_HIP = 24
+    LEFT_SHOULDER = 11
+    RIGHT_SHOULDER = 12
+    NOSE = 0
+    LEFT_KNEE = 25
+    RIGHT_KNEE = 26
+
+
+PoseLandmark = _LegacyPoseLandmark
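The shim keeps any remaining Solution-API-style call sites working: the former enum attributes become plain ints that index the Tasks API landmark list. A small grounded check of the migration this diff performs:

    from kinemotion.core.pose import PoseLandmark

    # Solution API (removed in mediapipe 0.10.31):
    #   results.pose_landmarks.landmark[PoseLandmark.LEFT_ANKLE]
    # Tasks API (this module): results.pose_landmarks[0][27] is a plain list index.
    assert PoseLandmark.LEFT_ANKLE == 27
    assert PoseLandmark.NOSE == 0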
+
+
+def compute_center_of_mass(
+    landmarks: dict[str, tuple[float, float, float]],
+    visibility_threshold: float = 0.5,
+) -> tuple[float, float, float]:
+    """
+    Compute approximate center of mass (CoM) from body landmarks.
+
+    Uses biomechanical segment weights based on Dempster's body segment parameters:
+    - Head: 8% of body mass (represented by nose)
+    - Trunk (shoulders to hips): 50% of body mass
+    - Thighs: 2 × 10% = 20% of body mass
+    - Legs (knees to ankles): 2 × 5% = 10% of body mass
+    - Feet: 2 × 1.5% = 3% of body mass
+
+    The CoM is estimated as a weighted average of these segments, with
+    weights corresponding to their proportion of total body mass.
+
+    Args:
+        landmarks: Dictionary of landmark positions (x, y, visibility)
+        visibility_threshold: Minimum visibility to include landmark in calculation
+
+    Returns:
+        (x, y, visibility) tuple for estimated CoM position
+        visibility = average visibility of all segments used
+    """
+    segments: list = []
+    weights: list = []
+    visibilities: list = []
+
+    # Add body segments
+    _add_head_segment(segments, weights, visibilities, landmarks, visibility_threshold)
+    _add_trunk_segment(segments, weights, visibilities, landmarks, visibility_threshold)
+
+    # Add bilateral limb segments
+    for side in ["left", "right"]:
+        _add_limb_segment(
+            segments,
+            weights,
+            visibilities,
+            landmarks,
+            side,
+            "hip",
+            "knee",
+            0.10,
+            visibility_threshold,
+        )
+        _add_limb_segment(
+            segments,
+            weights,
+            visibilities,
+            landmarks,
+            side,
+            "knee",
+            "ankle",
+            0.05,
+            visibility_threshold,
+        )
+        _add_foot_segment(segments, weights, visibilities, landmarks, side, visibility_threshold)
+
+    # Fallback if no segments found
+    if not segments:
+        if "left_hip" in landmarks and "right_hip" in landmarks:
+            lh_x, lh_y, lh_vis = landmarks["left_hip"]
+            rh_x, rh_y, rh_vis = landmarks["right_hip"]
+            return ((lh_x + rh_x) / 2, (lh_y + rh_y) / 2, (lh_vis + rh_vis) / 2)
+        return (0.5, 0.5, 0.0)
+
+    # Normalize weights and compute weighted average
+    total_weight = sum(weights)
+    normalized_weights = [w / total_weight for w in weights]
+
+    com_x = float(sum(p[0] * w for p, w in zip(segments, normalized_weights, strict=True)))
+    com_y = float(sum(p[1] * w for p, w in zip(segments, normalized_weights, strict=True)))
+    com_visibility = float(np.mean(visibilities)) if visibilities else 0.0
+
+    return (com_x, com_y, com_visibility)
 
 
 def _add_head_segment(
@@ -180,81 +836,3 @@ def _add_foot_segment(
     segments.append((foot_x, foot_y))
     weights.append(0.015)
     visibilities.append(foot_vis)
-
-
-def compute_center_of_mass(
-    landmarks: dict[str, tuple[float, float, float]],
-    visibility_threshold: float = 0.5,
-) -> tuple[float, float, float]:
-    """
-    Compute approximate center of mass (CoM) from body landmarks.
-
-    Uses biomechanical segment weights based on Dempster's body segment parameters:
-    - Head: 8% of body mass (represented by nose)
-    - Trunk (shoulders to hips): 50% of body mass
-    - Thighs: 2 × 10% = 20% of body mass
-    - Legs (knees to ankles): 2 × 5% = 10% of body mass
-    - Feet: 2 × 1.5% = 3% of body mass
-
-    The CoM is estimated as a weighted average of these segments, with
-    weights corresponding to their proportion of total body mass.
-
-    Args:
-        landmarks: Dictionary of landmark positions (x, y, visibility)
-        visibility_threshold: Minimum visibility to include landmark in calculation
-
-    Returns:
-        (x, y, visibility) tuple for estimated CoM position
-        visibility = average visibility of all segments used
-    """
-    segments: list = []
-    weights: list = []
-    visibilities: list = []
-
-    # Add body segments
-    _add_head_segment(segments, weights, visibilities, landmarks, visibility_threshold)
-    _add_trunk_segment(segments, weights, visibilities, landmarks, visibility_threshold)
-
-    # Add bilateral limb segments
-    for side in ["left", "right"]:
-        _add_limb_segment(
-            segments,
-            weights,
-            visibilities,
-            landmarks,
-            side,
-            "hip",
-            "knee",
-            0.10,
-            visibility_threshold,
-        )
-        _add_limb_segment(
-            segments,
-            weights,
-            visibilities,
-            landmarks,
-            side,
-            "knee",
-            "ankle",
-            0.05,
-            visibility_threshold,
-        )
-        _add_foot_segment(segments, weights, visibilities, landmarks, side, visibility_threshold)
-
-    # Fallback if no segments found
-    if not segments:
-        if "left_hip" in landmarks and "right_hip" in landmarks:
-            lh_x, lh_y, lh_vis = landmarks["left_hip"]
-            rh_x, rh_y, rh_vis = landmarks["right_hip"]
-            return ((lh_x + rh_x) / 2, (lh_y + rh_y) / 2, (lh_vis + rh_vis) / 2)
-        return (0.5, 0.5, 0.0)
-
-    # Normalize weights and compute weighted average
-    total_weight = sum(weights)
-    normalized_weights = [w / total_weight for w in weights]
-
-    com_x = float(sum(p[0] * w for p, w in zip(segments, normalized_weights, strict=True)))
-    com_y = float(sum(p[1] * w for p, w in zip(segments, normalized_weights, strict=True)))
-    com_visibility = float(np.mean(visibilities)) if visibilities else 0.0
-
-    return (com_x, com_y, com_visibility)
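The second hunk simply deletes the old copy of compute_center_of_mass, which moved above _add_head_segment; the function body is unchanged. A quick check of the Dempster weights it uses: with every segment visible, the raw weights sum to 0.91 (head 0.08, trunk 0.50, two thighs at 0.10, two shanks at 0.05, two feet at 0.015; the untracked remainder is presumably mostly the arms), which is why the function renormalizes before averaging. Illustrative arithmetic:

    weights = [0.08, 0.50, 0.10, 0.10, 0.05, 0.05, 0.015, 0.015]
    total = sum(weights)                       # 0.91 when all segments are visible
    normalized = [w / total for w in weights]
    assert abs(sum(normalized) - 1.0) < 1e-9   # the weighted average uses these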