kinemotion 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of kinemotion might be problematic.
- kinemotion/__init__.py +3 -0
- {dropjump → kinemotion}/cli.py +141 -35
- kinemotion/core/__init__.py +40 -0
- kinemotion/core/filtering.py +345 -0
- kinemotion/core/pose.py +221 -0
- {dropjump → kinemotion/core}/smoothing.py +144 -0
- kinemotion/core/video_io.py +122 -0
- kinemotion/dropjump/__init__.py +29 -0
- dropjump/contact_detection.py → kinemotion/dropjump/analysis.py +81 -2
- dropjump/video_io.py → kinemotion/dropjump/debug_overlay.py +49 -140
- {dropjump → kinemotion/dropjump}/kinematics.py +4 -1
- {kinemotion-0.1.0.dist-info → kinemotion-0.2.0.dist-info}/METADATA +162 -26
- kinemotion-0.2.0.dist-info/RECORD +16 -0
- kinemotion-0.2.0.dist-info/entry_points.txt +2 -0
- dropjump/__init__.py +0 -3
- dropjump/pose_tracker.py +0 -74
- kinemotion-0.1.0.dist-info/RECORD +0 -12
- kinemotion-0.1.0.dist-info/entry_points.txt +0 -2
- {kinemotion-0.1.0.dist-info → kinemotion-0.2.0.dist-info}/WHEEL +0 -0
- {kinemotion-0.1.0.dist-info → kinemotion-0.2.0.dist-info}/licenses/LICENSE +0 -0
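The renames above imply new import paths for downstream code. A rough migration sketch, assuming the old flat `dropjump` modules exported the same names that the new `kinemotion` packages re-export below:

# kinemotion 0.1.0 (old layout, assumed exports)
# from dropjump.pose_tracker import PoseTracker
# from dropjump.contact_detection import detect_ground_contact

# kinemotion 0.2.0 (new layout, per the files in this diff)
from kinemotion.core.pose import PoseTracker
from kinemotion.dropjump.analysis import detect_ground_contact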
kinemotion/core/pose.py
ADDED
@@ -0,0 +1,221 @@
+"""Pose tracking using MediaPipe Pose."""
+
+
+import cv2
+import mediapipe as mp
+import numpy as np
+
+
+class PoseTracker:
+    """Tracks human pose landmarks in video frames using MediaPipe."""
+
+    def __init__(
+        self,
+        min_detection_confidence: float = 0.5,
+        min_tracking_confidence: float = 0.5,
+    ):
+        """
+        Initialize the pose tracker.
+
+        Args:
+            min_detection_confidence: Minimum confidence for pose detection
+            min_tracking_confidence: Minimum confidence for pose tracking
+        """
+        self.mp_pose = mp.solutions.pose
+        self.pose = self.mp_pose.Pose(
+            min_detection_confidence=min_detection_confidence,
+            min_tracking_confidence=min_tracking_confidence,
+            model_complexity=1,
+        )
+
+    def process_frame(
+        self, frame: np.ndarray
+    ) -> dict[str, tuple[float, float, float]] | None:
+        """
+        Process a single frame and extract pose landmarks.
+
+        Args:
+            frame: BGR image frame
+
+        Returns:
+            Dictionary mapping landmark names to (x, y, visibility) tuples,
+            or None if no pose detected. Coordinates are normalized (0-1).
+        """
+        # Convert BGR to RGB
+        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+        # Process the frame
+        results = self.pose.process(rgb_frame)
+
+        if not results.pose_landmarks:
+            return None
+
+        # Extract key landmarks for feet tracking and CoM estimation
+        landmarks = {}
+        landmark_names = {
+            # Feet landmarks
+            self.mp_pose.PoseLandmark.LEFT_ANKLE: "left_ankle",
+            self.mp_pose.PoseLandmark.RIGHT_ANKLE: "right_ankle",
+            self.mp_pose.PoseLandmark.LEFT_HEEL: "left_heel",
+            self.mp_pose.PoseLandmark.RIGHT_HEEL: "right_heel",
+            self.mp_pose.PoseLandmark.LEFT_FOOT_INDEX: "left_foot_index",
+            self.mp_pose.PoseLandmark.RIGHT_FOOT_INDEX: "right_foot_index",
+            # Torso landmarks for CoM estimation
+            self.mp_pose.PoseLandmark.LEFT_HIP: "left_hip",
+            self.mp_pose.PoseLandmark.RIGHT_HIP: "right_hip",
+            self.mp_pose.PoseLandmark.LEFT_SHOULDER: "left_shoulder",
+            self.mp_pose.PoseLandmark.RIGHT_SHOULDER: "right_shoulder",
+            # Additional landmarks for better CoM estimation
+            self.mp_pose.PoseLandmark.NOSE: "nose",
+            self.mp_pose.PoseLandmark.LEFT_KNEE: "left_knee",
+            self.mp_pose.PoseLandmark.RIGHT_KNEE: "right_knee",
+        }
+
+        for landmark_id, name in landmark_names.items():
+            lm = results.pose_landmarks.landmark[landmark_id]
+            landmarks[name] = (lm.x, lm.y, lm.visibility)
+
+        return landmarks
+
+    def close(self) -> None:
+        """Release resources."""
+        self.pose.close()
+
+
+def compute_center_of_mass(
+    landmarks: dict[str, tuple[float, float, float]],
+    visibility_threshold: float = 0.5,
+) -> tuple[float, float, float]:
+    """
+    Compute approximate center of mass (CoM) from body landmarks.
+
+    Uses biomechanical segment weights based on Dempster's body segment parameters:
+    - Head: 8% of body mass (represented by nose)
+    - Trunk (shoulders to hips): 50% of body mass
+    - Thighs: 2 × 10% = 20% of body mass
+    - Legs (knees to ankles): 2 × 5% = 10% of body mass
+    - Feet: 2 × 1.5% = 3% of body mass
+
+    The CoM is estimated as a weighted average of these segments, with
+    weights corresponding to their proportion of total body mass.
+
+    Args:
+        landmarks: Dictionary of landmark positions (x, y, visibility)
+        visibility_threshold: Minimum visibility to include landmark in calculation
+
+    Returns:
+        (x, y, visibility) tuple for estimated CoM position
+        visibility = average visibility of all segments used
+    """
+    # Define segment representatives and their weights (as fraction of body mass)
+    # Each segment uses midpoint or average of its bounding landmarks
+    segments = []
+    segment_weights = []
+    visibilities = []
+
+    # Head segment: 8% (use nose as proxy)
+    if "nose" in landmarks:
+        x, y, vis = landmarks["nose"]
+        if vis > visibility_threshold:
+            segments.append((x, y))
+            segment_weights.append(0.08)
+            visibilities.append(vis)
+
+    # Trunk segment: 50% (midpoint between shoulders and hips)
+    trunk_landmarks = ["left_shoulder", "right_shoulder", "left_hip", "right_hip"]
+    trunk_positions = [
+        (x, y, vis)
+        for key in trunk_landmarks
+        if key in landmarks
+        for x, y, vis in [landmarks[key]]
+        if vis > visibility_threshold
+    ]
+    if len(trunk_positions) >= 2:
+        trunk_x = float(np.mean([pos[0] for pos in trunk_positions]))
+        trunk_y = float(np.mean([pos[1] for pos in trunk_positions]))
+        trunk_vis = float(np.mean([pos[2] for pos in trunk_positions]))
+        segments.append((trunk_x, trunk_y))
+        segment_weights.append(0.50)
+        visibilities.append(trunk_vis)
+
+    # Thigh segment: 20% total (midpoint hip to knee for each leg)
+    for side in ["left", "right"]:
+        hip_key = f"{side}_hip"
+        knee_key = f"{side}_knee"
+        if hip_key in landmarks and knee_key in landmarks:
+            hip_x, hip_y, hip_vis = landmarks[hip_key]
+            knee_x, knee_y, knee_vis = landmarks[knee_key]
+            if hip_vis > visibility_threshold and knee_vis > visibility_threshold:
+                thigh_x = (hip_x + knee_x) / 2
+                thigh_y = (hip_y + knee_y) / 2
+                thigh_vis = (hip_vis + knee_vis) / 2
+                segments.append((thigh_x, thigh_y))
+                segment_weights.append(0.10)  # 10% per leg
+                visibilities.append(thigh_vis)
+
+    # Lower leg segment: 10% total (midpoint knee to ankle for each leg)
+    for side in ["left", "right"]:
+        knee_key = f"{side}_knee"
+        ankle_key = f"{side}_ankle"
+        if knee_key in landmarks and ankle_key in landmarks:
+            knee_x, knee_y, knee_vis = landmarks[knee_key]
+            ankle_x, ankle_y, ankle_vis = landmarks[ankle_key]
+            if knee_vis > visibility_threshold and ankle_vis > visibility_threshold:
+                leg_x = (knee_x + ankle_x) / 2
+                leg_y = (knee_y + ankle_y) / 2
+                leg_vis = (knee_vis + ankle_vis) / 2
+                segments.append((leg_x, leg_y))
+                segment_weights.append(0.05)  # 5% per leg
+                visibilities.append(leg_vis)
+
+    # Foot segment: 3% total (average of ankle, heel, foot_index)
+    for side in ["left", "right"]:
+        foot_keys = [f"{side}_ankle", f"{side}_heel", f"{side}_foot_index"]
+        foot_positions = [
+            (x, y, vis)
+            for key in foot_keys
+            if key in landmarks
+            for x, y, vis in [landmarks[key]]
+            if vis > visibility_threshold
+        ]
+        if foot_positions:
+            foot_x = float(np.mean([pos[0] for pos in foot_positions]))
+            foot_y = float(np.mean([pos[1] for pos in foot_positions]))
+            foot_vis = float(np.mean([pos[2] for pos in foot_positions]))
+            segments.append((foot_x, foot_y))
+            segment_weights.append(0.015)  # 1.5% per foot
+            visibilities.append(foot_vis)
+
+    # If no segments found, fall back to hip average
+    if not segments:
+        if "left_hip" in landmarks and "right_hip" in landmarks:
+            lh_x, lh_y, lh_vis = landmarks["left_hip"]
+            rh_x, rh_y, rh_vis = landmarks["right_hip"]
+            return (
+                (lh_x + rh_x) / 2,
+                (lh_y + rh_y) / 2,
+                (lh_vis + rh_vis) / 2,
+            )
+        # Ultimate fallback: center of frame
+        return (0.5, 0.5, 0.0)
+
+    # Normalize weights to sum to 1.0
+    total_weight = sum(segment_weights)
+    normalized_weights = [w / total_weight for w in segment_weights]
+
+    # Compute weighted average of segment positions
+    com_x = float(
+        sum(
+            pos[0] * weight
+            for pos, weight in zip(segments, normalized_weights, strict=True)
+        )
+    )
+    com_y = float(
+        sum(
+            pos[1] * weight
+            for pos, weight in zip(segments, normalized_weights, strict=True)
+        )
+    )
+    com_visibility = float(np.mean(visibilities)) if visibilities else 0.0
+
+    return (com_x, com_y, com_visibility)
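A minimal usage sketch for PoseTracker and compute_center_of_mass (not part of the release; the video path and frame loop are illustrative assumptions):

import cv2

from kinemotion.core.pose import PoseTracker, compute_center_of_mass

cap = cv2.VideoCapture("drop_jump.mp4")  # hypothetical input file
tracker = PoseTracker(min_detection_confidence=0.5, min_tracking_confidence=0.5)

landmark_sequence = []  # one dict (or None) per frame, later fed to smoothing
com_trajectory = []
while True:
    ret, frame = cap.read()
    if not ret:
        break
    landmarks = tracker.process_frame(frame)  # normalized (x, y, visibility) per landmark
    landmark_sequence.append(landmarks)
    if landmarks is not None:
        com_trajectory.append(compute_center_of_mass(landmarks))  # Dempster-weighted CoM

tracker.close()
cap.release()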
{dropjump → kinemotion/core}/smoothing.py
@@ -4,6 +4,11 @@
 import numpy as np
 from scipy.signal import savgol_filter
 
+from .filtering import (
+    bilateral_temporal_filter,
+    reject_outliers,
+)
+
 
 def smooth_landmarks(
     landmark_sequence: list[dict[str, tuple[float, float, float]] | None],
@@ -221,3 +226,142 @@ def compute_acceleration_from_derivative(
     )
 
     return acceleration  # type: ignore[no-any-return]
+
+
+def smooth_landmarks_advanced(
+    landmark_sequence: list[dict[str, tuple[float, float, float]] | None],
+    window_length: int = 5,
+    polyorder: int = 2,
+    use_outlier_rejection: bool = True,
+    use_bilateral: bool = False,
+    ransac_threshold: float = 0.02,
+    bilateral_sigma_spatial: float = 3.0,
+    bilateral_sigma_intensity: float = 0.02,
+) -> list[dict[str, tuple[float, float, float]] | None]:
+    """
+    Advanced landmark smoothing with outlier rejection and bilateral filtering.
+
+    Combines multiple techniques for robust smoothing:
+    1. Outlier rejection (RANSAC + median filtering)
+    2. Optional bilateral filtering (edge-preserving)
+    3. Savitzky-Golay smoothing
+
+    Args:
+        landmark_sequence: List of landmark dictionaries from each frame
+        window_length: Length of filter window (must be odd, >= polyorder + 2)
+        polyorder: Order of polynomial used to fit samples
+        use_outlier_rejection: Apply outlier detection and removal
+        use_bilateral: Use bilateral filter instead of Savitzky-Golay
+        ransac_threshold: Threshold for RANSAC outlier detection
+        bilateral_sigma_spatial: Spatial sigma for bilateral filter
+        bilateral_sigma_intensity: Intensity sigma for bilateral filter
+
+    Returns:
+        Smoothed landmark sequence with same structure as input
+    """
+    if len(landmark_sequence) < window_length:
+        # Not enough frames to smooth effectively
+        return landmark_sequence
+
+    # Ensure window_length is odd
+    if window_length % 2 == 0:
+        window_length += 1
+
+    # Extract landmark names from first valid frame
+    landmark_names = None
+    for frame_landmarks in landmark_sequence:
+        if frame_landmarks is not None:
+            landmark_names = list(frame_landmarks.keys())
+            break
+
+    if landmark_names is None:
+        return landmark_sequence
+
+    # Build arrays for each landmark coordinate
+    smoothed_sequence: list[dict[str, tuple[float, float, float]] | None] = []
+
+    for landmark_name in landmark_names:
+        # Extract x, y coordinates for this landmark across all frames
+        x_coords = []
+        y_coords = []
+        valid_frames = []
+
+        for i, frame_landmarks in enumerate(landmark_sequence):
+            if frame_landmarks is not None and landmark_name in frame_landmarks:
+                x, y, vis = frame_landmarks[landmark_name]
+                x_coords.append(x)
+                y_coords.append(y)
+                valid_frames.append(i)
+
+        if len(x_coords) < window_length:
+            continue
+
+        x_array = np.array(x_coords)
+        y_array = np.array(y_coords)
+
+        # Step 1: Outlier rejection
+        if use_outlier_rejection:
+            x_array, _ = reject_outliers(
+                x_array,
+                use_ransac=True,
+                use_median=True,
+                ransac_threshold=ransac_threshold,
+            )
+            y_array, _ = reject_outliers(
+                y_array,
+                use_ransac=True,
+                use_median=True,
+                ransac_threshold=ransac_threshold,
+            )
+
+        # Step 2: Smoothing (bilateral or Savitzky-Golay)
+        if use_bilateral:
+            x_smooth = bilateral_temporal_filter(
+                x_array,
+                window_size=window_length,
+                sigma_spatial=bilateral_sigma_spatial,
+                sigma_intensity=bilateral_sigma_intensity,
+            )
+            y_smooth = bilateral_temporal_filter(
+                y_array,
+                window_size=window_length,
+                sigma_spatial=bilateral_sigma_spatial,
+                sigma_intensity=bilateral_sigma_intensity,
+            )
+        else:
+            # Standard Savitzky-Golay
+            x_smooth = savgol_filter(x_array, window_length, polyorder)
+            y_smooth = savgol_filter(y_array, window_length, polyorder)
+
+        # Store smoothed values back
+        for idx, frame_idx in enumerate(valid_frames):
+            if frame_idx >= len(smoothed_sequence):
+                smoothed_sequence.extend(
+                    [{}] * (frame_idx - len(smoothed_sequence) + 1)
+                )
+
+            # Ensure smoothed_sequence[frame_idx] is a dict, not None
+            if smoothed_sequence[frame_idx] is None:
+                smoothed_sequence[frame_idx] = {}
+
+            if (
+                landmark_name not in smoothed_sequence[frame_idx]  # type: ignore[operator]
+                and landmark_sequence[frame_idx] is not None
+            ):
+                # Keep original visibility
+                orig_vis = landmark_sequence[frame_idx][landmark_name][2]  # type: ignore[index]
+                smoothed_sequence[frame_idx][landmark_name] = (  # type: ignore[index]
+                    float(x_smooth[idx]),
+                    float(y_smooth[idx]),
+                    orig_vis,
+                )
+
+    # Fill in any missing frames with original data
+    for i in range(len(landmark_sequence)):
+        if i >= len(smoothed_sequence) or not smoothed_sequence[i]:
+            if i < len(smoothed_sequence):
+                smoothed_sequence[i] = landmark_sequence[i]
+            else:
+                smoothed_sequence.append(landmark_sequence[i])
+
+    return smoothed_sequence
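A sketch of calling the new smooth_landmarks_advanced entry point on a collected sequence (argument values mirror the signature defaults; landmark_sequence is the per-frame list gathered as in the tracker sketch above):

from kinemotion.core.smoothing import smooth_landmarks_advanced

smoothed = smooth_landmarks_advanced(
    landmark_sequence,           # list of per-frame landmark dicts, None where no pose
    window_length=5,
    polyorder=2,
    use_outlier_rejection=True,  # RANSAC + median pre-pass before smoothing
    use_bilateral=False,         # False -> Savitzky-Golay branch
)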
kinemotion/core/video_io.py
ADDED
@@ -0,0 +1,122 @@
+"""Generic video I/O functionality for all jump analysis types."""
+
+import json
+import subprocess
+
+import cv2
+import numpy as np
+
+
+class VideoProcessor:
+    """
+    Handles video reading and processing.
+
+    IMPORTANT: This class preserves the exact aspect ratio of the source video.
+    No dimensions are hardcoded - all dimensions are extracted from actual frame data.
+    """
+
+    def __init__(self, video_path: str):
+        """
+        Initialize video processor.
+
+        Args:
+            video_path: Path to input video file
+        """
+        self.video_path = video_path
+        self.cap = cv2.VideoCapture(video_path)
+
+        if not self.cap.isOpened():
+            raise ValueError(f"Could not open video: {video_path}")
+
+        self.fps = self.cap.get(cv2.CAP_PROP_FPS)
+        self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+        # Read first frame to get actual dimensions
+        # This is critical for preserving aspect ratio, especially with mobile videos
+        # that have rotation metadata. OpenCV properties (CAP_PROP_FRAME_WIDTH/HEIGHT)
+        # may return incorrect dimensions, so we read the actual frame data.
+        ret, first_frame = self.cap.read()
+        if ret:
+            # frame.shape is (height, width, channels) - extract actual dimensions
+            self.height, self.width = first_frame.shape[:2]
+            self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # Reset to beginning
+        else:
+            # Fallback to video properties if can't read frame
+            self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+            self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+        # Calculate display dimensions considering SAR (Sample Aspect Ratio)
+        # Mobile videos often have non-square pixels encoded in SAR metadata
+        # OpenCV doesn't directly expose SAR, but we need to handle display correctly
+        self.display_width = self.width
+        self.display_height = self.height
+        self._calculate_display_dimensions()
+
+    def _calculate_display_dimensions(self) -> None:
+        """
+        Calculate display dimensions by reading SAR metadata from video file.
+
+        Many mobile videos use non-square pixels (SAR != 1:1), which means
+        the encoded dimensions differ from how the video should be displayed.
+        We use ffprobe to extract this metadata.
+        """
+        try:
+            # Use ffprobe to get SAR metadata
+            result = subprocess.run(
+                [
+                    "ffprobe",
+                    "-v",
+                    "quiet",
+                    "-print_format",
+                    "json",
+                    "-show_streams",
+                    "-select_streams",
+                    "v:0",
+                    self.video_path,
+                ],
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+
+            if result.returncode == 0:
+                data = json.loads(result.stdout)
+                if "streams" in data and len(data["streams"]) > 0:
+                    stream = data["streams"][0]
+                    sar_str = stream.get("sample_aspect_ratio", "1:1")
+
+                    # Parse SAR (e.g., "270:473")
+                    if sar_str and ":" in sar_str:
+                        sar_parts = sar_str.split(":")
+                        sar_width = int(sar_parts[0])
+                        sar_height = int(sar_parts[1])
+
+                        # Calculate display dimensions
+                        # DAR = (width * SAR_width) / (height * SAR_height)
+                        if sar_width != sar_height:
+                            self.display_width = int(
+                                self.width * sar_width / sar_height
+                            )
+                            self.display_height = self.height
+        except (subprocess.TimeoutExpired, FileNotFoundError, json.JSONDecodeError):
+            # If ffprobe fails, keep original dimensions (square pixels)
+            pass
+
+    def read_frame(self) -> np.ndarray | None:
+        """Read next frame from video."""
+        ret, frame = self.cap.read()
+        return frame if ret else None
+
+    def reset(self) -> None:
+        """Reset video to beginning."""
+        self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
+
+    def close(self) -> None:
+        """Release video capture."""
+        self.cap.release()
+
+    def __enter__(self) -> "VideoProcessor":
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # type: ignore[no-untyped-def]
+        self.close()
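The context-manager protocol above suggests usage along these lines (sketch only; the file name is a placeholder):

from kinemotion.core.video_io import VideoProcessor

with VideoProcessor("drop_jump.mp4") as video:
    # encoded vs. display dimensions can differ when SAR != 1:1
    print(video.width, video.height, video.display_width, video.display_height, video.fps)
    while (frame := video.read_frame()) is not None:
        ...  # hand each BGR frame to PoseTracker.process_frame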
kinemotion/dropjump/__init__.py
ADDED
@@ -0,0 +1,29 @@
+"""Drop jump analysis module."""
+
+from .analysis import (
+    ContactState,
+    calculate_adaptive_threshold,
+    compute_average_foot_position,
+    detect_ground_contact,
+    find_interpolated_phase_transitions_with_curvature,
+    interpolate_threshold_crossing,
+    refine_transition_with_curvature,
+)
+from .debug_overlay import DebugOverlayRenderer
+from .kinematics import DropJumpMetrics, calculate_drop_jump_metrics
+
+__all__ = [
+    # Contact detection
+    "ContactState",
+    "detect_ground_contact",
+    "compute_average_foot_position",
+    "calculate_adaptive_threshold",
+    "interpolate_threshold_crossing",
+    "refine_transition_with_curvature",
+    "find_interpolated_phase_transitions_with_curvature",
+    # Metrics
+    "DropJumpMetrics",
+    "calculate_drop_jump_metrics",
+    # Debug overlay
+    "DebugOverlayRenderer",
+]
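With these re-exports, the drop-jump API can presumably be imported from the subpackage root rather than from individual modules:

from kinemotion.dropjump import (
    DropJumpMetrics,
    calculate_adaptive_threshold,
    calculate_drop_jump_metrics,
    detect_ground_contact,
)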
dropjump/contact_detection.py → kinemotion/dropjump/analysis.py
@@ -4,7 +4,7 @@ from enum import Enum
 
 import numpy as np
 
-from .smoothing import (
+from ..core.smoothing import (
     compute_acceleration_from_derivative,
     compute_velocity_from_derivative,
 )
@@ -18,6 +18,77 @@ class ContactState(Enum):
     UNKNOWN = "unknown"
 
 
+def calculate_adaptive_threshold(
+    positions: np.ndarray,
+    fps: float,
+    baseline_duration: float = 3.0,
+    multiplier: float = 1.5,
+    smoothing_window: int = 5,
+    polyorder: int = 2,
+) -> float:
+    """
+    Calculate adaptive velocity threshold based on baseline motion characteristics.
+
+    Analyzes the first few seconds of video (assumed to be relatively stationary,
+    e.g., athlete standing on box) to determine the noise floor, then sets threshold
+    as a multiple of this baseline noise.
+
+    This adapts to:
+    - Different camera distances (closer = more pixel movement)
+    - Different lighting conditions (affects tracking quality)
+    - Different frame rates (higher fps = smoother motion)
+    - Video compression artifacts
+
+    Args:
+        positions: Array of vertical positions (0-1 normalized)
+        fps: Video frame rate
+        baseline_duration: Duration in seconds to analyze for baseline (default: 3.0s)
+        multiplier: Factor above baseline noise to set threshold (default: 1.5x)
+        smoothing_window: Window size for velocity computation
+        polyorder: Polynomial order for Savitzky-Golay filter (default: 2)
+
+    Returns:
+        Adaptive velocity threshold value
+
+    Example:
+        At 30fps with 3s baseline:
+        - Analyzes first 90 frames
+        - Computes velocity for this "stationary" period
+        - 95th percentile velocity = 0.012 (noise level)
+        - Threshold = 0.012 × 1.5 = 0.018
+    """
+    if len(positions) < 2:
+        return 0.02  # Fallback to default
+
+    # Calculate number of frames for baseline analysis
+    baseline_frames = int(fps * baseline_duration)
+    baseline_frames = min(baseline_frames, len(positions))
+
+    if baseline_frames < smoothing_window:
+        return 0.02  # Not enough data, use default
+
+    # Extract baseline period (assumed relatively stationary)
+    baseline_positions = positions[:baseline_frames]
+
+    # Compute velocity for baseline period using derivative
+    baseline_velocities = compute_velocity_from_derivative(
+        baseline_positions, window_length=smoothing_window, polyorder=polyorder
+    )
+
+    # Calculate noise floor as 95th percentile of baseline velocities
+    # Using 95th percentile instead of max to be robust against outliers
+    noise_floor = float(np.percentile(np.abs(baseline_velocities), 95))
+
+    # Set threshold as multiplier of noise floor
+    # Minimum threshold to avoid being too sensitive
+    adaptive_threshold = max(noise_floor * multiplier, 0.005)
+
+    # Maximum threshold to ensure we still detect contact
+    adaptive_threshold = min(adaptive_threshold, 0.05)
+
+    return adaptive_threshold
+
+
 def detect_ground_contact(
     foot_positions: np.ndarray,
     velocity_threshold: float = 0.02,
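Working the docstring's example numbers through the clamping logic (plain arithmetic for illustration, not additional release code):

fps, baseline_duration, multiplier = 30.0, 3.0, 1.5
baseline_frames = int(fps * baseline_duration)     # 90 frames analyzed
noise_floor = 0.012                                # 95th-percentile baseline velocity
threshold = max(noise_floor * multiplier, 0.005)   # 0.018
threshold = min(threshold, 0.05)                   # stays 0.018, inside [0.005, 0.05]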
@@ -245,6 +316,7 @@ def refine_transition_with_curvature(
     transition_type: str,
     search_window: int = 3,
     smoothing_window: int = 5,
+    polyorder: int = 2,
 ) -> float:
     """
     Refine phase transition timing using trajectory curvature analysis.
@@ -259,6 +331,7 @@ def refine_transition_with_curvature(
         transition_type: Type of transition ("landing" or "takeoff")
        search_window: Number of frames to search around estimate
        smoothing_window: Window size for acceleration computation
+        polyorder: Polynomial order for Savitzky-Golay filter (default: 2)
 
     Returns:
         Refined fractional frame index
@@ -268,7 +341,7 @@ def refine_transition_with_curvature(
 
     # Compute acceleration (second derivative)
     acceleration = compute_acceleration_from_derivative(
-        foot_positions, window_length=smoothing_window, polyorder=2
+        foot_positions, window_length=smoothing_window, polyorder=polyorder
     )
 
     # Define search range around estimated transition
@@ -319,6 +392,7 @@ def find_interpolated_phase_transitions_with_curvature(
     contact_states: list[ContactState],
     velocity_threshold: float,
     smoothing_window: int = 5,
+    polyorder: int = 2,
     use_curvature: bool = True,
 ) -> list[tuple[float, float, ContactState]]:
     """
@@ -334,6 +408,7 @@ def find_interpolated_phase_transitions_with_curvature(
         contact_states: List of ContactState for each frame
         velocity_threshold: Threshold used for contact detection
         smoothing_window: Window size for velocity/acceleration smoothing
+        polyorder: Polynomial order for Savitzky-Golay filter (default: 2)
         use_curvature: Whether to apply curvature-based refinement
 
     Returns:
@@ -362,6 +437,7 @@ def find_interpolated_phase_transitions_with_curvature(
                 "landing",
                 search_window=3,
                 smoothing_window=smoothing_window,
+                polyorder=polyorder,
             )
             # Refine takeoff (end of ground contact)
             refined_end = refine_transition_with_curvature(
@@ -370,6 +446,7 @@ def find_interpolated_phase_transitions_with_curvature(
                 "takeoff",
                 search_window=3,
                 smoothing_window=smoothing_window,
+                polyorder=polyorder,
             )
 
         elif state == ContactState.IN_AIR:
@@ -380,6 +457,7 @@ def find_interpolated_phase_transitions_with_curvature(
                 "takeoff",
                 search_window=3,
                 smoothing_window=smoothing_window,
+                polyorder=polyorder,
            )
             refined_end = refine_transition_with_curvature(
                 foot_positions,
@@ -387,6 +465,7 @@ def find_interpolated_phase_transitions_with_curvature(
                 "landing",
                 search_window=3,
                 smoothing_window=smoothing_window,
+                polyorder=polyorder,
             )
 
             refined_phases.append((refined_start, refined_end, state))