kinemotion 0.1.0__py3-none-any.whl


dropjump/video_io.py ADDED
@@ -0,0 +1,337 @@
+ """Video I/O and debug overlay rendering."""
+
+
+ import json
+ import subprocess
+
+ import cv2
+ import numpy as np
+
+ from .contact_detection import ContactState, compute_average_foot_position
+ from .kinematics import DropJumpMetrics
+
+
+ class VideoProcessor:
+     """
+     Handles video reading and processing.
+
+     IMPORTANT: This class preserves the exact aspect ratio of the source video.
+     No dimensions are hardcoded - all dimensions are extracted from actual frame data.
+     """
+
+     def __init__(self, video_path: str):
+         """
+         Initialize video processor.
+
+         Args:
+             video_path: Path to input video file
+         """
+         self.video_path = video_path
+         self.cap = cv2.VideoCapture(video_path)
+
+         if not self.cap.isOpened():
+             raise ValueError(f"Could not open video: {video_path}")
+
+         self.fps = self.cap.get(cv2.CAP_PROP_FPS)
+         self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+         # Read first frame to get actual dimensions
+         # This is critical for preserving aspect ratio, especially with mobile videos
+         # that have rotation metadata. OpenCV properties (CAP_PROP_FRAME_WIDTH/HEIGHT)
+         # may return incorrect dimensions, so we read the actual frame data.
+         ret, first_frame = self.cap.read()
+         if ret:
+             # frame.shape is (height, width, channels) - extract actual dimensions
+             self.height, self.width = first_frame.shape[:2]
+             self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # Reset to beginning
+         else:
+             # Fallback to video properties if can't read frame
+             self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+             self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+         # Calculate display dimensions considering SAR (Sample Aspect Ratio)
+         # Mobile videos often have non-square pixels encoded in SAR metadata
+         # OpenCV doesn't directly expose SAR, but we need to handle display correctly
+         self.display_width = self.width
+         self.display_height = self.height
+         self._calculate_display_dimensions()
+
+     def _calculate_display_dimensions(self) -> None:
+         """
+         Calculate display dimensions by reading SAR metadata from video file.
+
+         Many mobile videos use non-square pixels (SAR != 1:1), which means
+         the encoded dimensions differ from how the video should be displayed.
+         We use ffprobe to extract this metadata.
+         """
+         try:
+             # Use ffprobe to get SAR metadata
+             result = subprocess.run(
+                 [
+                     "ffprobe",
+                     "-v",
+                     "quiet",
+                     "-print_format",
+                     "json",
+                     "-show_streams",
+                     "-select_streams",
+                     "v:0",
+                     self.video_path,
+                 ],
+                 capture_output=True,
+                 text=True,
+                 timeout=5,
+             )
+
+             if result.returncode == 0:
+                 data = json.loads(result.stdout)
+                 if "streams" in data and len(data["streams"]) > 0:
+                     stream = data["streams"][0]
+                     sar_str = stream.get("sample_aspect_ratio", "1:1")
+
+                     # Parse SAR (e.g., "270:473")
+                     if sar_str and ":" in sar_str:
+                         sar_parts = sar_str.split(":")
+                         sar_width = int(sar_parts[0])
+                         sar_height = int(sar_parts[1])
+
+                         # Calculate display dimensions
+                         # DAR = (width * SAR_width) / (height * SAR_height)
+                         if sar_width != sar_height:
+                             self.display_width = int(
+                                 self.width * sar_width / sar_height
+                             )
+                             self.display_height = self.height
+         except (subprocess.TimeoutExpired, FileNotFoundError, json.JSONDecodeError):
+             # If ffprobe fails, keep original dimensions (square pixels)
+             pass
+
+     def read_frame(self) -> np.ndarray | None:
+         """Read next frame from video."""
+         ret, frame = self.cap.read()
+         return frame if ret else None
+
+     def reset(self) -> None:
+         """Reset video to beginning."""
+         self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
+
+     def close(self) -> None:
+         """Release video capture."""
+         self.cap.release()
+
+     def __enter__(self) -> "VideoProcessor":
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # type: ignore[no-untyped-def]
+         self.close()
+
+
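A quick worked example of the SAR arithmetic above (the numbers are illustrative only, not taken from the package): only the width is scaled by the sample aspect ratio, while the encoded height is kept as the display height.

    # Hypothetical values for illustration; not part of dropjump/video_io.py.
    encoded_width, encoded_height = 608, 1080
    sar_width, sar_height = 270, 473  # ffprobe "sample_aspect_ratio": "270:473"

    display_width = int(encoded_width * sar_width / sar_height)  # int(608 * 270 / 473) == 347
    display_height = encoded_height  # unchanged: 1080

So a stream encoded at 608x1080 with that SAR would be written out at 347x1080 by the renderer below, which is how a player that honors SAR metadata would display the source.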
+ class DebugOverlayRenderer:
+     """Renders debug information on video frames."""
+
+     def __init__(
+         self,
+         output_path: str,
+         width: int,
+         height: int,
+         display_width: int,
+         display_height: int,
+         fps: float,
+     ):
+         """
+         Initialize overlay renderer.
+
+         Args:
+             output_path: Path for output video
+             width: Encoded frame width (from source video)
+             height: Encoded frame height (from source video)
+             display_width: Display width (considering SAR)
+             display_height: Display height (considering SAR)
+             fps: Frames per second
+         """
+         self.width = width
+         self.height = height
+         self.display_width = display_width
+         self.display_height = display_height
+         self.needs_resize = (display_width != width) or (display_height != height)
+
+         # Try H.264 codec first (better quality/compatibility), fallback to mp4v
+         fourcc = cv2.VideoWriter_fourcc(*"avc1")  # type: ignore[attr-defined]
+         # IMPORTANT: cv2.VideoWriter expects (width, height) tuple - NOT (height, width)
+         # Write at display dimensions so video displays correctly without SAR metadata
+         self.writer = cv2.VideoWriter(
+             output_path, fourcc, fps, (display_width, display_height)
+         )
+
+         # Check if writer opened successfully, fallback to mp4v if not
+         if not self.writer.isOpened():
+             fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # type: ignore[attr-defined]
+             self.writer = cv2.VideoWriter(
+                 output_path, fourcc, fps, (display_width, display_height)
+             )
+
+         if not self.writer.isOpened():
+             raise ValueError(
+                 f"Failed to create video writer for {output_path} with dimensions "
+                 f"{display_width}x{display_height}"
+             )
+
+     def render_frame(
+         self,
+         frame: np.ndarray,
+         landmarks: dict[str, tuple[float, float, float]] | None,
+         contact_state: ContactState,
+         frame_idx: int,
+         metrics: DropJumpMetrics | None = None,
+     ) -> np.ndarray:
+         """
+         Render debug overlay on frame.
+
+         Args:
+             frame: Original video frame
+             landmarks: Pose landmarks for this frame
+             contact_state: Ground contact state
+             frame_idx: Current frame index
+             metrics: Drop-jump metrics (optional)
+
+         Returns:
+             Frame with debug overlay
+         """
+         annotated = frame.copy()
+
+         # Draw landmarks if available
+         if landmarks:
+             foot_x, foot_y = compute_average_foot_position(landmarks)
+             px = int(foot_x * self.width)
+             py = int(foot_y * self.height)
+
+             # Draw foot position circle
+             color = (
+                 (0, 255, 0) if contact_state == ContactState.ON_GROUND else (0, 0, 255)
+             )
+             cv2.circle(annotated, (px, py), 10, color, -1)
+
+             # Draw individual foot landmarks
+             foot_keys = ["left_ankle", "right_ankle", "left_heel", "right_heel"]
+             for key in foot_keys:
+                 if key in landmarks:
+                     x, y, vis = landmarks[key]
+                     if vis > 0.5:
+                         lx = int(x * self.width)
+                         ly = int(y * self.height)
+                         cv2.circle(annotated, (lx, ly), 5, (255, 255, 0), -1)
+
+         # Draw contact state
+         state_text = f"State: {contact_state.value}"
+         state_color = (
+             (0, 255, 0) if contact_state == ContactState.ON_GROUND else (0, 0, 255)
+         )
+         cv2.putText(
+             annotated,
+             state_text,
+             (10, 30),
+             cv2.FONT_HERSHEY_SIMPLEX,
+             1,
+             state_color,
+             2,
+         )
+
+         # Draw frame number
+         cv2.putText(
+             annotated,
+             f"Frame: {frame_idx}",
+             (10, 70),
+             cv2.FONT_HERSHEY_SIMPLEX,
+             0.7,
+             (255, 255, 255),
+             2,
+         )
+
+         # Draw metrics if in relevant phase
+         if metrics:
+             y_offset = 110
+             if (
+                 metrics.contact_start_frame
+                 and metrics.contact_end_frame
+                 and metrics.contact_start_frame
+                 <= frame_idx
+                 <= metrics.contact_end_frame
+             ):
+                 cv2.putText(
+                     annotated,
+                     "GROUND CONTACT",
+                     (10, y_offset),
+                     cv2.FONT_HERSHEY_SIMPLEX,
+                     0.7,
+                     (0, 255, 0),
+                     2,
+                 )
+                 y_offset += 40
+
+             if (
+                 metrics.flight_start_frame
+                 and metrics.flight_end_frame
+                 and metrics.flight_start_frame <= frame_idx <= metrics.flight_end_frame
+             ):
+                 cv2.putText(
+                     annotated,
+                     "FLIGHT PHASE",
+                     (10, y_offset),
+                     cv2.FONT_HERSHEY_SIMPLEX,
+                     0.7,
+                     (0, 0, 255),
+                     2,
+                 )
+                 y_offset += 40
+
+             if metrics.peak_height_frame == frame_idx:
+                 cv2.putText(
+                     annotated,
+                     "PEAK HEIGHT",
+                     (10, y_offset),
+                     cv2.FONT_HERSHEY_SIMPLEX,
+                     0.7,
+                     (255, 0, 255),
+                     2,
+                 )
+
+         return annotated
+
+     def write_frame(self, frame: np.ndarray) -> None:
+         """
+         Write frame to output video.
+
+         Args:
+             frame: Video frame with shape (height, width, 3)
+
+         Raises:
+             ValueError: If frame dimensions don't match expected encoded dimensions
+         """
+         # Validate frame dimensions match expected encoded dimensions
+         frame_height, frame_width = frame.shape[:2]
+         if frame_height != self.height or frame_width != self.width:
+             raise ValueError(
+                 f"Frame dimensions ({frame_width}x{frame_height}) don't match "
+                 f"source dimensions ({self.width}x{self.height}). "
+                 f"Aspect ratio must be preserved from source video."
+             )
+
+         # Resize to display dimensions if needed (to handle SAR)
+         if self.needs_resize:
+             frame = cv2.resize(
+                 frame,
+                 (self.display_width, self.display_height),
+                 interpolation=cv2.INTER_LANCZOS4,
+             )
+
+         self.writer.write(frame)
+
+     def close(self) -> None:
+         """Release video writer."""
+         self.writer.release()
+
+     def __enter__(self) -> "DebugOverlayRenderer":
+         return self
+
+     def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # type: ignore[no-untyped-def]
+         self.close()
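
Taken together, the two classes are meant to be used as context managers around a per-frame loop: VideoProcessor supplies frames plus encoded and display dimensions, and DebugOverlayRenderer draws the overlay and writes the output at display size. The sketch below shows one way to wire them up, with placeholder file names; estimate_landmarks() and contact_state_for() are hypothetical stand-ins for the pose-estimation and contact-detection steps that live elsewhere in the package, not functions defined in this module.

    # Minimal usage sketch (assumptions noted above).
    from dropjump.video_io import DebugOverlayRenderer, VideoProcessor

    with VideoProcessor("drop_jump.mp4") as video:
        with DebugOverlayRenderer(
            "debug_overlay.mp4",
            video.width,
            video.height,
            video.display_width,
            video.display_height,
            video.fps,
        ) as renderer:
            frame_idx = 0
            while True:
                frame = video.read_frame()
                if frame is None:
                    break
                landmarks = estimate_landmarks(frame)  # hypothetical pose step
                state = contact_state_for(frame_idx)  # hypothetical ContactState lookup
                annotated = renderer.render_frame(frame, landmarks, state, frame_idx)
                renderer.write_frame(annotated)
                frame_idx += 1

Because write_frame() validates that every incoming frame still has the source's encoded dimensions and only resizes at the final write, the overlay is always drawn in the same pixel space as the pose landmarks.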