media_engine-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
media_engine/extractors/motion.py
@@ -0,0 +1,765 @@
+"""Camera motion analysis using optical flow."""
+
+import logging
+import platform
+import subprocess
+import time
+from dataclasses import dataclass
+from enum import StrEnum
+
+import cv2
+import numpy as np
+
+from media_engine.extractors.metadata.base import get_video_info
+
+logger = logging.getLogger(__name__)
+
+# Cache for hardware acceleration detection
+_hwaccel_cache: str | None = None
+
+# Analysis resolution (scale down for speed)
+ANALYSIS_HEIGHT = 720
+ANALYSIS_WIDTH = 1280  # 720p aspect ratio
+
+# Motion detection thresholds
+MOTION_THRESHOLD = 2.0  # Minimum average flow magnitude for motion
+PAN_TILT_THRESHOLD = 0.7  # Ratio of directional vs total flow for pan/tilt
+ZOOM_THRESHOLD = 0.3  # Divergence threshold for zoom detection
+STATIC_THRESHOLD = 0.5  # Below this = static
+
+
+class MotionType(StrEnum):
+    """Types of camera motion.
+
+    Note: PUSH_IN/PULL_OUT describe the optical flow pattern (radial expansion/contraction).
+    This could be optical zoom OR physical camera movement (dolly/travel).
+    Frontend can interpret based on metadata (lens type, GPS movement, device type).
+    """
+
+    STATIC = "static"
+    PAN_LEFT = "pan_left"
+    PAN_RIGHT = "pan_right"
+    TILT_UP = "tilt_up"
+    TILT_DOWN = "tilt_down"
+    PUSH_IN = "push_in"  # Radial expansion (zoom in or dolly forward)
+    PULL_OUT = "pull_out"  # Radial contraction (zoom out or dolly backward)
+    HANDHELD = "handheld"  # Random/shaky movement
+    COMPLEX = "complex"  # Multiple motions combined
+
+
+@dataclass(slots=True)
+class MotionSegment:
+    """A segment of video with consistent motion.
+
+    Uses slots=True to reduce memory overhead per instance.
+    """
+
+    start: float
+    end: float
+    motion_type: MotionType
+    intensity: float  # Average flow magnitude
+
+
+@dataclass(slots=True)
+class MotionAnalysis:
+    """Complete motion analysis for a video.
+
+    Uses slots=True to reduce memory overhead per instance.
+    """
+
+    duration: float
+    fps: float
+    primary_motion: MotionType
+    segments: list[MotionSegment]
+    avg_intensity: float
+    is_stable: bool  # True if mostly static/tripod
+
+
+# Chunk duration in seconds (2 minutes)
+CHUNK_DURATION = 120.0
+
+
+def _detect_hwaccel() -> str | None:
+    """Detect available hardware acceleration for video decoding.
+
+    Returns:
+        Hardware acceleration method name, or None if not available.
+        - "videotoolbox" for macOS (Apple Silicon or Intel with VideoToolbox)
+        - "cuda" for NVIDIA GPUs
+        - None for software decoding
+    """
+    global _hwaccel_cache
+
+    if _hwaccel_cache is not None:
+        return _hwaccel_cache if _hwaccel_cache != "" else None
+
+    # Check platform
+    system = platform.system()
+
+    if system == "Darwin":
+        # macOS - check for VideoToolbox support
+        try:
+            result = subprocess.run(
+                ["ffmpeg", "-hwaccels"],
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+            if "videotoolbox" in result.stdout:
+                logger.info("Hardware acceleration: VideoToolbox (macOS)")
+                _hwaccel_cache = "videotoolbox"
+                return "videotoolbox"
+        except Exception:
+            pass
+
+    elif system == "Linux":
+        # Linux - check for CUDA/NVDEC
+        try:
+            result = subprocess.run(
+                ["ffmpeg", "-hwaccels"],
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+            if "cuda" in result.stdout:
+                # Verify NVIDIA GPU is present
+                nvidia_check = subprocess.run(
+                    ["nvidia-smi", "-L"],
+                    capture_output=True,
+                    timeout=5,
+                )
+                if nvidia_check.returncode == 0:
+                    logger.info("Hardware acceleration: CUDA (NVIDIA)")
+                    _hwaccel_cache = "cuda"
+                    return "cuda"
+        except Exception:
+            pass
+
+    logger.info("Hardware acceleration: None (software decoding)")
+    _hwaccel_cache = ""
+    return None
+
+
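The detection above only parses the output of `ffmpeg -hwaccels` (and, on Linux, confirms a GPU via `nvidia-smi -L`), caching the result in `_hwaccel_cache`. The same probe can be reproduced directly (illustrative sketch, not part of the package diff; assumes ffmpeg is on PATH):

import subprocess

# Same probe the module runs: list the decoders this ffmpeg build advertises.
result = subprocess.run(["ffmpeg", "-hwaccels"], capture_output=True, text=True, timeout=5)
print(result.stdout)  # typically includes "videotoolbox" on macOS or "cuda" on NVIDIA hosts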
+def _load_frames_chunk(
+    file_path: str,
+    start_time: float,
+    chunk_duration: float,
+    sample_fps: float,
+    out_width: int,
+    out_height: int,
+    hwaccel: str | None = None,
+    src_width: int = 0,
+    src_height: int = 0,
+) -> np.ndarray:
+    """Load a chunk of frames into memory using FFmpeg.
+
+    Args:
+        file_path: Path to video file
+        start_time: Start time in seconds
+        chunk_duration: Duration of chunk to load in seconds
+        sample_fps: Frames per second to sample
+        out_width: Output frame width
+        out_height: Output frame height
+        hwaccel: Hardware acceleration method (videotoolbox, cuda, or None)
+        src_width: Source video width (for aspect ratio calculation with hwaccel)
+        src_height: Source video height (for aspect ratio calculation with hwaccel)
+
+    Returns:
+        numpy array of shape (num_frames, height, width) with grayscale frames
+    """
+    # Build command with optional hardware acceleration
+    cmd = ["ffmpeg", "-hide_banner"]
+
+    # For hardware acceleration, calculate output height based on source aspect ratio
+    actual_out_height = out_height
+    if hwaccel and src_width > 0 and src_height > 0:
+        # Calculate height maintaining aspect ratio, rounded to even number
+        actual_out_height = int(out_width * src_height / src_width)
+        actual_out_height = actual_out_height - (actual_out_height % 2)  # Ensure even
+
+    # Use hardware-accelerated decode and scaling if available
+    if hwaccel == "videotoolbox":
+        # Decode on hardware, scale on GPU, then transfer to CPU
+        # p010le is required for VideoToolbox hwdownload (10-bit format)
+        cmd.extend(["-hwaccel", "videotoolbox", "-hwaccel_output_format", "videotoolbox_vld"])
+        vf_filter = f"scale_vt=w={out_width}:h={actual_out_height},hwdownload,format=p010le,fps={sample_fps}"
+    elif hwaccel == "cuda":
+        cmd.extend(["-hwaccel", "cuda", "-hwaccel_output_format", "cuda"])
+        vf_filter = f"scale_cuda={out_width}:{actual_out_height},hwdownload,format=nv12,fps={sample_fps}"
+    else:
+        actual_out_height = out_height  # Use the provided value for software
+        vf_filter = f"scale={out_width}:{out_height}:force_original_aspect_ratio=decrease,fps={sample_fps}"
+
+    cmd.extend(
+        [
+            "-ss",
+            str(start_time),
+            "-t",
+            str(chunk_duration),
+            "-i",
+            file_path,
+            "-vf",
+            vf_filter,
+            "-f",
+            "rawvideo",
+            "-pix_fmt",
+            "gray",
+            "-",
+        ]
+    )
+
+    logger.debug(f"FFmpeg command: {' '.join(cmd)}")
+
+    process = subprocess.Popen(
+        cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+    )
+
+    frame_size = out_width * actual_out_height
+    frames: list[np.ndarray] = []
+
+    while True:
+        raw_frame = process.stdout.read(frame_size)  # type: ignore[union-attr]
+        if len(raw_frame) != frame_size:
+            break
+        frame = np.frombuffer(raw_frame, dtype=np.uint8).reshape((actual_out_height, out_width))
+        frames.append(frame.copy())
+
+    _, stderr = process.communicate()
+    if stderr and logger.isEnabledFor(logging.DEBUG):
+        # Log first few lines of stderr for debugging
+        stderr_lines = stderr.decode(errors="ignore").strip().split("\n")[:5]
+        for line in stderr_lines:
+            if line:
+                logger.debug(f"FFmpeg: {line}")
+
+    # If hardware acceleration failed (no frames), retry without it
+    if not frames and hwaccel:
+        # Log the stderr to understand why it failed
+        if stderr:
+            logger.warning(f"Hardware acceleration ({hwaccel}) failed: {stderr.decode(errors='ignore')[:500]}")
+        else:
+            logger.warning(f"Hardware acceleration ({hwaccel}) failed, no frames produced")
+        return _load_frames_chunk(
+            file_path,
+            start_time,
+            chunk_duration,
+            sample_fps,
+            out_width,
+            out_height,
+            hwaccel=None,
+            src_width=src_width,
+            src_height=src_height,
+        )
+
+    if not frames:
+        return np.array([], dtype=np.uint8)
+
+    return np.stack(frames)
+
+
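Taken on its own, the helper above returns one contiguous grayscale array per chunk. A minimal direct call might look like this (illustrative only, not part of the package diff; the file name is an assumption, and a 16:9 source decoded on the software path is assumed):

# Decode the first two minutes at 5 samples/second, scaled to 1280x720 grayscale.
frames = _load_frames_chunk("clip.mp4", start_time=0.0, chunk_duration=120.0,
                            sample_fps=5.0, out_width=1280, out_height=720)
print(frames.shape)  # (num_frames, 720, 1280); roughly 5 frames per decoded second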
+def analyze_motion(
+    file_path: str,
+    sample_fps: float = 5.0,  # Analyze every N frames per second
+    chunk_duration: float = CHUNK_DURATION,  # Process 2 minutes at a time
+) -> MotionAnalysis:
+    """Analyze camera motion in a video using optical flow.
+
+    Uses FFmpeg to decode frames at low resolution for efficiency with high-res video.
+    Processes in 2-minute chunks to balance memory usage and I/O efficiency.
+
+    Args:
+        file_path: Path to video file
+        sample_fps: How many frames per second to analyze (default 5)
+        chunk_duration: Duration of each processing chunk in seconds (default 120)
+
+    Returns:
+        MotionAnalysis with motion type segments
+    """
+    # Get video info
+    fps, duration, width, height = get_video_info(file_path)
+    if duration == 0:
+        # Fallback to opencv for duration
+        cap = cv2.VideoCapture(file_path)
+        fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
+        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        duration = frame_count / fps if fps > 0 else 0
+        cap.release()
+
+    total_samples = int(duration * sample_fps)
+
+    # Detect hardware acceleration
+    hwaccel = _detect_hwaccel()
+
+    logger.info(f"Analyzing motion: {duration:.1f}s @ {fps:.1f}fps, ~{total_samples} samples" + (f" (hwaccel={hwaccel})" if hwaccel else ""))
+
+    # Calculate actual frame dimensions after scaling
+    if width > height:
+        out_width = ANALYSIS_WIDTH
+        out_height = int(height * ANALYSIS_WIDTH / width)
+    else:
+        out_height = ANALYSIS_HEIGHT
+        out_width = int(width * ANALYSIS_HEIGHT / height)
+
+    # Ensure even dimensions for video
+    out_width = out_width - (out_width % 2)
+    out_height = out_height - (out_height % 2)
+
+    frame_motions: list[tuple[float, MotionType, float]] = []
+    prev_gray: np.ndarray | None = None
+    global_frame_idx = 0
+
+    # Timing stats
+    total_load_time = 0.0
+    total_flow_time = 0.0
+
+    # Process video in chunks
+    num_chunks = max(1, int(np.ceil(duration / chunk_duration)))
+    logger.debug(f"Processing in {num_chunks} chunk(s) of {chunk_duration}s each")
+
+    for chunk_idx in range(num_chunks):
+        chunk_start = chunk_idx * chunk_duration
+        actual_chunk_duration = min(chunk_duration, duration - chunk_start)
+
+        if actual_chunk_duration <= 0:
+            break
+
+        logger.debug(f"Loading chunk {chunk_idx + 1}/{num_chunks}: {chunk_start:.1f}s - {chunk_start + actual_chunk_duration:.1f}s")
+
+        # Load all frames for this chunk into memory
+        load_start = time.perf_counter()
+        frames = _load_frames_chunk(
+            file_path,
+            chunk_start,
+            actual_chunk_duration,
+            sample_fps,
+            out_width,
+            out_height,
+            hwaccel=hwaccel,
+            src_width=width,
+            src_height=height,
+        )
+        total_load_time += time.perf_counter() - load_start
+
+        if frames.size == 0:
+            continue
+
+        logger.debug(f"Loaded {len(frames)} frames into memory")
+
+        # Process optical flow for this chunk
+        flow_start = time.perf_counter()
+        for i in range(len(frames)):
+            gray = frames[i]
+            timestamp = global_frame_idx / sample_fps
+
+            if prev_gray is not None and prev_gray.shape == gray.shape:
+                # Compute optical flow
+                flow = cv2.calcOpticalFlowFarneback(
+                    prev_gray,
+                    gray,
+                    None,  # type: ignore[arg-type]
+                    pyr_scale=0.5,
+                    levels=3,
+                    winsize=15,
+                    iterations=3,
+                    poly_n=5,
+                    poly_sigma=1.2,
+                    flags=0,
+                )
+
+                # Classify motion
+                motion_type, intensity = _classify_flow(flow)
+                frame_motions.append((timestamp, motion_type, intensity))
+
+            prev_gray = gray.copy()
+            global_frame_idx += 1
+        total_flow_time += time.perf_counter() - flow_start
+
+    # Log timing breakdown
+    logger.info(f"Motion analysis timing: decode={total_load_time:.2f}s, " f"optical_flow={total_flow_time:.2f}s, frames={global_frame_idx}")
+
+    if not frame_motions:
+        return MotionAnalysis(
+            duration=duration,
+            fps=fps,
+            primary_motion=MotionType.STATIC,
+            segments=[],
+            avg_intensity=0.0,
+            is_stable=True,
+        )
+
+    # Build segments from frame motions
+    segments = _build_segments(frame_motions)
+
+    # Determine primary motion (most common or longest)
+    primary_motion = _get_primary_motion(segments, duration)
+
+    # Calculate average intensity
+    avg_intensity = np.mean([m[2] for m in frame_motions])
+
+    # Determine if video is stable (mostly static or low intensity)
+    static_time = sum(s.end - s.start for s in segments if s.motion_type == MotionType.STATIC)
+    is_stable = static_time > duration * 0.7 or avg_intensity < MOTION_THRESHOLD
+
+    logger.info(f"Motion analysis: primary={primary_motion}, segments={len(segments)}, stable={is_stable}")
+
+    return MotionAnalysis(
+        duration=duration,
+        fps=fps,
+        primary_motion=primary_motion,
+        segments=segments,
+        avg_intensity=float(avg_intensity),
+        is_stable=bool(is_stable),
+    )
+
+
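A minimal usage sketch of the public entry point (not part of the package diff; the import path follows the file list above, the file name is an assumption, and the printed values depend entirely on the footage):

from media_engine.extractors.motion import analyze_motion

analysis = analyze_motion("clip.mp4", sample_fps=5.0)
print(analysis.primary_motion, analysis.is_stable, round(analysis.avg_intensity, 2))
for seg in analysis.segments:
    print(f"{seg.start:.1f}-{seg.end:.1f}s {seg.motion_type} intensity={seg.intensity:.2f}")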
+def _classify_flow(flow: np.ndarray) -> tuple[MotionType, float]:
+    """Classify motion type from optical flow field.
+
+    Args:
+        flow: Optical flow array (H, W, 2) with x and y components
+
+    Returns:
+        (motion_type, intensity)
+    """
+    flow_x = flow[:, :, 0]
+    flow_y = flow[:, :, 1]
+
+    # Calculate flow statistics
+    mean_x = np.mean(flow_x)
+    mean_y = np.mean(flow_y)
+    magnitude = np.sqrt(flow_x**2 + flow_y**2)
+    mean_magnitude = np.mean(magnitude)
+
+    # Check if motion is significant
+    if mean_magnitude < STATIC_THRESHOLD:
+        return MotionType.STATIC, float(mean_magnitude)
+
+    # Check for zoom (divergence from center)
+    h, w = flow.shape[:2]
+    center_y, center_x = h // 2, w // 2
+
+    # Create coordinate grids relative to center
+    y_coords, x_coords = np.mgrid[0:h, 0:w]
+    x_rel = x_coords - center_x
+    y_rel = y_coords - center_y
+
+    # Normalize relative positions
+    dist_from_center = np.sqrt(x_rel**2 + y_rel**2) + 1e-7
+    x_norm = x_rel / dist_from_center
+    y_norm = y_rel / dist_from_center
+
+    # Compute divergence (dot product of flow with radial direction)
+    divergence = np.mean(flow_x * x_norm + flow_y * y_norm)
+
+    if abs(divergence) > ZOOM_THRESHOLD * mean_magnitude:
+        if divergence > 0:
+            return MotionType.PUSH_IN, float(mean_magnitude)
+        else:
+            return MotionType.PULL_OUT, float(mean_magnitude)
+
+    # Check for pan/tilt (consistent directional flow)
+    abs_mean_x = abs(mean_x)
+    abs_mean_y = abs(mean_y)
+
+    # Strong horizontal motion = pan
+    if abs_mean_x > abs_mean_y and abs_mean_x > MOTION_THRESHOLD:
+        ratio = abs_mean_x / (mean_magnitude + 1e-7)
+        if ratio > PAN_TILT_THRESHOLD:
+            if mean_x > 0:
+                return (
+                    MotionType.PAN_LEFT,
+                    float(mean_magnitude),
+                )  # Flow right = camera pans left
+            else:
+                return MotionType.PAN_RIGHT, float(mean_magnitude)
+
+    # Strong vertical motion = tilt
+    if abs_mean_y > abs_mean_x and abs_mean_y > MOTION_THRESHOLD:
+        ratio = abs_mean_y / (mean_magnitude + 1e-7)
+        if ratio > PAN_TILT_THRESHOLD:
+            if mean_y > 0:
+                return MotionType.TILT_UP, float(mean_magnitude)  # Flow down = camera tilts up
+            else:
+                return MotionType.TILT_DOWN, float(mean_magnitude)
+
+    # Significant motion but not consistent direction = handheld/complex
+    if mean_magnitude > MOTION_THRESHOLD:
+        return MotionType.HANDHELD, float(mean_magnitude)
+
+    return MotionType.STATIC, float(mean_magnitude)
+
+
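Because the classifier above is pure array math, it can be sanity-checked with synthetic flow fields (illustrative only, not part of the package diff; this calls the module-private helper directly):

import numpy as np

h, w = 720, 1280
# Uniform rightward flow: strong horizontal component, no divergence -> camera pans left.
pan = np.zeros((h, w, 2), dtype=np.float32)
pan[:, :, 0] = 5.0
print(_classify_flow(pan))  # -> (PAN_LEFT, 5.0)

# Purely radial flow away from the frame centre -> push in (zoom in or dolly forward).
y, x = np.mgrid[0:h, 0:w]
zoom = np.stack(((x - w // 2) * 0.02, (y - h // 2) * 0.02), axis=-1).astype(np.float32)
print(_classify_flow(zoom)[0])  # -> PUSH_IN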
+def _build_segments(
+    frame_motions: list[tuple[float, MotionType, float]],
+    min_segment_duration: float = 0.5,
+) -> list[MotionSegment]:
+    """Build motion segments from frame-by-frame analysis.
+
+    Merges consecutive frames with same motion type into segments.
+    """
+    if not frame_motions:
+        return []
+
+    segments: list[MotionSegment] = []
+    current_type = frame_motions[0][1]
+    current_start = frame_motions[0][0]
+    current_intensities: list[float] = [frame_motions[0][2]]
+
+    for timestamp, motion_type, intensity in frame_motions[1:]:
+        if motion_type == current_type:
+            current_intensities.append(intensity)
+        else:
+            # End current segment
+            segments.append(
+                MotionSegment(
+                    start=current_start,
+                    end=timestamp,
+                    motion_type=current_type,
+                    intensity=float(np.mean(current_intensities)),
+                )
+            )
+            current_type = motion_type
+            current_start = timestamp
+            current_intensities = [intensity]
+
+    # Add final segment
+    if frame_motions:
+        segments.append(
+            MotionSegment(
+                start=current_start,
+                end=frame_motions[-1][0] + 0.2,  # Extend slightly past last frame
+                motion_type=current_type,
+                intensity=float(np.mean(current_intensities)),
+            )
+        )
+
+    # Merge short segments
+    merged: list[MotionSegment] = []
+    for seg in segments:
+        if seg.end - seg.start < min_segment_duration and merged:
+            # Merge with previous segment
+            prev = merged[-1]
+            merged[-1] = MotionSegment(
+                start=prev.start,
+                end=seg.end,
+                motion_type=(prev.motion_type if prev.end - prev.start > seg.end - seg.start else seg.motion_type),
+                intensity=(prev.intensity + seg.intensity) / 2,
+            )
+        else:
+            merged.append(seg)
+
+    return merged
+
+
+def _get_primary_motion(segments: list[MotionSegment], duration: float) -> MotionType:
+    """Determine the primary motion type based on segment durations."""
+    if not segments:
+        return MotionType.STATIC
+
+    # Sum duration per motion type
+    type_durations: dict[MotionType, float] = {}
+    for seg in segments:
+        seg_duration = seg.end - seg.start
+        type_durations[seg.motion_type] = type_durations.get(seg.motion_type, 0) + seg_duration
+
+    # Return type with longest total duration
+    return max(type_durations, key=type_durations.get)  # type: ignore
+
+
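A small worked example of how per-frame labels collapse into segments and a primary motion (synthetic numbers at the default 5 fps sampling rate, not part of the package diff; calls the private helpers directly):

frame_motions = [
    (0.0, MotionType.STATIC, 0.3),
    (0.2, MotionType.STATIC, 0.4),
    (0.4, MotionType.PAN_LEFT, 3.1),
    (0.6, MotionType.PAN_LEFT, 3.3),
    (0.8, MotionType.PAN_LEFT, 2.9),
]
segments = _build_segments(frame_motions)
# Two segments: STATIC 0.0-0.4s and PAN_LEFT 0.4-1.0s (the last frame is extended by 0.2s).
print(_get_primary_motion(segments, duration=1.0))  # -> PAN_LEFT (longest total duration)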
+def get_sample_timestamps(
+    motion: MotionAnalysis,
+    max_samples: int = 5,
+) -> list[float]:
+    """Get optimal timestamps for frame sampling based on motion analysis.
+
+    Static segments need fewer samples, moving segments need more.
+
+    Args:
+        motion: Motion analysis result
+        max_samples: Maximum number of samples to return
+
+    Returns:
+        List of timestamps to sample
+    """
+    if not motion.segments:
+        if motion.is_stable or motion.primary_motion == MotionType.STATIC:
+            # Static video - just sample middle
+            return [motion.duration / 2]
+        else:
+            # No segments - sample evenly
+            return [motion.duration * i / (max_samples + 1) for i in range(1, max_samples + 1)]
+
+    # Check if there are any non-static segments
+    has_motion_segments = any(seg.motion_type not in (MotionType.STATIC, MotionType.HANDHELD) for seg in motion.segments)
+
+    if not has_motion_segments and motion.is_stable:
+        # All static/handheld - just sample middle
+        return [motion.duration / 2]
+
+    timestamps: list[float] = []
+
+    for seg in motion.segments:
+        seg_duration = seg.end - seg.start
+
+        if seg.motion_type == MotionType.STATIC:
+            # Static segment - one sample from middle
+            timestamps.append((seg.start + seg.end) / 2)
+
+        elif seg.motion_type in (
+            MotionType.PAN_LEFT,
+            MotionType.PAN_RIGHT,
+            MotionType.TILT_UP,
+            MotionType.TILT_DOWN,
+        ):
+            # Pan/tilt - sample start and end (different content)
+            timestamps.append(seg.start + 0.2)
+            if seg_duration > 1.0:
+                timestamps.append(seg.end - 0.2)
+
+        elif seg.motion_type in (MotionType.PUSH_IN, MotionType.PULL_OUT):
+            # Zoom - sample at different zoom levels
+            timestamps.append(seg.start + 0.2)
+            if seg_duration > 1.0:
+                timestamps.append(seg.end - 0.2)
+
+        elif seg.motion_type == MotionType.HANDHELD:
+            # Handheld - content is same, just one sample
+            timestamps.append((seg.start + seg.end) / 2)
+
+        else:
+            # Complex/unknown - sample middle
+            timestamps.append((seg.start + seg.end) / 2)
+
+    # Remove duplicates and sort
+    timestamps = sorted(set(timestamps))
+
+    # Limit to max_samples, keeping evenly distributed
+    if len(timestamps) > max_samples:
+        indices = np.linspace(0, len(timestamps) - 1, max_samples, dtype=int)
+        timestamps = [timestamps[i] for i in indices]
+
+    # Ensure timestamps are within bounds
+    timestamps = [max(0.1, min(t, motion.duration - 0.1)) for t in timestamps]
+
+    logger.info(f"Smart sampling: {len(timestamps)} frames from {len(motion.segments)} motion segments")
+
+    return timestamps
+
+
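For example, with a hand-built analysis (synthetic values, not part of the package diff) the sampler above takes one frame from the middle of each static segment and two across the pan:

analysis = MotionAnalysis(
    duration=20.0,
    fps=30.0,
    primary_motion=MotionType.PAN_RIGHT,
    segments=[
        MotionSegment(0.0, 8.0, MotionType.STATIC, 0.3),
        MotionSegment(8.0, 14.0, MotionType.PAN_RIGHT, 3.5),
        MotionSegment(14.0, 20.0, MotionType.STATIC, 0.4),
    ],
    avg_intensity=1.2,
    is_stable=False,
)
print(get_sample_timestamps(analysis, max_samples=5))  # -> [4.0, 8.2, 13.8, 17.0]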
+def get_adaptive_timestamps(
+    motion: MotionAnalysis,
+    min_fps: float = 0.1,
+    max_fps: float = 2.0,
+    max_samples: int = 100,
+) -> list[float]:
+    """Get timestamps with adaptive sampling based on motion intensity.
+
+    SMART OPTIMIZATION: For stable/static footage, returns very few samples
+    since the content doesn't change. This dramatically speeds up processing
+    for tripod shots, interviews, static drone hovers, etc.
+
+    Stability-based limits:
+    - Fully stable (is_stable=True, avg_intensity < 1.0): max 3 samples
+    - Mostly stable (is_stable=True): max 5 samples
+    - Some motion: uses intensity-based adaptive sampling
+
+    Intensity to FPS mapping (for non-stable segments):
+    - 0-0.5: min_fps (static, nothing changing)
+    - 0.5-2.0: 0.25 fps (stable, minimal change)
+    - 2.0-4.0: 0.5 fps (moderate motion)
+    - 4.0-6.0: 1.0 fps (active motion)
+    - 6.0+: max_fps (high motion, rapid changes)
+
+    Args:
+        motion: Motion analysis result
+        min_fps: Minimum sample rate for static content (default 0.1 = 1 per 10s)
+        max_fps: Maximum sample rate for high motion (default 2.0)
+        max_samples: Maximum total samples to return
+
+    Returns:
+        List of timestamps to sample
+    """
+    # OPTIMIZATION: Very stable footage needs minimal sampling
+    if motion.is_stable and motion.avg_intensity < 1.0:
+        # Extremely stable (tripod, static drone) - just 3 samples
+        if motion.duration < 10:
+            timestamps = [motion.duration / 2]
+        else:
+            # Start, middle, end
+            timestamps = [
+                motion.duration * 0.15,
+                motion.duration * 0.5,
+                motion.duration * 0.85,
+            ]
+        logger.info(f"Stable video optimization: {len(timestamps)} frames only " f"(avg_intensity={motion.avg_intensity:.1f})")
+        return timestamps
+
+    if motion.is_stable:
+        # Mostly stable - cap at 5 samples spread across duration
+        num_samples = min(5, max(1, int(motion.duration / 10)))
+        if num_samples == 1:
+            timestamps = [motion.duration / 2]
+        else:
+            step = motion.duration / (num_samples + 1)
+            timestamps = [step * (i + 1) for i in range(num_samples)]
+        logger.info(f"Stable video: {len(timestamps)} frames " f"(avg_intensity={motion.avg_intensity:.1f})")
+        return timestamps
+
+    if not motion.segments:
+        # No segments but not stable - sample at moderate rate
+        interval = 2.0  # 0.5 fps
+        timestamps = [t for t in _frange(0.1, motion.duration - 0.1, interval)]
+        timestamps = timestamps[:max_samples]
+        return timestamps
+
+    timestamps: list[float] = []
+
+    for seg in motion.segments:
+        intensity = seg.intensity
+
+        # Map intensity to fps
+        if intensity < 0.5:
+            fps = min_fps
+        elif intensity < 2.0:
+            fps = 0.25
+        elif intensity < 4.0:
+            fps = 0.5
+        elif intensity < 6.0:
+            fps = 1.0
+        else:
+            fps = max_fps
+
+        # Generate timestamps for this segment
+        interval = 1.0 / fps
+        t = seg.start + 0.1  # Start slightly after segment boundary
+        while t < seg.end - 0.1:
+            timestamps.append(t)
+            t += interval
+
+        # Always include at least one sample per segment
+        if not any(seg.start <= ts <= seg.end for ts in timestamps):
+            timestamps.append((seg.start + seg.end) / 2)
+
+    # Remove duplicates and sort
+    timestamps = sorted(set(timestamps))
+
+    # Cap at max_samples, keeping even distribution
+    if len(timestamps) > max_samples:
+        step = len(timestamps) / max_samples
+        timestamps = [timestamps[int(i * step)] for i in range(max_samples)]
+
+    # Ensure timestamps are within video bounds
+    timestamps = [max(0.1, min(t, motion.duration - 0.1)) for t in timestamps]
+
+    logger.info(f"Adaptive sampling: {len(timestamps)} frames " f"(avg_intensity={motion.avg_intensity:.1f}, stable={motion.is_stable})")
+
+    return timestamps
+
+
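One way to consume this module is to run the motion pass once and then decode frames only at the adaptive timestamps (illustrative sketch, not part of the package diff; the import path follows the file list above and the file name is an assumption):

from media_engine.extractors.motion import analyze_motion, get_adaptive_timestamps

analysis = analyze_motion("clip.mp4")
timestamps = get_adaptive_timestamps(analysis, max_samples=100)
print(f"sampling {len(timestamps)} frames from {analysis.duration:.0f}s of footage")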
+def _frange(start: float, stop: float, step: float):
+    """Float range generator."""
+    while start < stop:
+        yield start
+        start += step