media-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,351 @@
1
+ """Shared frame buffer for efficient video processing.
2
+
3
+ Decodes video frames once and shares them across multiple extractors,
4
+ eliminating redundant video decoding which is the performance bottleneck.
5
+ """
6
+
7
+ import logging
8
+ import platform
9
+ import subprocess
10
+ from dataclasses import dataclass, field
11
+
12
+ import cv2
13
+ import numpy as np
14
+ from PIL import Image
15
+
16
+ from media_engine.extractors.metadata.base import get_video_info
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Cache for hardware acceleration detection: None = not probed yet, "" = probed and nothing usable, otherwise the detected method name
21
+ _hwaccel_cache: str | None = None
22
+
23
+
24
+ def _detect_hwaccel() -> str | None:
25
+ """Detect available hardware acceleration for video decoding.
26
+
27
+ Returns:
28
+ Hardware acceleration method name, or None if not available.
29
+ - "videotoolbox" for macOS
30
+ - "cuda" for NVIDIA GPUs
31
+ - None for software decoding
32
+ """
33
+ global _hwaccel_cache
34
+
35
+ if _hwaccel_cache is not None:
36
+ return _hwaccel_cache if _hwaccel_cache != "" else None
37
+
38
+ system = platform.system()
39
+
40
+ if system == "Darwin":
41
+ try:
42
+ result = subprocess.run(
43
+ ["ffmpeg", "-hwaccels"],
44
+ capture_output=True,
45
+ text=True,
46
+ timeout=5,
47
+ )
48
+ if "videotoolbox" in result.stdout:
49
+ logger.info("Hardware acceleration: VideoToolbox (macOS)")
50
+ _hwaccel_cache = "videotoolbox"
51
+ return "videotoolbox"
52
+ except Exception:
53
+ pass
54
+
55
+ elif system == "Linux":
56
+ try:
57
+ result = subprocess.run(
58
+ ["ffmpeg", "-hwaccels"],
59
+ capture_output=True,
60
+ text=True,
61
+ timeout=5,
62
+ )
63
+ if "cuda" in result.stdout:
64
+ nvidia_check = subprocess.run(
65
+ ["nvidia-smi", "-L"],
66
+ capture_output=True,
67
+ timeout=5,
68
+ )
69
+ if nvidia_check.returncode == 0:
70
+ logger.info("Hardware acceleration: CUDA (NVIDIA)")
71
+ _hwaccel_cache = "cuda"
72
+ return "cuda"
73
+ except Exception:
74
+ pass
75
+
76
+ logger.info("Hardware acceleration: None (software decoding)")
77
+ _hwaccel_cache = ""
78
+ return None
79
+
80
+
81
+ @dataclass(slots=True)
82
+ class SharedFrame:
83
+ """A decoded frame with lazy format conversions.
84
+
85
+ Stores the original BGR frame and provides lazy conversion to other formats
86
+ (RGB, grayscale, PIL) to minimize memory usage and conversion overhead.
87
+
88
+ Uses slots=True to reduce memory overhead per instance.
89
+ """
90
+
91
+ timestamp: float
92
+ bgr: np.ndarray
93
+ _rgb: np.ndarray | None = field(default=None, repr=False)
94
+ _gray: np.ndarray | None = field(default=None, repr=False)
95
+ _pil: Image.Image | None = field(default=None, repr=False)
96
+
97
+ @property
98
+ def rgb(self) -> np.ndarray:
99
+ """Get RGB format (lazy conversion from BGR)."""
100
+ if self._rgb is None:
101
+ self._rgb = cv2.cvtColor(self.bgr, cv2.COLOR_BGR2RGB)
102
+ return self._rgb
103
+
104
+ @property
105
+ def gray(self) -> np.ndarray:
106
+ """Get grayscale format (lazy conversion from BGR)."""
107
+ if self._gray is None:
108
+ self._gray = cv2.cvtColor(self.bgr, cv2.COLOR_BGR2GRAY)
109
+ return self._gray
110
+
111
+ @property
112
+ def pil(self) -> Image.Image:
113
+ """Get PIL Image format (lazy conversion from RGB)."""
114
+ if self._pil is None:
115
+ self._pil = Image.fromarray(self.rgb)
116
+ return self._pil
117
+
118
+
119
+ @dataclass(slots=True)
120
+ class SharedFrameBuffer:
121
+ """Buffer of decoded frames for a video file.
122
+
123
+ Holds pre-decoded frames that can be shared across multiple extractors,
124
+ eliminating redundant video decoding.
125
+
126
+ Uses slots=True to reduce memory overhead.
127
+ """
128
+
129
+ file_path: str
130
+ frames: dict[float, SharedFrame] # timestamp -> frame
131
+ width: int
132
+ height: int
133
+
134
+ def __len__(self) -> int:
135
+ return len(self.frames)
136
+
137
+ def get_frame(self, timestamp: float) -> SharedFrame | None:
138
+ """Get frame at exact timestamp."""
139
+ return self.frames.get(timestamp)
140
+
141
+ def get_nearest_frame(self, timestamp: float) -> SharedFrame | None:
142
+ """Get frame nearest to timestamp."""
143
+ if not self.frames:
144
+ return None
145
+ nearest_ts = min(self.frames.keys(), key=lambda t: abs(t - timestamp))
146
+ return self.frames[nearest_ts]
147
+
148
+ def timestamps(self) -> list[float]:
149
+ """Get sorted list of all timestamps."""
150
+ return sorted(self.frames.keys())
151
+
152
+
153
+ def _extract_single_frame(
154
+ file_path: str,
155
+ timestamp: float,
156
+ out_width: int,
157
+ out_height: int,
158
+ hwaccel: str | None,
159
+ src_width: int,
160
+ src_height: int,
161
+ ) -> np.ndarray | None:
162
+ """Extract a single frame at the given timestamp.
163
+
164
+ Returns BGR numpy array or None on failure.
165
+ """
166
+ cmd = ["ffmpeg", "-hide_banner"]
167
+
168
+ # Calculate output height for hardware scaling (maintains aspect ratio)
169
+ if hwaccel and src_width > 0 and src_height > 0:
170
+ actual_out_height = int(out_width * src_height / src_width)
171
+ actual_out_height = actual_out_height - (actual_out_height % 2)
172
+ else:
173
+ actual_out_height = out_height
174
+
175
+ # Build filter chain based on hardware acceleration
176
+ if hwaccel == "videotoolbox":
177
+ cmd.extend(["-hwaccel", "videotoolbox", "-hwaccel_output_format", "videotoolbox_vld"])
178
+ vf_filter = f"scale_vt=w={out_width}:h={actual_out_height},hwdownload,format=p010le"
179
+ elif hwaccel == "cuda":
180
+ cmd.extend(["-hwaccel", "cuda", "-hwaccel_output_format", "cuda"])
181
+ vf_filter = f"scale_cuda={out_width}:{actual_out_height},hwdownload,format=nv12"
182
+ else:
183
+ actual_out_height = out_height
184
+ vf_filter = f"scale={out_width}:{out_height}:force_original_aspect_ratio=decrease"
185
+
186
+ cmd.extend(
187
+ [
188
+ "-ss",
189
+ str(timestamp),
190
+ "-i",
191
+ file_path,
192
+ "-vf",
193
+ vf_filter,
194
+ "-frames:v",
195
+ "1",
196
+ "-f",
197
+ "rawvideo",
198
+ "-pix_fmt",
199
+ "bgr24",
200
+ "-",
201
+ ]
202
+ )
203
+
204
+ process: subprocess.Popen[bytes] | None = None
205
+ try:
206
+ process = subprocess.Popen(
207
+ cmd,
208
+ stdout=subprocess.PIPE,
209
+ stderr=subprocess.PIPE,
210
+ )
211
+
212
+ frame_size = out_width * actual_out_height * 3 # BGR = 3 channels
213
+ raw_frame, _ = process.communicate(timeout=30)
214
+
215
+ if len(raw_frame) != frame_size:
216
+ # Try without hardware acceleration
217
+ if hwaccel:
218
+ logger.debug(f"Hardware decode failed for frame at {timestamp}s, trying software")
219
+ return _extract_single_frame(
220
+ file_path,
221
+ timestamp,
222
+ out_width,
223
+ out_height,
224
+ None,
225
+ src_width,
226
+ src_height,
227
+ )
228
+ return None
229
+
230
+ frame = np.frombuffer(raw_frame, dtype=np.uint8).reshape((actual_out_height, out_width, 3))
231
+ return frame.copy()
232
+
233
+ except subprocess.TimeoutExpired:
234
+ if process is not None:
235
+ process.kill()
236
+ logger.warning(f"Timeout extracting frame at {timestamp}s")
237
+ return None
238
+ except Exception as e:
239
+ logger.warning(f"Error extracting frame at {timestamp}s: {e}")
240
+ return None
241
+
242
+
243
def decode_frames(
    file_path: str,
    timestamps: list[float],
    max_dimension: int = 1920,
    hwaccel: str | None = None,
) -> SharedFrameBuffer:
    """Decode specific frames from video with hardware acceleration.

    Uses VideoToolbox on macOS, CUDA on Linux, with automatic fallback
    to software decoding if hardware fails. Timestamps outside
    ``[0, duration]`` are skipped, and a timestamp that appears more than
    once is decoded only once.

    Args:
        file_path: Path to video file
        timestamps: List of timestamps (in seconds) to extract
        max_dimension: Maximum width or height (maintains aspect ratio)
        hwaccel: Hardware acceleration method (auto-detect if None)

    Returns:
        SharedFrameBuffer with decoded frames (frames that failed to decode
        are simply absent)

    Raises:
        FileNotFoundError: If the video file doesn't exist
        ValueError: If the video reports a non-positive width or height
    """
    from pathlib import Path

    if not Path(file_path).exists():
        raise FileNotFoundError(f"Video file not found: {file_path}")

    if hwaccel is None:
        hwaccel = _detect_hwaccel()

    # Get video info
    _, duration, src_width, src_height = get_video_info(file_path)

    if src_width <= 0 or src_height <= 0:
        # Without this guard the aspect-ratio maths below either divides by
        # zero or silently produces zero-sized frames.
        raise ValueError(f"Invalid video dimensions {src_width}x{src_height} for {file_path}")

    # Calculate output dimensions (maintain aspect ratio, cap at max_dimension)
    if src_width > src_height:
        out_width = min(max_dimension, src_width)
        out_height = int(out_width * src_height / src_width)
    else:
        out_height = min(max_dimension, src_height)
        out_width = int(out_height * src_width / src_height)

    # Ensure even dimensions
    out_width -= out_width % 2
    out_height -= out_height % 2

    hwaccel_note = f" (hwaccel={hwaccel})" if hwaccel else ""
    logger.info(f"Decoding {len(timestamps)} frames from {file_path} at {out_width}x{out_height}{hwaccel_note}")

    frames: dict[float, SharedFrame] = {}

    for ts in timestamps:
        if ts < 0 or ts > duration:
            logger.debug(f"Skipping out-of-range timestamp: {ts}")
            continue

        if ts in frames:
            # Already decoded; a second ffmpeg run would give the same frame.
            continue

        frame_bgr = _extract_single_frame(file_path, ts, out_width, out_height, hwaccel, src_width, src_height)
        if frame_bgr is not None:
            frames[ts] = SharedFrame(timestamp=ts, bgr=frame_bgr)
        else:
            logger.debug(f"Failed to decode frame at {ts}s")

    logger.info(f"Decoded {len(frames)}/{len(timestamps)} frames successfully")

    return SharedFrameBuffer(
        file_path=file_path,
        frames=frames,
        width=out_width,
        height=out_height,
    )
312
+
313
+
314
def get_extractor_timestamps(
    is_stable: bool,
    avg_intensity: float,
    base_timestamps: list[float],
) -> list[float]:
    """Thin out sampling timestamps according to how much the footage moves.

    Three regimes are distinguished:
    - stable with average intensity below 1.0: keep first, middle and last
    - stable otherwise: keep every 4th timestamp
    - not stable: keep everything

    Args:
        is_stable: Whether motion analysis determined footage is stable
        avg_intensity: Average motion intensity from motion analysis
        base_timestamps: Full list of timestamps from adaptive sampling

    Returns:
        The timestamps to use. When nothing is dropped, the input list
        itself is returned rather than a copy.
    """
    if not base_timestamps:
        return []

    # Dynamic footage: sample at every timestamp.
    if not is_stable:
        return base_timestamps

    if avg_intensity < 1.0:
        # Very stable (tripod, hover): three frames are enough.
        total = len(base_timestamps)
        if total <= 3:
            return base_timestamps
        first = base_timestamps[0]
        middle = base_timestamps[total // 2]
        last = base_timestamps[-1]
        return [first, middle, last]

    # Mostly stable: every 4th timestamp, never fewer than one.
    thinned = base_timestamps[::4]
    return thinned or base_timestamps[:1]