media-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/clip.py +79 -0
- cli/faces.py +91 -0
- cli/metadata.py +68 -0
- cli/motion.py +77 -0
- cli/objects.py +94 -0
- cli/ocr.py +93 -0
- cli/scenes.py +57 -0
- cli/telemetry.py +65 -0
- cli/transcript.py +76 -0
- media_engine/__init__.py +7 -0
- media_engine/_version.py +34 -0
- media_engine/app.py +80 -0
- media_engine/batch/__init__.py +56 -0
- media_engine/batch/models.py +99 -0
- media_engine/batch/processor.py +1131 -0
- media_engine/batch/queue.py +232 -0
- media_engine/batch/state.py +30 -0
- media_engine/batch/timing.py +321 -0
- media_engine/cli.py +17 -0
- media_engine/config.py +674 -0
- media_engine/extractors/__init__.py +75 -0
- media_engine/extractors/clip.py +401 -0
- media_engine/extractors/faces.py +459 -0
- media_engine/extractors/frame_buffer.py +351 -0
- media_engine/extractors/frames.py +402 -0
- media_engine/extractors/metadata/__init__.py +127 -0
- media_engine/extractors/metadata/apple.py +169 -0
- media_engine/extractors/metadata/arri.py +118 -0
- media_engine/extractors/metadata/avchd.py +208 -0
- media_engine/extractors/metadata/avchd_gps.py +270 -0
- media_engine/extractors/metadata/base.py +688 -0
- media_engine/extractors/metadata/blackmagic.py +139 -0
- media_engine/extractors/metadata/camera_360.py +276 -0
- media_engine/extractors/metadata/canon.py +290 -0
- media_engine/extractors/metadata/dji.py +371 -0
- media_engine/extractors/metadata/dv.py +121 -0
- media_engine/extractors/metadata/ffmpeg.py +76 -0
- media_engine/extractors/metadata/generic.py +119 -0
- media_engine/extractors/metadata/gopro.py +256 -0
- media_engine/extractors/metadata/red.py +305 -0
- media_engine/extractors/metadata/registry.py +114 -0
- media_engine/extractors/metadata/sony.py +442 -0
- media_engine/extractors/metadata/tesla.py +157 -0
- media_engine/extractors/motion.py +765 -0
- media_engine/extractors/objects.py +245 -0
- media_engine/extractors/objects_qwen.py +754 -0
- media_engine/extractors/ocr.py +268 -0
- media_engine/extractors/scenes.py +82 -0
- media_engine/extractors/shot_type.py +217 -0
- media_engine/extractors/telemetry.py +262 -0
- media_engine/extractors/transcribe.py +579 -0
- media_engine/extractors/translate.py +121 -0
- media_engine/extractors/vad.py +263 -0
- media_engine/main.py +68 -0
- media_engine/py.typed +0 -0
- media_engine/routers/__init__.py +15 -0
- media_engine/routers/batch.py +78 -0
- media_engine/routers/health.py +93 -0
- media_engine/routers/models.py +211 -0
- media_engine/routers/settings.py +87 -0
- media_engine/routers/utils.py +135 -0
- media_engine/schemas.py +581 -0
- media_engine/utils/__init__.py +5 -0
- media_engine/utils/logging.py +54 -0
- media_engine/utils/memory.py +49 -0
- media_engine-0.1.0.dist-info/METADATA +276 -0
- media_engine-0.1.0.dist-info/RECORD +70 -0
- media_engine-0.1.0.dist-info/WHEEL +4 -0
- media_engine-0.1.0.dist-info/entry_points.txt +11 -0
- media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
media_engine/extractors/frame_buffer.py
@@ -0,0 +1,351 @@
+"""Shared frame buffer for efficient video processing.
+
+Decodes video frames once and shares them across multiple extractors,
+eliminating redundant video decoding which is the performance bottleneck.
+"""
+
+import logging
+import platform
+import subprocess
+from dataclasses import dataclass, field
+
+import cv2
+import numpy as np
+from PIL import Image
+
+from media_engine.extractors.metadata.base import get_video_info
+
+logger = logging.getLogger(__name__)
+
+# Cache for hardware acceleration detection
+_hwaccel_cache: str | None = None
+
+
+def _detect_hwaccel() -> str | None:
+    """Detect available hardware acceleration for video decoding.
+
+    Returns:
+        Hardware acceleration method name, or None if not available.
+        - "videotoolbox" for macOS
+        - "cuda" for NVIDIA GPUs
+        - None for software decoding
+    """
+    global _hwaccel_cache
+
+    if _hwaccel_cache is not None:
+        return _hwaccel_cache if _hwaccel_cache != "" else None
+
+    system = platform.system()
+
+    if system == "Darwin":
+        try:
+            result = subprocess.run(
+                ["ffmpeg", "-hwaccels"],
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+            if "videotoolbox" in result.stdout:
+                logger.info("Hardware acceleration: VideoToolbox (macOS)")
+                _hwaccel_cache = "videotoolbox"
+                return "videotoolbox"
+        except Exception:
+            pass
+
+    elif system == "Linux":
+        try:
+            result = subprocess.run(
+                ["ffmpeg", "-hwaccels"],
+                capture_output=True,
+                text=True,
+                timeout=5,
+            )
+            if "cuda" in result.stdout:
+                nvidia_check = subprocess.run(
+                    ["nvidia-smi", "-L"],
+                    capture_output=True,
+                    timeout=5,
+                )
+                if nvidia_check.returncode == 0:
+                    logger.info("Hardware acceleration: CUDA (NVIDIA)")
+                    _hwaccel_cache = "cuda"
+                    return "cuda"
+        except Exception:
+            pass
+
+    logger.info("Hardware acceleration: None (software decoding)")
+    _hwaccel_cache = ""
+    return None
+
+
+@dataclass(slots=True)
+class SharedFrame:
+    """A decoded frame with lazy format conversions.
+
+    Stores the original BGR frame and provides lazy conversion to other formats
+    (RGB, grayscale, PIL) to minimize memory usage and conversion overhead.
+
+    Uses slots=True to reduce memory overhead per instance.
+    """
+
+    timestamp: float
+    bgr: np.ndarray
+    _rgb: np.ndarray | None = field(default=None, repr=False)
+    _gray: np.ndarray | None = field(default=None, repr=False)
+    _pil: Image.Image | None = field(default=None, repr=False)
+
+    @property
+    def rgb(self) -> np.ndarray:
+        """Get RGB format (lazy conversion from BGR)."""
+        if self._rgb is None:
+            self._rgb = cv2.cvtColor(self.bgr, cv2.COLOR_BGR2RGB)
+        return self._rgb
+
+    @property
+    def gray(self) -> np.ndarray:
+        """Get grayscale format (lazy conversion from BGR)."""
+        if self._gray is None:
+            self._gray = cv2.cvtColor(self.bgr, cv2.COLOR_BGR2GRAY)
+        return self._gray
+
+    @property
+    def pil(self) -> Image.Image:
+        """Get PIL Image format (lazy conversion from RGB)."""
+        if self._pil is None:
+            self._pil = Image.fromarray(self.rgb)
+        return self._pil
+
+
+@dataclass(slots=True)
+class SharedFrameBuffer:
+    """Buffer of decoded frames for a video file.
+
+    Holds pre-decoded frames that can be shared across multiple extractors,
+    eliminating redundant video decoding.
+
+    Uses slots=True to reduce memory overhead.
+    """
+
+    file_path: str
+    frames: dict[float, SharedFrame]  # timestamp -> frame
+    width: int
+    height: int
+
+    def __len__(self) -> int:
+        return len(self.frames)
+
+    def get_frame(self, timestamp: float) -> SharedFrame | None:
+        """Get frame at exact timestamp."""
+        return self.frames.get(timestamp)
+
+    def get_nearest_frame(self, timestamp: float) -> SharedFrame | None:
+        """Get frame nearest to timestamp."""
+        if not self.frames:
+            return None
+        nearest_ts = min(self.frames.keys(), key=lambda t: abs(t - timestamp))
+        return self.frames[nearest_ts]
+
+    def timestamps(self) -> list[float]:
+        """Get sorted list of all timestamps."""
+        return sorted(self.frames.keys())
+
+
+def _extract_single_frame(
+    file_path: str,
+    timestamp: float,
+    out_width: int,
+    out_height: int,
+    hwaccel: str | None,
+    src_width: int,
+    src_height: int,
+) -> np.ndarray | None:
+    """Extract a single frame at the given timestamp.
+
+    Returns BGR numpy array or None on failure.
+    """
+    cmd = ["ffmpeg", "-hide_banner"]
+
+    # Calculate output height for hardware scaling (maintains aspect ratio)
+    if hwaccel and src_width > 0 and src_height > 0:
+        actual_out_height = int(out_width * src_height / src_width)
+        actual_out_height = actual_out_height - (actual_out_height % 2)
+    else:
+        actual_out_height = out_height
+
+    # Build filter chain based on hardware acceleration
+    if hwaccel == "videotoolbox":
+        cmd.extend(["-hwaccel", "videotoolbox", "-hwaccel_output_format", "videotoolbox_vld"])
+        vf_filter = f"scale_vt=w={out_width}:h={actual_out_height},hwdownload,format=p010le"
+    elif hwaccel == "cuda":
+        cmd.extend(["-hwaccel", "cuda", "-hwaccel_output_format", "cuda"])
+        vf_filter = f"scale_cuda={out_width}:{actual_out_height},hwdownload,format=nv12"
+    else:
+        actual_out_height = out_height
+        vf_filter = f"scale={out_width}:{out_height}:force_original_aspect_ratio=decrease"
+
+    cmd.extend(
+        [
+            "-ss",
+            str(timestamp),
+            "-i",
+            file_path,
+            "-vf",
+            vf_filter,
+            "-frames:v",
+            "1",
+            "-f",
+            "rawvideo",
+            "-pix_fmt",
+            "bgr24",
+            "-",
+        ]
+    )
+
+    process: subprocess.Popen[bytes] | None = None
+    try:
+        process = subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+
+        frame_size = out_width * actual_out_height * 3  # BGR = 3 channels
+        raw_frame, _ = process.communicate(timeout=30)
+
+        if len(raw_frame) != frame_size:
+            # Try without hardware acceleration
+            if hwaccel:
+                logger.debug(f"Hardware decode failed for frame at {timestamp}s, trying software")
+                return _extract_single_frame(
+                    file_path,
+                    timestamp,
+                    out_width,
+                    out_height,
+                    None,
+                    src_width,
+                    src_height,
+                )
+            return None
+
+        frame = np.frombuffer(raw_frame, dtype=np.uint8).reshape((actual_out_height, out_width, 3))
+        return frame.copy()
+
+    except subprocess.TimeoutExpired:
+        if process is not None:
+            process.kill()
+        logger.warning(f"Timeout extracting frame at {timestamp}s")
+        return None
+    except Exception as e:
+        logger.warning(f"Error extracting frame at {timestamp}s: {e}")
+        return None
+
+
+def decode_frames(
+    file_path: str,
+    timestamps: list[float],
+    max_dimension: int = 1920,
+    hwaccel: str | None = None,
+) -> SharedFrameBuffer:
+    """Decode specific frames from video with hardware acceleration.
+
+    Uses VideoToolbox on macOS, CUDA on Linux, with automatic fallback
+    to software decoding if hardware fails.
+
+    Args:
+        file_path: Path to video file
+        timestamps: List of timestamps (in seconds) to extract
+        max_dimension: Maximum width or height (maintains aspect ratio)
+        hwaccel: Hardware acceleration method (auto-detect if None)
+
+    Returns:
+        SharedFrameBuffer with decoded frames
+
+    Raises:
+        FileNotFoundError: If the video file doesn't exist
+    """
+    from pathlib import Path
+
+    if not Path(file_path).exists():
+        raise FileNotFoundError(f"Video file not found: {file_path}")
+
+    if hwaccel is None:
+        hwaccel = _detect_hwaccel()
+
+    # Get video info
+    _, duration, src_width, src_height = get_video_info(file_path)
+
+    # Calculate output dimensions (maintain aspect ratio, cap at max_dimension)
+    if src_width > src_height:
+        out_width = min(max_dimension, src_width)
+        out_height = int(out_width * src_height / src_width)
+    else:
+        out_height = min(max_dimension, src_height)
+        out_width = int(out_height * src_width / src_height)
+
+    # Ensure even dimensions
+    out_width = out_width - (out_width % 2)
+    out_height = out_height - (out_height % 2)
+
+    logger.info(f"Decoding {len(timestamps)} frames from {file_path} " f"at {out_width}x{out_height}" + (f" (hwaccel={hwaccel})" if hwaccel else ""))
+
+    frames: dict[float, SharedFrame] = {}
+
+    for ts in timestamps:
+        if ts < 0 or ts > duration:
+            logger.debug(f"Skipping out-of-range timestamp: {ts}")
+            continue
+
+        frame_bgr = _extract_single_frame(file_path, ts, out_width, out_height, hwaccel, src_width, src_height)
+        if frame_bgr is not None:
+            frames[ts] = SharedFrame(timestamp=ts, bgr=frame_bgr)
+        else:
+            logger.debug(f"Failed to decode frame at {ts}s")
+
+    logger.info(f"Decoded {len(frames)}/{len(timestamps)} frames successfully")
+
+    return SharedFrameBuffer(
+        file_path=file_path,
+        frames=frames,
+        width=out_width,
+        height=out_height,
+    )
+
+
+def get_extractor_timestamps(
+    is_stable: bool,
+    avg_intensity: float,
+    base_timestamps: list[float],
+) -> list[float]:
+    """Filter timestamps based on motion analysis for efficient sampling.
+
+    For stable footage, reduces sampling significantly since content
+    doesn't change much between frames.
+
+    Args:
+        is_stable: Whether motion analysis determined footage is stable
+        avg_intensity: Average motion intensity from motion analysis
+        base_timestamps: Full list of timestamps from adaptive sampling
+
+    Returns:
+        Filtered list of timestamps appropriate for the motion level
+    """
+    if not base_timestamps:
+        return []
+
+    if is_stable and avg_intensity < 1.0:
+        # Very stable (tripod, hover) - just 3 frames
+        if len(base_timestamps) <= 3:
+            return base_timestamps
+        return [
+            base_timestamps[0],
+            base_timestamps[len(base_timestamps) // 2],
+            base_timestamps[-1],
+        ]
+
+    if is_stable:
+        # Mostly stable - reduce to every 4th timestamp
+        result = base_timestamps[::4]
+        return result if result else base_timestamps[:1]
+
+    # Dynamic footage - use all timestamps
+    return base_timestamps