media-engine 0.1.0 (media_engine-0.1.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/clip.py +79 -0
- cli/faces.py +91 -0
- cli/metadata.py +68 -0
- cli/motion.py +77 -0
- cli/objects.py +94 -0
- cli/ocr.py +93 -0
- cli/scenes.py +57 -0
- cli/telemetry.py +65 -0
- cli/transcript.py +76 -0
- media_engine/__init__.py +7 -0
- media_engine/_version.py +34 -0
- media_engine/app.py +80 -0
- media_engine/batch/__init__.py +56 -0
- media_engine/batch/models.py +99 -0
- media_engine/batch/processor.py +1131 -0
- media_engine/batch/queue.py +232 -0
- media_engine/batch/state.py +30 -0
- media_engine/batch/timing.py +321 -0
- media_engine/cli.py +17 -0
- media_engine/config.py +674 -0
- media_engine/extractors/__init__.py +75 -0
- media_engine/extractors/clip.py +401 -0
- media_engine/extractors/faces.py +459 -0
- media_engine/extractors/frame_buffer.py +351 -0
- media_engine/extractors/frames.py +402 -0
- media_engine/extractors/metadata/__init__.py +127 -0
- media_engine/extractors/metadata/apple.py +169 -0
- media_engine/extractors/metadata/arri.py +118 -0
- media_engine/extractors/metadata/avchd.py +208 -0
- media_engine/extractors/metadata/avchd_gps.py +270 -0
- media_engine/extractors/metadata/base.py +688 -0
- media_engine/extractors/metadata/blackmagic.py +139 -0
- media_engine/extractors/metadata/camera_360.py +276 -0
- media_engine/extractors/metadata/canon.py +290 -0
- media_engine/extractors/metadata/dji.py +371 -0
- media_engine/extractors/metadata/dv.py +121 -0
- media_engine/extractors/metadata/ffmpeg.py +76 -0
- media_engine/extractors/metadata/generic.py +119 -0
- media_engine/extractors/metadata/gopro.py +256 -0
- media_engine/extractors/metadata/red.py +305 -0
- media_engine/extractors/metadata/registry.py +114 -0
- media_engine/extractors/metadata/sony.py +442 -0
- media_engine/extractors/metadata/tesla.py +157 -0
- media_engine/extractors/motion.py +765 -0
- media_engine/extractors/objects.py +245 -0
- media_engine/extractors/objects_qwen.py +754 -0
- media_engine/extractors/ocr.py +268 -0
- media_engine/extractors/scenes.py +82 -0
- media_engine/extractors/shot_type.py +217 -0
- media_engine/extractors/telemetry.py +262 -0
- media_engine/extractors/transcribe.py +579 -0
- media_engine/extractors/translate.py +121 -0
- media_engine/extractors/vad.py +263 -0
- media_engine/main.py +68 -0
- media_engine/py.typed +0 -0
- media_engine/routers/__init__.py +15 -0
- media_engine/routers/batch.py +78 -0
- media_engine/routers/health.py +93 -0
- media_engine/routers/models.py +211 -0
- media_engine/routers/settings.py +87 -0
- media_engine/routers/utils.py +135 -0
- media_engine/schemas.py +581 -0
- media_engine/utils/__init__.py +5 -0
- media_engine/utils/logging.py +54 -0
- media_engine/utils/memory.py +49 -0
- media_engine-0.1.0.dist-info/METADATA +276 -0
- media_engine-0.1.0.dist-info/RECORD +70 -0
- media_engine-0.1.0.dist-info/WHEEL +4 -0
- media_engine-0.1.0.dist-info/entry_points.txt +11 -0
- media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,402 @@
"""Fast frame extraction using OpenCV or FFmpeg.

OpenCV's VideoCapture is fast for normal videos but decodes at full resolution.
For high-resolution videos (4K+), FFmpeg decoding at target resolution is faster.

Also supports direct image loading - when given an image file, it loads it
directly instead of trying to use VideoCapture.
"""

import logging
import os
import subprocess
import tempfile

import cv2
import numpy as np

from media_engine.schemas import MediaType, get_media_type

logger = logging.getLogger(__name__)

# Resolution threshold for using FFmpeg decode (4K+)
HIGH_RES_THRESHOLD = 3840 * 2160  # ~8.3M pixels


class FrameExtractor:
    """Extract frames from video or image using OpenCV.

    Uses cv2.VideoCapture for fast seeking and frame extraction from videos.
    Falls back to ffmpeg for exotic codecs that OpenCV can't handle.
    For images, loads directly with cv2.imread (no frame extraction needed).
    """

    # Default max dimension - scale down 4K to ~HD for faster processing
    DEFAULT_MAX_DIMENSION = 1920

    def __init__(self, file_path: str, max_dimension: int | None = DEFAULT_MAX_DIMENSION):
        """Initialize frame extractor.

        Args:
            file_path: Path to video or image file
            max_dimension: Maximum width/height. Frames larger than this are scaled down.
                Set to None to disable scaling. Default: 1920 (HD)
        """
        self.video_path = file_path  # Keep name for compatibility
        self.max_dimension = max_dimension
        self.cap: cv2.VideoCapture | None = None
        self._duration: float | None = None
        self._fps: float | None = None
        self._frame_count: int | None = None
        self._width: int | None = None
        self._height: int | None = None
        self._use_ffmpeg_fallback = False
        self._use_ffmpeg_decode = False  # For high-res, decode at lower res with FFmpeg
        # Image handling
        self._is_image = False
        self._image_frame: np.ndarray | None = None

    def __enter__(self) -> "FrameExtractor":
        """Open video or image file."""
        # Check if this is an image file
        media_type = get_media_type(self.video_path)
        if media_type == MediaType.IMAGE:
            self._is_image = True
            self._duration = 0.0
            self._fps = 1.0
            self._frame_count = 1
            # Load the image directly
            self._image_frame = cv2.imread(self.video_path)
            if self._image_frame is None:
                logger.warning(f"Failed to load image: {self.video_path}")
            else:
                # Apply scaling
                self._image_frame = self._scale_frame(self._image_frame)
                logger.debug(f"Loaded image directly: {self.video_path}")
            return self

        # Video file - use VideoCapture
        self.cap = cv2.VideoCapture(self.video_path)

        if not self.cap.isOpened():
            logger.warning(f"OpenCV failed to open {self.video_path}, using ffmpeg fallback")
            self._use_ffmpeg_fallback = True
            self.cap = None
        else:
            self._fps = self.cap.get(cv2.CAP_PROP_FPS)
            self._frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
            self._width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            self._height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            if self._fps > 0 and self._frame_count > 0:
                self._duration = self._frame_count / self._fps
            else:
                # Seek to estimate duration
                self._duration = self._get_duration_ffprobe()

            # Check if this is high-res video that needs FFmpeg decode
            if self._width and self._height and self.max_dimension:
                pixels = self._width * self._height
                max_dim = max(self._width, self._height)
                if pixels > HIGH_RES_THRESHOLD and max_dim > self.max_dimension:
                    logger.info(f"High-res video ({self._width}x{self._height}), " f"using FFmpeg decode at {self.max_dimension}px")
                    self._use_ffmpeg_decode = True
                    # Release opencv capture - we'll use FFmpeg instead
                    self.cap.release()
                    self.cap = None

        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # noqa: ANN001
        """Release video file."""
        if self.cap is not None:
            self.cap.release()
            self.cap = None
        # Clear image reference
        self._image_frame = None

    @property
    def is_image(self) -> bool:
        """Check if this extractor is handling an image (not a video)."""
        return self._is_image

    @property
    def duration(self) -> float:
        """Get video duration in seconds (0 for images)."""
        if self._duration is None:
            self._duration = self._get_duration_ffprobe()
        return self._duration

    @property
    def fps(self) -> float:
        """Get video frame rate (1 for images)."""
        if self._fps is None:
            self._fps = 30.0  # Default fallback
        return self._fps

    def _get_duration_ffprobe(self) -> float:
        """Get duration using ffprobe."""
        try:
            cmd = [
                "ffprobe",
                "-v",
                "quiet",
                "-show_entries",
                "format=duration",
                "-of",
                "default=noprint_wrappers=1:nokey=1",
                self.video_path,
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            return float(result.stdout.strip())
        except (subprocess.CalledProcessError, ValueError):
            return 0.0

    def _scale_frame(self, frame: np.ndarray) -> np.ndarray:
        """Scale down frame if larger than max_dimension.

        Maintains aspect ratio. Only scales down, never up.
        """
        if self.max_dimension is None:
            return frame

        h, w = frame.shape[:2]
        max_dim = max(h, w)

        if max_dim <= self.max_dimension:
            return frame

        # Calculate scale factor
        scale = self.max_dimension / max_dim
        new_w = int(w * scale)
        new_h = int(h * scale)

        # Use INTER_AREA for downscaling (best quality)
        return cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)

    def get_frame_at(self, timestamp: float) -> np.ndarray | None:
        """Extract a single frame at the given timestamp.

        Args:
            timestamp: Time in seconds (ignored for images)

        Returns:
            Frame as BGR numpy array (scaled to max_dimension), or None if extraction failed
        """
        # For images, always return the loaded image (timestamp is ignored)
        if self._is_image:
            return self._image_frame

        # High-res video: use FFmpeg with scale filter (decodes at target res)
        if self._use_ffmpeg_decode:
            return self._get_frame_ffmpeg_scaled(timestamp)

        if self._use_ffmpeg_fallback:
            frame = self._get_frame_ffmpeg(timestamp)
            return self._scale_frame(frame) if frame is not None else None

        if self.cap is None:
            return None

        # Seek to timestamp
        self.cap.set(cv2.CAP_PROP_POS_MSEC, timestamp * 1000)

        ret, frame = self.cap.read()
        if not ret:
            # Try ffmpeg fallback for this frame
            frame = self._get_frame_ffmpeg(timestamp)
            return self._scale_frame(frame) if frame is not None else None

        return self._scale_frame(frame)

    def get_frames_at(self, timestamps: list[float]) -> list[tuple[float, np.ndarray | None]]:
        """Extract multiple frames at given timestamps.

        More efficient than calling get_frame_at repeatedly as it
        processes timestamps in order to minimize seeking.

        Args:
            timestamps: List of times in seconds

        Returns:
            List of (timestamp, frame) tuples
        """
        # Sort timestamps for efficient sequential access
        sorted_ts = sorted(set(timestamps))
        results: dict[float, np.ndarray | None] = {}

        for ts in sorted_ts:
            results[ts] = self.get_frame_at(ts)

        # Return in original order
        return [(ts, results.get(ts)) for ts in timestamps]

    def _get_frame_ffmpeg(self, timestamp: float) -> np.ndarray | None:
        """Extract frame using ffmpeg (fallback, no scaling)."""
        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp_path = tmp.name

        try:
            cmd = [
                "ffmpeg",
                "-y",
                "-ss",
                str(timestamp),
                "-i",
                self.video_path,
                "-frames:v",
                "1",
                "-update",
                "1",  # Required for ffmpeg 8.x single-image output
                "-q:v",
                "2",
                tmp_path,
            ]
            subprocess.run(cmd, capture_output=True, check=True)

            if os.path.exists(tmp_path):
                frame = cv2.imread(tmp_path)
                return frame
        except subprocess.CalledProcessError:
            pass
        finally:
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

        return None

    def _get_frame_ffmpeg_scaled(self, timestamp: float) -> np.ndarray | None:
        """Extract frame using FFmpeg with scale filter (for high-res videos).

        This is faster than decoding at full resolution and then scaling with cv2.
        """
        if self.max_dimension is None:
            return self._get_frame_ffmpeg(timestamp)

        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp:
            tmp_path = tmp.name

        try:
            # Scale filter that maintains aspect ratio
            # scale=W:H:force_original_aspect_ratio=decrease
            scale_filter = f"scale={self.max_dimension}:{self.max_dimension}" f":force_original_aspect_ratio=decrease"

            cmd = [
                "ffmpeg",
                "-y",
                "-ss",
                str(timestamp),
                "-i",
                self.video_path,
                "-vf",
                scale_filter,
                "-frames:v",
                "1",
                "-update",
                "1",
                "-q:v",
                "2",
                tmp_path,
            ]
            subprocess.run(cmd, capture_output=True, check=True)

            if os.path.exists(tmp_path):
                frame = cv2.imread(tmp_path)
                return frame
        except subprocess.CalledProcessError:
            pass
        finally:
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

        return None

    def save_frame(self, frame: np.ndarray, output_path: str, quality: int = 95) -> bool:
        """Save frame to file.

        Args:
            frame: BGR numpy array
            output_path: Output file path
            quality: JPEG quality (0-100)

        Returns:
            True if saved successfully
        """
        try:
            cv2.imwrite(output_path, frame, [cv2.IMWRITE_JPEG_QUALITY, quality])
            return os.path.exists(output_path)
        except Exception as e:
            logger.warning(f"Failed to save frame to {output_path}: {e}")
            return False


def extract_frames_batch(
    video_path: str,
    timestamps: list[float],
    output_dir: str | None = None,
) -> list[tuple[float, np.ndarray | None]]:
    """Extract multiple frames from a video.

    Convenience function that handles the context manager.

    Args:
        video_path: Path to video file
        timestamps: List of timestamps in seconds
        output_dir: Optional directory to save frames as JPEG files

    Returns:
        List of (timestamp, frame) tuples
    """
    with FrameExtractor(video_path) as extractor:
        results = extractor.get_frames_at(timestamps)

        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
            for ts, frame in results:
                if frame is not None:
                    output_path = os.path.join(output_dir, f"frame_{ts:.3f}.jpg")
                    extractor.save_frame(frame, output_path)

        return results


def get_video_duration(file_path: str) -> float:
    """Get video duration in seconds.

    Args:
        file_path: Path to video or image file

    Returns:
        Duration in seconds, or 0 for images/unknown files
    """
    # Check if this is an image - images have 0 duration
    media_type = get_media_type(file_path)
    if media_type == MediaType.IMAGE:
        return 0.0

    # Try OpenCV first (faster)
    cap = cv2.VideoCapture(file_path)
    if cap.isOpened():
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        cap.release()

        if fps > 0 and frame_count > 0:
            return frame_count / fps

    # Fallback to ffprobe
    try:
        cmd = [
            "ffprobe",
            "-v",
            "quiet",
            "-show_entries",
            "format=duration",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            file_path,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        return float(result.stdout.strip())
    except (subprocess.CalledProcessError, ValueError):
        return 0.0
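The hunk above (media_engine/extractors/frames.py in the file listing) exposes FrameExtractor as a context manager plus two module-level helpers. The following is a minimal usage sketch based only on the API shown in that hunk; the input path, timestamps, and output names are illustrative, not part of the package.

    from media_engine.extractors.frames import (
        FrameExtractor,
        extract_frames_batch,
        get_video_duration,
    )

    video = "example.mp4"  # hypothetical input file

    # Duration probe (OpenCV first, ffprobe fallback); returns 0.0 for images.
    duration = get_video_duration(video)

    # Context-manager use: frames come back as BGR numpy arrays, scaled so the
    # longest side is at most 1920 px by default.
    with FrameExtractor(video) as extractor:
        frame = extractor.get_frame_at(duration / 2)
        if frame is not None:
            extractor.save_frame(frame, "middle_frame.jpg", quality=90)

    # One-shot helper: extracts several timestamps and, when output_dir is set,
    # writes frame_<timestamp>.jpg files into it.
    results = extract_frames_batch(video, [0.0, 1.0, 2.5], output_dir="frames_out")
    for ts, frame in results:
        print(ts, None if frame is None else frame.shape)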
@@ -0,0 +1,127 @@
"""Modular metadata extraction.

This package provides manufacturer-specific metadata extractors.
Each manufacturer module registers itself on import.

Usage:
    from media_engine.extractors.metadata import extract_metadata

    metadata = extract_metadata("/path/to/video.mp4")

To add a new manufacturer:
    1. Create a new module (e.g., panasonic.py)
    2. Implement a class with detect() and extract() methods
    3. Register it using: register_extractor("panasonic", PanasonicExtractor())
    4. Import the module below to trigger registration

The order of imports determines detection priority.
Specific manufacturers should be imported before generic.
"""

import logging
from pathlib import Path

from media_engine.schemas import Metadata

# Import manufacturer modules to trigger registration
# Order matters: more specific extractors first
from . import (
    apple,  # noqa: F401
    arri,  # noqa: F401
    blackmagic,  # noqa: F401
    camera_360,  # noqa: F401 - Insta360, QooCam, GoPro MAX, etc.
    canon,  # noqa: F401
    dji,  # noqa: F401
    dv,  # noqa: F401 - DV/HDV tape formats
    ffmpeg,  # noqa: F401
    gopro,  # noqa: F401
    red,  # noqa: F401
    sony,  # noqa: F401
    tesla,  # noqa: F401
)
from .base import (
    FFPROBE_WORKERS,
    build_base_metadata,
    extract_keyframes,
    get_duration_fast,
    run_ffprobe,
    run_ffprobe_batch,
    shutdown_ffprobe_pool,
)

# Import and register generic fallback LAST
from .generic import GenericExtractor
from .registry import get_extractor, list_extractors, register_extractor

register_extractor("generic", GenericExtractor())

logger = logging.getLogger(__name__)

__all__ = [
    "extract_metadata",
    "get_duration_fast",
    "run_ffprobe_batch",
    "list_extractors",
    "FFPROBE_WORKERS",
    "shutdown_ffprobe_pool",
]


def extract_metadata(file_path: str, probe_data: dict | None = None) -> Metadata:
    """Extract metadata from video file.

    This function:
    1. Runs ffprobe to get basic metadata (or uses provided probe_data)
    2. Detects the manufacturer/device
    3. Calls the appropriate extractor for enhanced metadata

    Args:
        file_path: Path to video file
        probe_data: Optional pre-fetched ffprobe data (for batch processing)

    Returns:
        Metadata object with video information
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"Video file not found: {file_path}")

    # Handle files that ffprobe cannot read (e.g., RED R3D)
    # These formats require direct header parsing
    ffprobe_unsupported = path.suffix.upper() in (".R3D",)

    # Run ffprobe if not provided (and file format is supported)
    if probe_data is None:
        if ffprobe_unsupported:
            # Create minimal probe_data for formats ffprobe can't read
            probe_data = {"streams": [], "format": {"filename": file_path}}
            logger.info(f"Skipping ffprobe for unsupported format: {path.suffix}")
        else:
            probe_data = run_ffprobe(file_path)

    # Build base metadata (device-agnostic)
    base_metadata = build_base_metadata(probe_data, file_path)

    # Find and run the appropriate extractor
    match = get_extractor(probe_data, file_path)

    if match:
        name, extractor = match
        logger.info(f"Using {name} extractor for {path.name}")
        try:
            result = extractor.extract(probe_data, file_path, base_metadata)
        except Exception as e:
            logger.warning(f"Extractor {name} failed: {e}, using base metadata")
            result = base_metadata
    else:
        # This shouldn't happen since generic always matches
        logger.warning(f"No extractor matched for {path.name}")
        result = base_metadata

    # Extract keyframes (separate ffprobe call, fast with -skip_frame nokey)
    # Done after extractor so it's not lost when extractor returns new Metadata
    keyframes = extract_keyframes(file_path)
    if keyframes:
        result.keyframes = keyframes

    return result
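The package docstring above (media_engine/extractors/metadata/__init__.py) describes the four steps for adding a manufacturer. A sketch of what such a module could look like, assuming the detect()/extract() signatures used by the other extractors in this diff (see AppleExtractor below); PanasonicExtractor and its detection rule are hypothetical, not part of the package.

    # Hypothetical panasonic.py, following the pattern in the package docstring;
    # signatures mirror AppleExtractor from this diff.
    import logging
    from typing import Any

    from media_engine.schemas import Metadata

    from .registry import get_tags_lower, register_extractor

    logger = logging.getLogger(__name__)


    class PanasonicExtractor:
        """Sketch of a manufacturer-specific extractor (not part of the package)."""

        def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
            # Match on the container "make" tag, as the other extractors do.
            tags = get_tags_lower(probe_data)
            make = tags.get("make") or ""
            return "PANASONIC" in make.upper()

        def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
            # A real extractor would enrich base_metadata with device/GPS details;
            # this sketch passes the base metadata through unchanged.
            return base_metadata


    # Step 3 from the docstring: register under a unique name.
    # Step 4 would add "panasonic" to the `from . import (...)` block in __init__.py.
    register_extractor("panasonic", PanasonicExtractor())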
@@ -0,0 +1,169 @@
"""Apple metadata extraction.

Handles Apple devices:
- iPhone (all models)
- iPad (all models)
- Mac (FaceTime camera, etc.)

Detection methods:
- make tag: "Apple"
- com.apple.quicktime.make tag
- Model contains "iPhone" or "iPad"

Apple QuickTime metadata tags:
- com.apple.quicktime.make
- com.apple.quicktime.model
- com.apple.quicktime.software
- com.apple.quicktime.creationdate
- com.apple.quicktime.location.iso6709
"""

import logging
import re
from typing import Any

from media_engine.schemas import (
    GPS,
    DetectionMethod,
    DeviceInfo,
    MediaDeviceType,
    Metadata,
)

from .registry import get_tags_lower, register_extractor

logger = logging.getLogger(__name__)


def _parse_apple_location(location: str) -> GPS | None:
    """Parse Apple's ISO 6709 location string.

    Format: +59.7441+010.2045+125.0/
    """
    pattern = r"([+-]\d+\.?\d*)"
    matches = re.findall(pattern, location)

    if len(matches) >= 2:
        try:
            return GPS(
                latitude=float(matches[0]),
                longitude=float(matches[1]),
                altitude=float(matches[2]) if len(matches) >= 3 else None,
            )
        except ValueError:
            pass

    return None


def _determine_device_type(model: str | None) -> MediaDeviceType:
    """Determine Apple device type from model string."""
    if not model:
        return MediaDeviceType.PHONE

    model_upper = model.upper()

    if "IPHONE" in model_upper:
        return MediaDeviceType.PHONE
    elif "IPAD" in model_upper:
        return MediaDeviceType.PHONE  # Tablets are close to phones
    elif "MAC" in model_upper or "IMAC" in model_upper or "MACBOOK" in model_upper:
        return MediaDeviceType.CAMERA  # Mac webcams are cameras
    else:
        return MediaDeviceType.PHONE


def _clean_model_name(model: str | None) -> str | None:
    """Clean up Apple model name for display.

    Examples:
        "iPhone 15 Pro Max" -> "iPhone 15 Pro Max"
        "iPhone15,3" -> "iPhone 15 Pro Max" (if we had a lookup table)
    """
    if not model:
        return None

    # Apple sometimes uses internal model identifiers like "iPhone15,3"
    # For now, just return as-is. A future enhancement could add a lookup table.
    return model


class AppleExtractor:
    """Metadata extractor for Apple devices."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Detect if file is from an Apple device."""
        tags = get_tags_lower(probe_data)

        # Check make tag
        make = tags.get("make") or tags.get("com.apple.quicktime.make")
        if make and "APPLE" in make.upper():
            return True

        # Check model for iPhone/iPad
        model = tags.get("model") or tags.get("com.apple.quicktime.model")
        if model:
            model_upper = model.upper()
            if "IPHONE" in model_upper or "IPAD" in model_upper:
                return True

        # Check for Apple QuickTime-specific tags
        if tags.get("com.apple.quicktime.creationdate"):
            # This is a strong indicator of Apple origin
            if tags.get("com.apple.quicktime.make") or tags.get("com.apple.quicktime.model"):
                return True

        return False

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Extract Apple-specific metadata."""
        tags = get_tags_lower(probe_data)

        # Get device info from QuickTime tags (preferred) or standard tags
        make = tags.get("com.apple.quicktime.make") or tags.get("make") or "Apple"
        model = tags.get("com.apple.quicktime.model") or tags.get("model")
        software = tags.get("com.apple.quicktime.software") or tags.get("software")

        # Clean up model name
        model = _clean_model_name(model)

        # Determine device type
        device_type = _determine_device_type(model)

        device = DeviceInfo(
            make=make,
            model=model,
            software=software,
            type=device_type,
            detection_method=DetectionMethod.METADATA,
            confidence=1.0,
        )

        # Extract GPS from Apple-specific location tag
        gps = base_metadata.gps
        apple_location = tags.get("com.apple.quicktime.location.iso6709")
        if apple_location:
            parsed_gps = _parse_apple_location(apple_location)
            if parsed_gps:
                gps = parsed_gps

        return Metadata(
            duration=base_metadata.duration,
            resolution=base_metadata.resolution,
            codec=base_metadata.codec,
            video_codec=base_metadata.video_codec,
            audio=base_metadata.audio,
            fps=base_metadata.fps,
            bitrate=base_metadata.bitrate,
            file_size=base_metadata.file_size,
            timecode=base_metadata.timecode,
            created_at=base_metadata.created_at,
            device=device,
            gps=gps,
            color_space=base_metadata.color_space,
            lens=base_metadata.lens,
        )


# Register this extractor
register_extractor("apple", AppleExtractor())
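The location string handled by _parse_apple_location above is the compact ISO 6709 form noted in its docstring. A small worked example of what the regex yields for the documented sample value; it only assumes that GPS (imported from media_engine.schemas above) exposes latitude/longitude/altitude fields, as used in the constructor call in that function.

    import re

    # Same pattern as _parse_apple_location above.
    pattern = r"([+-]\d+\.?\d*)"
    location = "+59.7441+010.2045+125.0/"  # sample string from the docstring

    matches = re.findall(pattern, location)
    print(matches)  # ['+59.7441', '+010.2045', '+125.0']

    # float() drops the sign padding and leading zeros:
    latitude, longitude, altitude = (float(m) for m in matches)
    print(latitude, longitude, altitude)  # 59.7441 10.2045 125.0

    # Via the helper this becomes GPS(latitude=59.7441, longitude=10.2045,
    # altitude=125.0); strings with fewer than two coordinate groups return None.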