media-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/clip.py +79 -0
- cli/faces.py +91 -0
- cli/metadata.py +68 -0
- cli/motion.py +77 -0
- cli/objects.py +94 -0
- cli/ocr.py +93 -0
- cli/scenes.py +57 -0
- cli/telemetry.py +65 -0
- cli/transcript.py +76 -0
- media_engine/__init__.py +7 -0
- media_engine/_version.py +34 -0
- media_engine/app.py +80 -0
- media_engine/batch/__init__.py +56 -0
- media_engine/batch/models.py +99 -0
- media_engine/batch/processor.py +1131 -0
- media_engine/batch/queue.py +232 -0
- media_engine/batch/state.py +30 -0
- media_engine/batch/timing.py +321 -0
- media_engine/cli.py +17 -0
- media_engine/config.py +674 -0
- media_engine/extractors/__init__.py +75 -0
- media_engine/extractors/clip.py +401 -0
- media_engine/extractors/faces.py +459 -0
- media_engine/extractors/frame_buffer.py +351 -0
- media_engine/extractors/frames.py +402 -0
- media_engine/extractors/metadata/__init__.py +127 -0
- media_engine/extractors/metadata/apple.py +169 -0
- media_engine/extractors/metadata/arri.py +118 -0
- media_engine/extractors/metadata/avchd.py +208 -0
- media_engine/extractors/metadata/avchd_gps.py +270 -0
- media_engine/extractors/metadata/base.py +688 -0
- media_engine/extractors/metadata/blackmagic.py +139 -0
- media_engine/extractors/metadata/camera_360.py +276 -0
- media_engine/extractors/metadata/canon.py +290 -0
- media_engine/extractors/metadata/dji.py +371 -0
- media_engine/extractors/metadata/dv.py +121 -0
- media_engine/extractors/metadata/ffmpeg.py +76 -0
- media_engine/extractors/metadata/generic.py +119 -0
- media_engine/extractors/metadata/gopro.py +256 -0
- media_engine/extractors/metadata/red.py +305 -0
- media_engine/extractors/metadata/registry.py +114 -0
- media_engine/extractors/metadata/sony.py +442 -0
- media_engine/extractors/metadata/tesla.py +157 -0
- media_engine/extractors/motion.py +765 -0
- media_engine/extractors/objects.py +245 -0
- media_engine/extractors/objects_qwen.py +754 -0
- media_engine/extractors/ocr.py +268 -0
- media_engine/extractors/scenes.py +82 -0
- media_engine/extractors/shot_type.py +217 -0
- media_engine/extractors/telemetry.py +262 -0
- media_engine/extractors/transcribe.py +579 -0
- media_engine/extractors/translate.py +121 -0
- media_engine/extractors/vad.py +263 -0
- media_engine/main.py +68 -0
- media_engine/py.typed +0 -0
- media_engine/routers/__init__.py +15 -0
- media_engine/routers/batch.py +78 -0
- media_engine/routers/health.py +93 -0
- media_engine/routers/models.py +211 -0
- media_engine/routers/settings.py +87 -0
- media_engine/routers/utils.py +135 -0
- media_engine/schemas.py +581 -0
- media_engine/utils/__init__.py +5 -0
- media_engine/utils/logging.py +54 -0
- media_engine/utils/memory.py +49 -0
- media_engine-0.1.0.dist-info/METADATA +276 -0
- media_engine-0.1.0.dist-info/RECORD +70 -0
- media_engine-0.1.0.dist-info/WHEEL +4 -0
- media_engine-0.1.0.dist-info/entry_points.txt +11 -0
- media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,371 @@
|
|
|
1
|
+
"""DJI metadata extraction.
|
|
2
|
+
|
|
3
|
+
Handles DJI drones and cameras:
|
|
4
|
+
- Mavic series (Air, Pro, Mini, etc.)
|
|
5
|
+
- Phantom series
|
|
6
|
+
- Inspire series
|
|
7
|
+
- Osmo/Pocket series (Pocket, Pocket 2, Action, etc.)
|
|
8
|
+
- FPV drones
|
|
9
|
+
|
|
10
|
+
Detection methods:
|
|
11
|
+
- encoder tag: "DJIMavic3", "DJI Pocket2", etc.
|
|
12
|
+
- make tag: "DJI"
|
|
13
|
+
- filename prefix: "DJI_"
|
|
14
|
+
- SRT sidecar file presence
|
|
15
|
+
|
|
16
|
+
SRT files contain per-frame telemetry:
|
|
17
|
+
[iso: 400] [shutter: 1/100.0] [fnum: 2.8] [latitude: 61.05121] ...
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import logging
|
|
21
|
+
import re
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Any
|
|
24
|
+
|
|
25
|
+
from media_engine.schemas import (
|
|
26
|
+
GPS,
|
|
27
|
+
ColorSpace,
|
|
28
|
+
DetectionMethod,
|
|
29
|
+
DeviceInfo,
|
|
30
|
+
GPSTrack,
|
|
31
|
+
GPSTrackPoint,
|
|
32
|
+
LensInfo,
|
|
33
|
+
MediaDeviceType,
|
|
34
|
+
Metadata,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
from .base import SidecarMetadata
|
|
38
|
+
from .registry import get_tags_lower, register_extractor
|
|
39
|
+
|
|
40
|
+
logger = logging.getLogger(__name__)
|
|
41
|
+
|
|
42
|
+
# Keyword sets used to classify a DJI device from its model name.
# Each entry is matched as a substring of the lower-cased model name.

# Keywords that identify aircraft.
DRONE_MODELS = {
    "agras",
    "air",
    "avata",
    "fpv",
    "inspire",
    "matrice",
    "mavic",
    "mini",
    "phantom",
}

# Keywords that identify handheld cameras and gimbals.
CAMERA_MODELS = {"action", "osmo", "pocket", "ronin"}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _get_device_type(model: str | None, has_gps: bool = False) -> MediaDeviceType:
    """Classify a DJI device as a drone or a handheld/action camera.

    Handheld keywords (``CAMERA_MODELS``) are tested before drone keywords
    (``DRONE_MODELS``), so a name that matches both is reported as a camera.

    Args:
        model: Model name string, or ``None``/empty when unknown.
        has_gps: Whether GPS data was found. Used as the deciding hint when
            the model name is missing or matches no known keyword.

    Returns:
        ``MediaDeviceType.ACTION_CAMERA`` or ``MediaDeviceType.DRONE``.
    """
    # GPS presence is treated as a hint for "drone" whenever the name alone
    # is not conclusive.
    gps_based_guess = MediaDeviceType.DRONE if has_gps else MediaDeviceType.ACTION_CAMERA

    if not model:
        return gps_based_guess

    name = model.lower()

    # Handheld cameras/gimbals take precedence over drones.
    if any(keyword in name for keyword in CAMERA_MODELS):
        return MediaDeviceType.ACTION_CAMERA

    if any(keyword in name for keyword in DRONE_MODELS):
        return MediaDeviceType.DRONE

    return gps_based_guess
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _parse_encoder_model(encoder: str) -> str | None:
|
|
91
|
+
"""Parse model name from encoder string.
|
|
92
|
+
|
|
93
|
+
Examples:
|
|
94
|
+
"DJIMavic3" -> "Mavic 3"
|
|
95
|
+
"DJI Pocket2" -> "Pocket 2"
|
|
96
|
+
"DJIMini3Pro" -> "Mini 3 Pro"
|
|
97
|
+
"DJIFPV" -> "FPV"
|
|
98
|
+
"""
|
|
99
|
+
if not encoder:
|
|
100
|
+
return None
|
|
101
|
+
|
|
102
|
+
# Remove DJI prefix (case insensitive)
|
|
103
|
+
model = encoder
|
|
104
|
+
if model.upper().startswith("DJI"):
|
|
105
|
+
model = model[3:].strip()
|
|
106
|
+
|
|
107
|
+
if not model:
|
|
108
|
+
return None
|
|
109
|
+
|
|
110
|
+
# Add spaces before numbers (Mavic3 -> Mavic 3)
|
|
111
|
+
model = re.sub(r"(\D)(\d)", r"\1 \2", model)
|
|
112
|
+
|
|
113
|
+
# Add spaces before uppercase letters (Mini3Pro -> Mini 3 Pro)
|
|
114
|
+
model = re.sub(r"([a-z])([A-Z])", r"\1 \2", model)
|
|
115
|
+
|
|
116
|
+
return model.strip()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def _parse_color_from_comment(comment: str) -> str | None:
|
|
120
|
+
"""Parse color mode from DJI Pocket/Osmo comment tag.
|
|
121
|
+
|
|
122
|
+
The comment tag format is: "DE=D-CLike, Type=Normal, HQ=Normal, Mode=P"
|
|
123
|
+
DE values: D-CLike (D-Cinelike), Normal, D-Log, etc.
|
|
124
|
+
"""
|
|
125
|
+
if not comment:
|
|
126
|
+
return None
|
|
127
|
+
|
|
128
|
+
# Look for DE= pattern
|
|
129
|
+
match = re.search(r"DE=([^,]+)", comment)
|
|
130
|
+
if match:
|
|
131
|
+
color_mode = match.group(1).strip()
|
|
132
|
+
# Normalize common names
|
|
133
|
+
if color_mode.lower() == "d-clike":
|
|
134
|
+
return "D-Cinelike"
|
|
135
|
+
elif color_mode.lower() == "d-log":
|
|
136
|
+
return "D-Log"
|
|
137
|
+
return color_mode
|
|
138
|
+
|
|
139
|
+
return None
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _collect_srt_gps_points(content: str) -> list[GPSTrackPoint]:
    """Collect de-duplicated GPS points from the text of a DJI SRT file.

    Returns an empty list when no latitude is present or when the numbers of
    latitude and longitude entries differ. ``0,0`` fixes are skipped and a
    point equal to the previously kept one is dropped. Altitudes are taken
    from ``abs_alt`` entries by position.
    """
    latitudes = re.findall(r"\[latitude:\s*([-\d.]+)\]", content)
    longitudes = re.findall(r"\[longitude:\s*([-\d.]+)\]", content)
    altitudes = re.findall(r"abs_alt:\s*([-\d.]+)", content)

    if not latitudes or len(latitudes) != len(longitudes):
        return []

    points: list[GPSTrackPoint] = []
    previous: tuple[float, float] | None = None

    for index, pair in enumerate(zip(latitudes, longitudes)):
        lat = float(pair[0])
        lon = float(pair[1])

        # Skip invalid 0,0 coordinates
        if lat == 0 and lon == 0:
            continue

        # Altitude is optional; there may be fewer abs_alt entries than fixes.
        alt = float(altitudes[index]) if index < len(altitudes) else None

        # Drop a fix identical to the one kept just before it.
        if (lat, lon) == previous:
            continue

        points.append(
            GPSTrackPoint(
                latitude=round(lat, 6),
                longitude=round(lon, 6),
                altitude=None if alt is None else round(alt, 1),
            )
        )
        previous = (lat, lon)

    return points


def _parse_srt_sidecar(video_path: str) -> SidecarMetadata | None:
    """Read the DJI SRT sidecar stored next to a video file.

    The sidecar has the video's name with an ``.SRT`` (or ``.srt``) suffix,
    e.g. ``DJI_0987.MP4`` -> ``DJI_0987.SRT``, and holds bracketed telemetry:

        [iso: 400] [shutter: 1/100.0] [fnum: 2.8] [latitude: 61.05121] ...

    Args:
        video_path: Path of the video file.

    Returns:
        SidecarMetadata carrying the first valid GPS point, the GPS track
        (only when more than one distinct point exists), the color mode and
        lens data. ``None`` when no sidecar exists, nothing usable was found,
        or reading/parsing failed (a warning is logged in that case).
    """
    video = Path(video_path)
    candidates = (video.with_suffix(".SRT"), video.with_suffix(".srt"))

    # The upper-case suffix is tried first.
    srt_path = next((candidate for candidate in candidates if candidate.exists()), None)
    if srt_path is None:
        return None

    try:
        with open(srt_path, encoding="utf-8") as srt_file:
            text = srt_file.read()

        # GPS: first point is the location, several points form a track.
        gps: GPS | None = None
        gps_track: GPSTrack | None = None
        points = _collect_srt_gps_points(text)
        if points:
            first = points[0]
            gps = GPS(
                latitude=first.latitude,
                longitude=first.longitude,
                altitude=first.altitude,
            )
            if len(points) > 1:
                gps_track = GPSTrack(points=points, source="srt_sidecar")
                logger.info(f"Extracted {len(points)} GPS points from SRT")

        # Color mode (d_log, d_cinelike, etc.)
        color_space: ColorSpace | None = None
        color_hit = re.search(r"\[color_md\s*:\s*(\w+)\]", text)
        if color_hit:
            color_space = ColorSpace(
                transfer=color_hit.group(1),
                detection_method=DetectionMethod.METADATA,
            )

        # Focal length and aperture; either one is enough to build LensInfo.
        lens: LensInfo | None = None
        focal_hit = re.search(r"\[focal_len:\s*([\d.]+)\]", text)
        fnum_hit = re.search(r"\[fnum:\s*([\d.]+)\]", text)
        if focal_hit or fnum_hit:
            lens = LensInfo(
                focal_length=float(focal_hit.group(1)) if focal_hit else None,
                aperture=float(fnum_hit.group(1)) if fnum_hit else None,
                detection_method=DetectionMethod.METADATA,
            )

        if not (gps or gps_track or color_space or lens):
            return None
        return SidecarMetadata(gps=gps, gps_track=gps_track, color_space=color_space, lens=lens)

    except Exception as e:
        logger.warning(f"Error reading DJI SRT sidecar {srt_path}: {e}")
        return None
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
class DJIExtractor:
    """Metadata extractor for DJI devices."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Detect if file is from a DJI device.

        Checks, in order: the make/manufacturer tag, the encoder tag, the
        video stream ``handler_name`` tag, the ``DJI_`` filename prefix and
        finally the content of an SRT sidecar next to the file.

        Args:
            probe_data: Probe output; its ``streams`` list and the tags
                returned by ``get_tags_lower`` are inspected.
            file_path: Path of the media file.

        Returns:
            ``True`` when any of the checks matches, otherwise ``False``.
        """
        tags = get_tags_lower(probe_data)

        # Check make tag
        make = tags.get("make") or tags.get("manufacturer")
        if make and "DJI" in make.upper():
            return True

        # Check encoder tag (DJIMavic3, DJI Pocket2, etc.)
        encoder = tags.get("encoder", "")
        if encoder.upper().startswith("DJI"):
            return True

        # Check video stream handler_name (DJI Pocket uses "DJI.AVC")
        for stream in probe_data.get("streams", []):
            if stream.get("codec_type") == "video":
                handler = stream.get("tags", {}).get("handler_name", "")
                if "DJI" in handler.upper():
                    return True

        # Check filename prefix
        path = Path(file_path)
        if path.name.upper().startswith("DJI_"):
            return True

        # Check for SRT sidecar (DJI signature). Only the first existing
        # candidate is inspected, upper-case suffix first.
        for srt_path in (path.with_suffix(".SRT"), path.with_suffix(".srt")):
            if not srt_path.exists():
                continue
            try:
                with open(srt_path, encoding="utf-8") as f:
                    head = f.read(500).lower()
            except (OSError, ValueError) as e:
                # Unreadable or non-UTF-8 sidecar (UnicodeDecodeError is a
                # ValueError): treat as "not DJI" but leave a trace.
                logger.debug("Could not inspect SRT sidecar %s: %s", srt_path, e)
                return False
            # DJI SRT has [iso:, [shutter:, etc.
            return "[iso:" in head or "[shutter:" in head

        return False

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Extract DJI-specific metadata.

        Args:
            probe_data: Probe output used to read the container tags.
            file_path: Path of the media file; also used to locate the SRT
                sidecar.
            base_metadata: Already extracted generic metadata. Its technical
                fields are copied; GPS, track, color space and lens are used
                as fallbacks when the sidecar/tags provide nothing.

        Returns:
            A new ``Metadata`` with DJI device info and sidecar data applied.
        """
        tags = get_tags_lower(probe_data)

        # Get make and model ("DJI" when no make tag is present)
        make = tags.get("make") or tags.get("manufacturer") or "DJI"
        model = tags.get("model") or tags.get("model_name")

        # Try to get model from encoder tag
        encoder = tags.get("encoder", "")
        if not model and encoder.upper().startswith("DJI"):
            model = _parse_encoder_model(encoder)

        # Parse SRT sidecar for additional metadata (drones have these)
        sidecar = _parse_srt_sidecar(file_path)

        # Get GPS and track - from sidecar (drone) or base metadata
        gps = sidecar.gps if sidecar and sidecar.gps else base_metadata.gps
        gps_track = sidecar.gps_track if sidecar and sidecar.gps_track else base_metadata.gps_track

        # Determine device type using model and GPS presence as hints
        device_type = _get_device_type(model, gps is not None)

        device = DeviceInfo(
            make=make,
            model=model,
            software=tags.get("software"),
            type=device_type,
            detection_method=DetectionMethod.METADATA,
            confidence=1.0,
        )

        # Get color space - prefer SRT, then comment tag, then base
        color_space = base_metadata.color_space
        if sidecar and sidecar.color_space:
            color_space = sidecar.color_space
        else:
            # Try parsing from comment tag (DJI Pocket/Osmo)
            color_mode = _parse_color_from_comment(tags.get("comment", ""))
            if color_mode:
                color_space = ColorSpace(
                    transfer=color_mode,
                    detection_method=DetectionMethod.METADATA,
                )

        lens = sidecar.lens if sidecar and sidecar.lens else base_metadata.lens

        return Metadata(
            duration=base_metadata.duration,
            resolution=base_metadata.resolution,
            codec=base_metadata.codec,
            video_codec=base_metadata.video_codec,
            audio=base_metadata.audio,
            fps=base_metadata.fps,
            bitrate=base_metadata.bitrate,
            file_size=base_metadata.file_size,
            timecode=base_metadata.timecode,
            created_at=base_metadata.created_at,
            device=device,
            gps=gps,
            gps_track=gps_track,
            color_space=color_space,
            lens=lens,
        )
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
# Register this extractor
|
|
371
|
+
register_extractor("dji", DJIExtractor())
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""DV and HDV format detection.
|
|
2
|
+
|
|
3
|
+
Detects tape-based camcorder formats:
|
|
4
|
+
- DV (SD): 720x480 NTSC or 720x576 PAL, dvvideo codec
|
|
5
|
+
- DVCAM: Professional DV variant
|
|
6
|
+
- DVCPRO: Panasonic professional DV
|
|
7
|
+
- HDV (HD): 1440x1080 or 1280x720, mpeg2video codec
|
|
8
|
+
|
|
9
|
+
These formats were used by consumer and prosumer camcorders
|
|
10
|
+
from the late 1990s through the 2010s.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from media_engine.schemas import (
|
|
17
|
+
DetectionMethod,
|
|
18
|
+
DeviceInfo,
|
|
19
|
+
MediaDeviceType,
|
|
20
|
+
Metadata,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
from .registry import get_tags_lower, register_extractor
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DVExtractor:
    """Metadata extractor for DV and HDV formats."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Detect if file is DV or HDV format.

        A file matches when any video stream uses the ``dvvideo`` codec, or
        uses ``mpeg2video`` while "hdv" appears in the encoder tag or in any
        tag value of that stream.

        Args:
            probe_data: Probe output containing a ``streams`` list.
            file_path: Path of the media file (not used for detection).

        Returns:
            ``True`` for DV/HDV material, otherwise ``False``.
        """
        for stream in probe_data.get("streams", []):
            if stream.get("codec_type") != "video":
                continue

            codec = stream.get("codec_name", "").lower()

            # DV codec
            if codec == "dvvideo":
                return True

            # HDV uses mpeg2video with specific encoder tag
            if codec == "mpeg2video":
                encoder = get_tags_lower(probe_data).get("encoder", "").lower()
                if "hdv" in encoder:
                    return True

                # Also check stream tags
                stream_tags = stream.get("tags", {})
                if any("hdv" in str(value).lower() for value in stream_tags.values()):
                    return True

        return False

    def extract(
        self,
        probe_data: dict[str, Any],
        file_path: str,
        base_metadata: Metadata,
    ) -> Metadata:
        """Extract DV/HDV format information.

        The first video stream whose codec is ``dvvideo`` or ``mpeg2video``
        decides the format name. Other video streams are skipped, so a
        non-DV stream listed first no longer hides the real one.

        Args:
            probe_data: Probe output containing a ``streams`` list.
            file_path: Path of the media file (not used here).
            base_metadata: Metadata object to update.

        Returns:
            ``base_metadata`` itself, with its ``device`` replaced in place.
        """
        model = "DV Camcorder"

        for stream in probe_data.get("streams", []):
            if stream.get("codec_type") != "video":
                continue

            codec = stream.get("codec_name", "").lower()
            width = stream.get("width", 0)
            height = stream.get("height", 0)

            if codec == "dvvideo":
                # Detect DV variant from the frame height
                if height == 576:
                    format_name = "DV PAL"
                elif height == 480:
                    format_name = "DV NTSC"
                else:
                    format_name = "DV"

            elif codec == "mpeg2video":
                # HDV format: prefer the encoder tag, fall back to frame size
                encoder = get_tags_lower(probe_data).get("encoder", "")

                if "1080" in encoder:
                    format_name = "HDV 1080i"
                elif "720" in encoder:
                    format_name = "HDV 720p"
                elif width == 1440 and height == 1080:
                    format_name = "HDV 1080i"
                elif width == 1280 and height == 720:
                    format_name = "HDV 720p"
                else:
                    format_name = "HDV"

            else:
                # Not a DV/HDV stream; keep looking at later video streams.
                continue

            model = f"{format_name} Camcorder"
            break

        device = DeviceInfo(
            make=None,
            model=model,
            type=MediaDeviceType.CAMERA,
            detection_method=DetectionMethod.METADATA,
            confidence=0.9,
        )

        base_metadata.device = device

        return base_metadata
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
# Register the extractor
|
|
121
|
+
register_extractor("dv", DVExtractor())
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""FFmpeg metadata extraction.
|
|
2
|
+
|
|
3
|
+
Handles files encoded/processed with FFmpeg:
|
|
4
|
+
- OBS recordings
|
|
5
|
+
- Handbrake conversions
|
|
6
|
+
- Command-line FFmpeg output
|
|
7
|
+
- Other FFmpeg-based tools
|
|
8
|
+
|
|
9
|
+
Detection:
|
|
10
|
+
- encoder tag starts with "Lavf" (libavformat)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from media_engine.schemas import (
|
|
17
|
+
DetectionMethod,
|
|
18
|
+
DeviceInfo,
|
|
19
|
+
MediaDeviceType,
|
|
20
|
+
Metadata,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
from .registry import get_tags_lower, register_extractor
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class FFmpegExtractor:
    """Metadata extractor for FFmpeg-encoded files."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Detect if file was encoded with FFmpeg.

        Args:
            probe_data: Probe output used to read the container tags.
            file_path: Path of the media file (not used for detection).

        Returns:
            ``True`` when the encoder tag starts with "Lavf" (libavformat).
        """
        encoder = get_tags_lower(probe_data).get("encoder", "")

        # Check encoder tag for libavformat signature
        return encoder.startswith("Lavf")

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Extract metadata for FFmpeg-encoded files.

        Args:
            probe_data: Probe output used to read the container tags.
            file_path: Path of the media file (not used here).
            base_metadata: Already extracted generic metadata; all of its
                fields listed below are carried over unchanged.

        Returns:
            A new ``Metadata`` whose device is "FFmpeg", with the encoder tag
            used as both model and software.
        """
        tags = get_tags_lower(probe_data)

        # An empty encoder tag is reported as None rather than "".
        encoder = tags.get("encoder", "") or None

        device = DeviceInfo(
            make="FFmpeg",
            model=encoder,
            software=encoder,
            type=MediaDeviceType.UNKNOWN,
            detection_method=DetectionMethod.METADATA,
            confidence=0.8,
        )

        return Metadata(
            duration=base_metadata.duration,
            resolution=base_metadata.resolution,
            codec=base_metadata.codec,
            video_codec=base_metadata.video_codec,
            audio=base_metadata.audio,
            fps=base_metadata.fps,
            bitrate=base_metadata.bitrate,
            file_size=base_metadata.file_size,
            timecode=base_metadata.timecode,
            created_at=base_metadata.created_at,
            device=device,
            gps=base_metadata.gps,
            # Forward the track as well so it is not lost when rebuilding.
            gps_track=base_metadata.gps_track,
            color_space=base_metadata.color_space,
            lens=base_metadata.lens,
        )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Register this extractor
|
|
76
|
+
register_extractor("ffmpeg", FFmpegExtractor())
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Generic metadata extraction fallback.
|
|
2
|
+
|
|
3
|
+
This module handles files that don't match any specific manufacturer.
|
|
4
|
+
It extracts basic device info from standard metadata tags.
|
|
5
|
+
|
|
6
|
+
Detection:
|
|
7
|
+
- Always matches as fallback (registered last in __init__.py)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from media_engine.schemas import (
|
|
14
|
+
DetectionMethod,
|
|
15
|
+
DeviceInfo,
|
|
16
|
+
MediaDeviceType,
|
|
17
|
+
Metadata,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from .registry import get_tags_lower
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
# Manufacturer names that mark a device as a drone.
# The make tag is compared against these names case-insensitively.
DRONE_MANUFACTURERS = {
    "Autel",
    "DJI",
    "GoPro Karma",
    "Parrot",
    "Skydio",
    "Yuneec",
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _determine_device_type(make: str | None, model: str | None) -> MediaDeviceType:
    """Determine device type from make and model strings.

    All comparisons are case-insensitive. A make listed in
    ``DRONE_MANUFACTURERS`` yields ``DRONE`` unless the model name marks a
    handheld camera ("OSMO"/"ACTION"), so that such a model from a drone
    maker is not reported as a drone.

    Args:
        make: Manufacturer name, or ``None``/empty when unknown.
        model: Model name, or ``None``/empty when unknown.

    Returns:
        ``DRONE``, ``ACTION_CAMERA`` or ``PHONE`` when a rule matches,
        ``CAMERA`` when make or model is known but nothing matched, and
        ``UNKNOWN`` when both are missing.
    """
    make_upper = make.upper() if make else ""
    model_upper = model.upper() if model else ""

    # Handheld/action models are checked for drone makers and in general.
    is_handheld_model = "OSMO" in model_upper or "ACTION" in model_upper

    if make_upper:
        # Check for drones
        if any(make_upper == name.upper() for name in DRONE_MANUFACTURERS):
            if is_handheld_model:
                return MediaDeviceType.ACTION_CAMERA
            return MediaDeviceType.DRONE

        # Check for action cameras
        if "GOPRO" in make_upper:
            return MediaDeviceType.ACTION_CAMERA

    if model_upper:
        # Check for phones
        if "IPHONE" in model_upper or "IPAD" in model_upper:
            return MediaDeviceType.PHONE
        if "PIXEL" in model_upper or "GALAXY" in model_upper:
            return MediaDeviceType.PHONE

        # Check for action cameras
        if "GOPRO" in model_upper or "HERO" in model_upper or is_handheld_model:
            return MediaDeviceType.ACTION_CAMERA

    # Default to camera for professional/unknown devices
    return MediaDeviceType.CAMERA if make or model else MediaDeviceType.UNKNOWN
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class GenericExtractor:
    """Fallback metadata extractor for unknown devices."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Always match as fallback."""
        return True

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Extract basic device info from metadata tags.

        Args:
            probe_data: Probe output used to read the container tags.
            file_path: Path of the media file (not used here).
            base_metadata: Already extracted generic metadata.

        Returns:
            ``base_metadata`` unchanged when neither make nor model can be
            determined; otherwise a new ``Metadata`` that copies the base
            fields and adds the detected device.
        """
        tags = get_tags_lower(probe_data)

        # Try various tag locations for make/model
        make = (
            tags.get("make")
            or tags.get("manufacturer")
            or tags.get("com.apple.quicktime.make")
            or tags.get("com.apple.proapps.manufacturer")
        )
        model = (
            tags.get("model")
            or tags.get("model_name")
            or tags.get("com.apple.quicktime.model")
            or tags.get("com.apple.proapps.cameraname")
        )
        software = tags.get("software") or tags.get("com.apple.quicktime.software")

        # Check encoder tag for additional info
        encoder = tags.get("encoder", "")
        if not make and not model and encoder:
            # Some cameras put info in encoder tag
            if encoder.upper().startswith("DJI"):
                make = "DJI"
                # Drop the prefix and surrounding whitespace ("DJI Pocket2"
                # -> "Pocket2"); keep the whole tag when nothing follows.
                model = encoder[3:].strip() or encoder

        # If we still have no info, return base metadata unchanged
        if not make and not model:
            return base_metadata

        device = DeviceInfo(
            make=make,
            model=model,
            software=software,
            type=_determine_device_type(make, model),
            detection_method=DetectionMethod.METADATA,
            confidence=0.8,  # Lower confidence for generic detection
        )

        return Metadata(
            duration=base_metadata.duration,
            resolution=base_metadata.resolution,
            codec=base_metadata.codec,
            video_codec=base_metadata.video_codec,
            audio=base_metadata.audio,
            fps=base_metadata.fps,
            bitrate=base_metadata.bitrate,
            file_size=base_metadata.file_size,
            timecode=base_metadata.timecode,
            created_at=base_metadata.created_at,
            device=device,
            gps=base_metadata.gps,
            # Forward the track as well so it is not lost when rebuilding.
            gps_track=base_metadata.gps_track,
            color_space=base_metadata.color_space,
            lens=base_metadata.lens,
        )
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# Register this extractor LAST (it's the fallback)
|
|
119
|
+
# This is done in __init__.py to ensure proper order
|