media-engine 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. cli/clip.py +79 -0
  2. cli/faces.py +91 -0
  3. cli/metadata.py +68 -0
  4. cli/motion.py +77 -0
  5. cli/objects.py +94 -0
  6. cli/ocr.py +93 -0
  7. cli/scenes.py +57 -0
  8. cli/telemetry.py +65 -0
  9. cli/transcript.py +76 -0
  10. media_engine/__init__.py +7 -0
  11. media_engine/_version.py +34 -0
  12. media_engine/app.py +80 -0
  13. media_engine/batch/__init__.py +56 -0
  14. media_engine/batch/models.py +99 -0
  15. media_engine/batch/processor.py +1131 -0
  16. media_engine/batch/queue.py +232 -0
  17. media_engine/batch/state.py +30 -0
  18. media_engine/batch/timing.py +321 -0
  19. media_engine/cli.py +17 -0
  20. media_engine/config.py +674 -0
  21. media_engine/extractors/__init__.py +75 -0
  22. media_engine/extractors/clip.py +401 -0
  23. media_engine/extractors/faces.py +459 -0
  24. media_engine/extractors/frame_buffer.py +351 -0
  25. media_engine/extractors/frames.py +402 -0
  26. media_engine/extractors/metadata/__init__.py +127 -0
  27. media_engine/extractors/metadata/apple.py +169 -0
  28. media_engine/extractors/metadata/arri.py +118 -0
  29. media_engine/extractors/metadata/avchd.py +208 -0
  30. media_engine/extractors/metadata/avchd_gps.py +270 -0
  31. media_engine/extractors/metadata/base.py +688 -0
  32. media_engine/extractors/metadata/blackmagic.py +139 -0
  33. media_engine/extractors/metadata/camera_360.py +276 -0
  34. media_engine/extractors/metadata/canon.py +290 -0
  35. media_engine/extractors/metadata/dji.py +371 -0
  36. media_engine/extractors/metadata/dv.py +121 -0
  37. media_engine/extractors/metadata/ffmpeg.py +76 -0
  38. media_engine/extractors/metadata/generic.py +119 -0
  39. media_engine/extractors/metadata/gopro.py +256 -0
  40. media_engine/extractors/metadata/red.py +305 -0
  41. media_engine/extractors/metadata/registry.py +114 -0
  42. media_engine/extractors/metadata/sony.py +442 -0
  43. media_engine/extractors/metadata/tesla.py +157 -0
  44. media_engine/extractors/motion.py +765 -0
  45. media_engine/extractors/objects.py +245 -0
  46. media_engine/extractors/objects_qwen.py +754 -0
  47. media_engine/extractors/ocr.py +268 -0
  48. media_engine/extractors/scenes.py +82 -0
  49. media_engine/extractors/shot_type.py +217 -0
  50. media_engine/extractors/telemetry.py +262 -0
  51. media_engine/extractors/transcribe.py +579 -0
  52. media_engine/extractors/translate.py +121 -0
  53. media_engine/extractors/vad.py +263 -0
  54. media_engine/main.py +68 -0
  55. media_engine/py.typed +0 -0
  56. media_engine/routers/__init__.py +15 -0
  57. media_engine/routers/batch.py +78 -0
  58. media_engine/routers/health.py +93 -0
  59. media_engine/routers/models.py +211 -0
  60. media_engine/routers/settings.py +87 -0
  61. media_engine/routers/utils.py +135 -0
  62. media_engine/schemas.py +581 -0
  63. media_engine/utils/__init__.py +5 -0
  64. media_engine/utils/logging.py +54 -0
  65. media_engine/utils/memory.py +49 -0
  66. media_engine-0.1.0.dist-info/METADATA +276 -0
  67. media_engine-0.1.0.dist-info/RECORD +70 -0
  68. media_engine-0.1.0.dist-info/WHEEL +4 -0
  69. media_engine-0.1.0.dist-info/entry_points.txt +11 -0
  70. media_engine-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,371 @@
1
+ """DJI metadata extraction.
2
+
3
+ Handles DJI drones and cameras:
4
+ - Mavic series (Air, Pro, Mini, etc.)
5
+ - Phantom series
6
+ - Inspire series
7
+ - Osmo/Pocket series (Pocket, Pocket 2, Action, etc.)
8
+ - FPV drones
9
+
10
+ Detection methods:
11
+ - encoder tag: "DJIMavic3", "DJI Pocket2", etc.
12
+ - make tag: "DJI"
13
+ - filename prefix: "DJI_"
14
+ - SRT sidecar file presence
15
+
16
+ SRT files contain per-frame telemetry:
17
+ [iso: 400] [shutter: 1/100.0] [fnum: 2.8] [latitude: 61.05121] ...
18
+ """
19
+
20
+ import logging
21
+ import re
22
+ from pathlib import Path
23
+ from typing import Any
24
+
25
+ from media_engine.schemas import (
26
+ GPS,
27
+ ColorSpace,
28
+ DetectionMethod,
29
+ DeviceInfo,
30
+ GPSTrack,
31
+ GPSTrackPoint,
32
+ LensInfo,
33
+ MediaDeviceType,
34
+ Metadata,
35
+ )
36
+
37
+ from .base import SidecarMetadata
38
+ from .registry import get_tags_lower, register_extractor
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ # DJI device type mapping based on model name
43
# Lower-case keywords looked for inside a DJI model name to recognise a drone.
DRONE_MODELS = {"agras", "air", "avata", "fpv", "inspire", "matrice", "mavic", "mini", "phantom"}

# Lower-case keywords looked for inside a DJI model name to recognise a
# handheld camera or gimbal.
CAMERA_MODELS = {"action", "osmo", "pocket", "ronin"}
61
+
62
+
63
def _get_device_type(model: str | None, has_gps: bool = False) -> MediaDeviceType:
    """Classify a DJI device as a drone or an action camera.

    Args:
        model: Model name, or None/empty when it is unknown.
        has_gps: Whether GPS data was found; decides the result whenever the
            model name gives no answer.

    Returns:
        MediaDeviceType.ACTION_CAMERA when the lower-cased model contains a
        CAMERA_MODELS keyword, MediaDeviceType.DRONE when it contains a
        DRONE_MODELS keyword, and otherwise DRONE if has_gps else
        ACTION_CAMERA.
    """
    gps_based_guess = MediaDeviceType.DRONE if has_gps else MediaDeviceType.ACTION_CAMERA
    if not model:
        return gps_based_guess

    lowered = model.lower()

    # Handheld keywords are tested before drone keywords, so a name matching
    # both sets is reported as a handheld camera.
    if any(keyword in lowered for keyword in CAMERA_MODELS):
        return MediaDeviceType.ACTION_CAMERA
    if any(keyword in lowered for keyword in DRONE_MODELS):
        return MediaDeviceType.DRONE

    return gps_based_guess
88
+
89
+
90
def _parse_encoder_model(encoder: str) -> str | None:
    """Parse a human-readable model name from a DJI encoder string.

    A leading "DJI" (any case) is removed, then spaces are inserted at the
    word boundaries DJI omits in its encoder tags.

    Examples:
        "DJIMavic3" -> "Mavic 3"
        "DJI Pocket2" -> "Pocket 2"
        "DJI Pocket 2" -> "Pocket 2"
        "DJIMini3Pro" -> "Mini 3 Pro"
        "DJIAir2S" -> "Air 2S"
        "DJIFPV" -> "FPV"

    Args:
        encoder: Raw encoder tag value.

    Returns:
        The spaced model name, or None when the encoder is empty or holds
        nothing besides the "DJI" prefix.
    """
    if not encoder:
        return None

    # Remove DJI prefix (case insensitive)
    model = encoder
    if model.upper().startswith("DJI"):
        model = model[3:].strip()

    if not model:
        return None

    # Letter followed by a digit (Mavic3 -> Mavic 3). Only letters trigger the
    # split, so an existing space ("Pocket 2") is not doubled.
    model = re.sub(r"(?<=[A-Za-z])(?=\d)", " ", model)

    # Lower-case letter followed by an upper-case one (MavicPro -> Mavic Pro).
    model = re.sub(r"(?<=[a-z])(?=[A-Z])", " ", model)

    # Digit followed by a capitalised word (3Pro -> 3 Pro). Requiring a
    # lower-case second letter keeps suffixes such as "2S" or "4K" intact.
    model = re.sub(r"(?<=\d)(?=[A-Z][a-z])", " ", model)

    return model.strip()
117
+
118
+
119
def _parse_color_from_comment(comment: str) -> str | None:
    """Pull the colour mode out of a DJI Pocket/Osmo ``comment`` tag.

    The tag looks like "DE=D-CLike, Type=Normal, HQ=Normal, Mode=P"; the
    value after "DE=" (up to the next comma) is the colour mode.

    Args:
        comment: Raw comment tag value; may be empty.

    Returns:
        "D-Cinelike" for "D-CLike" and "D-Log" for "D-Log" (both matched
        case-insensitively), any other DE value stripped of surrounding
        whitespace, or None when the comment is empty or has no "DE=" entry.
    """
    found = re.search(r"DE=([^,]+)", comment) if comment else None
    if found is None:
        return None

    raw_mode = found.group(1).strip()

    # Canonical spellings for the abbreviations DJI writes into the tag.
    canonical_names = {"d-clike": "D-Cinelike", "d-log": "D-Log"}
    return canonical_names.get(raw_mode.lower(), raw_mode)
140
+
141
+
142
def _parse_srt_sidecar(video_path: str) -> SidecarMetadata | None:
    """Read the SRT telemetry file that sits next to a DJI video.

    The sidecar shares the video's name with an ``.SRT`` (or ``.srt``)
    extension, e.g. ``DJI_0987.MP4`` -> ``DJI_0987.SRT``, and carries
    bracketed fields such as::

        [iso: 400] [shutter: 1/100.0] [fnum: 2.8] [latitude: 61.05121] ...

    Args:
        video_path: Path of the video file whose sidecar should be read.

    Returns:
        SidecarMetadata holding whichever of these were found: the first
        GPS point that is not (0, 0), a GPS track (only when more than one
        point survives de-duplication), the colour mode and the lens values.
        None when no sidecar exists, when nothing was found, or when reading
        or parsing raised (a warning is logged in that case).
    """
    video = Path(video_path)
    candidates = (video.with_suffix(".SRT"), video.with_suffix(".srt"))
    srt_path = next((candidate for candidate in candidates if candidate.exists()), None)
    if srt_path is None:
        return None

    try:
        with open(srt_path, encoding="utf-8") as handle:
            text = handle.read()

        first_fix: GPS | None = None
        track: GPSTrack | None = None
        color_space: ColorSpace | None = None
        lens: LensInfo | None = None

        # Collect every coordinate in the file; entries are paired by position.
        latitudes = re.findall(r"\[latitude:\s*([-\d.]+)\]", text)
        longitudes = re.findall(r"\[longitude:\s*([-\d.]+)\]", text)
        altitudes = re.findall(r"abs_alt:\s*([-\d.]+)", text)

        if latitudes and longitudes and len(latitudes) == len(longitudes):
            points: list[GPSTrackPoint] = []
            previous: tuple[float, float] | None = None

            for index, (raw_lat, raw_lon) in enumerate(zip(latitudes, longitudes)):
                lat = float(raw_lat)
                lon = float(raw_lon)

                # 0,0 is treated as "no fix" and skipped.
                if lat == 0 and lon == 0:
                    continue

                # Altitude is taken from the same position when one exists.
                altitude = float(altitudes[index]) if index < len(altitudes) else None

                # Drop a point that repeats the one kept just before it.
                if (lat, lon) == previous:
                    continue

                points.append(
                    GPSTrackPoint(
                        latitude=round(lat, 6),
                        longitude=round(lon, 6),
                        altitude=round(altitude, 1) if altitude is not None else None,
                    )
                )
                previous = (lat, lon)

            if points:
                # The first kept point doubles as the clip's GPS location.
                head = points[0]
                first_fix = GPS(
                    latitude=head.latitude,
                    longitude=head.longitude,
                    altitude=head.altitude,
                )

                # A track is only worth reporting with more than one point.
                if len(points) > 1:
                    track = GPSTrack(points=points, source="srt_sidecar")
                    logger.info(f"Extracted {len(points)} GPS points from SRT")

        # Colour mode (d_log, d_cinelike, etc.)
        color_hit = re.search(r"\[color_md\s*:\s*(\w+)\]", text)
        if color_hit:
            color_space = ColorSpace(
                transfer=color_hit.group(1),
                detection_method=DetectionMethod.METADATA,
            )

        # Focal length and aperture; either may be missing.
        focal_hit = re.search(r"\[focal_len:\s*([\d.]+)\]", text)
        aperture_hit = re.search(r"\[fnum:\s*([\d.]+)\]", text)
        if focal_hit or aperture_hit:
            focal_length = float(focal_hit.group(1)) if focal_hit else None
            aperture = float(aperture_hit.group(1)) if aperture_hit else None
            lens = LensInfo(
                focal_length=focal_length,
                aperture=aperture,
                detection_method=DetectionMethod.METADATA,
            )

        if not (first_fix or track or color_space or lens):
            return None
        return SidecarMetadata(gps=first_fix, gps_track=track, color_space=color_space, lens=lens)

    except Exception as e:
        logger.warning(f"Error reading DJI SRT sidecar {srt_path}: {e}")
        return None
253
+
254
+
255
class DJIExtractor:
    """Metadata extractor for DJI devices."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Detect if file is from a DJI device.

        Checks, in order: the make/manufacturer tag, the encoder tag, the
        video stream handler_name, the "DJI_" filename prefix, and finally
        the opening of an SRT sidecar.

        Args:
            probe_data: Probe output; "streams" and the tags returned by
                get_tags_lower are read.
            file_path: Path of the media file.

        Returns:
            True as soon as any check matches, otherwise False.
        """
        tags = get_tags_lower(probe_data)

        # Check make tag
        make = tags.get("make") or tags.get("manufacturer")
        if make and "DJI" in make.upper():
            return True

        # Check encoder tag (DJIMavic3, DJI Pocket2, etc.)
        encoder = tags.get("encoder") or ""
        if encoder.upper().startswith("DJI"):
            return True

        # Check video stream handler_name (DJI Pocket uses "DJI.AVC")
        for stream in probe_data.get("streams", []):
            if stream.get("codec_type") == "video":
                handler = stream.get("tags", {}).get("handler_name") or ""
                if "DJI" in handler.upper():
                    return True

        # Check filename prefix
        path = Path(file_path)
        if path.name.upper().startswith("DJI_"):
            return True

        # Check for SRT sidecar (DJI signature); upper-case suffix preferred.
        candidates = (path.with_suffix(".SRT"), path.with_suffix(".srt"))
        srt_path = next((candidate for candidate in candidates if candidate.exists()), None)
        if srt_path is not None:
            try:
                with open(srt_path, encoding="utf-8") as f:
                    head = f.read(500).lower()
            except (OSError, ValueError) as exc:
                # Best effort: an unreadable or non-UTF-8 sidecar simply does
                # not count as evidence (UnicodeDecodeError is a ValueError).
                logger.debug("Could not read SRT sidecar %s: %s", srt_path, exc)
            else:
                # DJI SRT has [iso:, [shutter:, etc. Whitespace before the
                # colon is tolerated, as the sidecar parser's color_md
                # pattern already does.
                if re.search(r"\[(?:iso|shutter)\s*:", head):
                    return True

        return False

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Extract DJI-specific metadata.

        Args:
            probe_data: Probe output used for tag lookups.
            file_path: Path of the media file; also used to find the SRT
                sidecar.
            base_metadata: Previously extracted metadata; its values are
                copied, and used as fallback for GPS, track, colour space
                and lens.

        Returns:
            A new Metadata with DJI device info, preferring sidecar values
            over base_metadata for GPS, GPS track, colour space and lens.
        """
        tags = get_tags_lower(probe_data)

        # Get make and model; the make defaults to "DJI" when untagged.
        make = tags.get("make") or tags.get("manufacturer") or "DJI"
        model = tags.get("model") or tags.get("model_name")

        # Try to get model from encoder tag
        encoder = tags.get("encoder") or ""
        if not model and encoder.upper().startswith("DJI"):
            model = _parse_encoder_model(encoder)

        # Parse SRT sidecar for additional metadata (drones have these)
        sidecar = _parse_srt_sidecar(file_path)

        # Get GPS and track - from sidecar (drone) or base metadata
        gps = sidecar.gps if sidecar and sidecar.gps else base_metadata.gps
        gps_track = sidecar.gps_track if sidecar and sidecar.gps_track else base_metadata.gps_track

        # Determine device type using model and GPS presence as hints
        device_type = _get_device_type(model, gps is not None)

        device = DeviceInfo(
            make=make,
            model=model,
            software=tags.get("software"),
            type=device_type,
            detection_method=DetectionMethod.METADATA,
            confidence=1.0,
        )

        # Get color space - prefer SRT, then comment tag, then base
        color_space = base_metadata.color_space
        if sidecar and sidecar.color_space:
            color_space = sidecar.color_space
        else:
            # Try parsing from comment tag (DJI Pocket/Osmo)
            color_mode = _parse_color_from_comment(tags.get("comment", ""))
            if color_mode:
                color_space = ColorSpace(
                    transfer=color_mode,
                    detection_method=DetectionMethod.METADATA,
                )

        lens = sidecar.lens if sidecar and sidecar.lens else base_metadata.lens

        return Metadata(
            duration=base_metadata.duration,
            resolution=base_metadata.resolution,
            codec=base_metadata.codec,
            video_codec=base_metadata.video_codec,
            audio=base_metadata.audio,
            fps=base_metadata.fps,
            bitrate=base_metadata.bitrate,
            file_size=base_metadata.file_size,
            timecode=base_metadata.timecode,
            created_at=base_metadata.created_at,
            device=device,
            gps=gps,
            gps_track=gps_track,
            color_space=color_space,
            lens=lens,
        )
368
+
369
+
370
+ # Register this extractor
371
+ register_extractor("dji", DJIExtractor())
@@ -0,0 +1,121 @@
1
+ """DV and HDV format detection.
2
+
3
+ Detects tape-based camcorder formats:
4
+ - DV (SD): 720x480 NTSC or 720x576 PAL, dvvideo codec
5
+ - DVCAM: Professional DV variant
6
+ - DVCPRO: Panasonic professional DV
7
+ - HDV (HD): 1440x1080 or 1280x720, mpeg2video codec
8
+
9
+ These formats were used by consumer and prosumer camcorders
10
+ from the late 1990s through the 2010s.
11
+ """
12
+
13
+ import logging
14
+ from typing import Any
15
+
16
+ from media_engine.schemas import (
17
+ DetectionMethod,
18
+ DeviceInfo,
19
+ MediaDeviceType,
20
+ Metadata,
21
+ )
22
+
23
+ from .registry import get_tags_lower, register_extractor
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class DVExtractor:
    """Metadata extractor for DV and HDV formats."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Report whether any video stream is DV or HDV.

        A stream counts as DV when its codec is "dvvideo". It counts as HDV
        when its codec is "mpeg2video" and "hdv" appears (case-insensitively)
        in the encoder tag or in any of the stream's own tag values.
        """
        for stream in probe_data.get("streams", []):
            if stream.get("codec_type") != "video":
                continue

            codec = stream.get("codec_name", "").lower()
            if codec == "dvvideo":
                return True
            if codec != "mpeg2video":
                continue

            # MPEG-2 alone is not enough; look for an HDV marker in the tags.
            encoder = get_tags_lower(probe_data).get("encoder", "").lower()
            if "hdv" in encoder:
                return True
            if any("hdv" in str(value).lower() for value in stream.get("tags", {}).values()):
                return True

        return False

    def extract(
        self,
        probe_data: dict[str, Any],
        file_path: str,
        base_metadata: Metadata,
    ) -> Metadata:
        """Attach a DV/HDV device description to ``base_metadata``.

        Only the first video stream is inspected. The model string is
        "<format> Camcorder", where the format is derived from the frame
        height for DV and from the encoder tag or frame size for HDV; it
        stays "DV Camcorder" when that stream is neither.

        Returns:
            The same ``base_metadata`` object, with its ``device`` replaced.
        """
        model = "DV Camcorder"

        first_video = next(
            (s for s in probe_data.get("streams", []) if s.get("codec_type") == "video"),
            None,
        )

        if first_video is not None:
            codec = first_video.get("codec_name", "").lower()
            width = first_video.get("width", 0)
            height = first_video.get("height", 0)

            if codec == "dvvideo":
                # The line count tells the two SD variants apart.
                if height == 576:
                    label = "DV PAL"
                elif height == 480:
                    label = "DV NTSC"
                else:
                    label = "DV"
                model = f"{label} Camcorder"

            elif codec == "mpeg2video":
                encoder = get_tags_lower(probe_data).get("encoder", "")

                # Ordered rules: the encoder tag wins over the frame size.
                rules = (
                    ("1080" in encoder, "HDV 1080i"),
                    ("720" in encoder, "HDV 720p"),
                    (width == 1440 and height == 1080, "HDV 1080i"),
                    (width == 1280 and height == 720, "HDV 720p"),
                )
                label = next((name for matched, name in rules if matched), "HDV")
                model = f"{label} Camcorder"

        base_metadata.device = DeviceInfo(
            make=None,
            model=model,
            type=MediaDeviceType.CAMERA,
            detection_method=DetectionMethod.METADATA,
            confidence=0.9,
        )
        return base_metadata
118
+
119
+
120
+ # Register the extractor
121
+ register_extractor("dv", DVExtractor())
@@ -0,0 +1,76 @@
1
+ """FFmpeg metadata extraction.
2
+
3
+ Handles files encoded/processed with FFmpeg:
4
+ - OBS recordings
5
+ - Handbrake conversions
6
+ - Command-line FFmpeg output
7
+ - Other FFmpeg-based tools
8
+
9
+ Detection:
10
+ - encoder tag starts with "Lavf" (libavformat)
11
+ """
12
+
13
+ import logging
14
+ from typing import Any
15
+
16
+ from media_engine.schemas import (
17
+ DetectionMethod,
18
+ DeviceInfo,
19
+ MediaDeviceType,
20
+ Metadata,
21
+ )
22
+
23
+ from .registry import get_tags_lower, register_extractor
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
class FFmpegExtractor:
    """Metadata extractor for FFmpeg-encoded files."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Detect if file was encoded with FFmpeg.

        Returns:
            True when the encoder tag starts with "Lavf" (the libavformat
            signature), otherwise False.
        """
        encoder = get_tags_lower(probe_data).get("encoder", "")
        return encoder.startswith("Lavf")

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Extract metadata for FFmpeg-encoded files.

        Args:
            probe_data: Probe output used for the encoder tag lookup.
            file_path: Path of the media file (not used here).
            base_metadata: Previously extracted metadata, copied into the
                result.

        Returns:
            A new Metadata whose device is "FFmpeg", with the encoder tag
            reported as both model and software; every other value,
            including the GPS track, comes from ``base_metadata``.
        """
        encoder = get_tags_lower(probe_data).get("encoder", "")

        device = DeviceInfo(
            make="FFmpeg",
            model=encoder if encoder else None,
            software=encoder if encoder else None,
            type=MediaDeviceType.UNKNOWN,
            detection_method=DetectionMethod.METADATA,
            confidence=0.8,
        )

        return Metadata(
            duration=base_metadata.duration,
            resolution=base_metadata.resolution,
            codec=base_metadata.codec,
            video_codec=base_metadata.video_codec,
            audio=base_metadata.audio,
            fps=base_metadata.fps,
            bitrate=base_metadata.bitrate,
            file_size=base_metadata.file_size,
            timecode=base_metadata.timecode,
            created_at=base_metadata.created_at,
            device=device,
            gps=base_metadata.gps,
            # Carry the track over as well so rebuilding the object does not
            # silently discard it.
            gps_track=base_metadata.gps_track,
            color_space=base_metadata.color_space,
            lens=base_metadata.lens,
        )
73
+
74
+
75
+ # Register this extractor
76
+ register_extractor("ffmpeg", FFmpegExtractor())
@@ -0,0 +1,119 @@
1
+ """Generic metadata extraction fallback.
2
+
3
+ This module handles files that don't match any specific manufacturer.
4
+ It extracts basic device info from standard metadata tags.
5
+
6
+ Detection:
7
+ - Always matches as fallback (registered last in __init__.py)
8
+ """
9
+
10
+ import logging
11
+ from typing import Any
12
+
13
+ from media_engine.schemas import (
14
+ DetectionMethod,
15
+ DeviceInfo,
16
+ MediaDeviceType,
17
+ Metadata,
18
+ )
19
+
20
+ from .registry import get_tags_lower
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # Known drone manufacturers for device type detection
25
# Makes that are matched whole (case-insensitively) to classify a device as a drone.
DRONE_MANUFACTURERS = {
    "Autel",
    "DJI",
    "GoPro Karma",
    "Parrot",
    "Skydio",
    "Yuneec",
}
26
+
27
+
28
def _determine_device_type(make: str | None, model: str | None) -> MediaDeviceType:
    """Determine device type from make and model strings.

    All comparisons are case-insensitive.

    Args:
        make: Manufacturer name, or None/empty when unknown.
        model: Model name, or None/empty when unknown.

    Returns:
        - For a make listed in DRONE_MANUFACTURERS: ACTION_CAMERA when the
          model contains "OSMO" or "ACTION", otherwise DRONE.
        - ACTION_CAMERA for any other make containing "GOPRO".
        - Otherwise by model: PHONE for IPHONE/IPAD/PIXEL/GALAXY,
          ACTION_CAMERA for GOPRO/HERO/OSMO/ACTION.
        - CAMERA when a make or model exists but nothing matched, UNKNOWN
          when both are missing.
    """
    make_upper = make.upper() if make else ""
    model_upper = model.upper() if model else ""
    handheld_model = "OSMO" in model_upper or "ACTION" in model_upper

    if make_upper:
        # Compare directly instead of rebuilding an upper-cased set per call.
        if any(make_upper == known.upper() for known in DRONE_MANUFACTURERS):
            # A drone maker's handheld line must not be reported as a drone,
            # so the handheld keywords are honoured before the make decides.
            if handheld_model:
                return MediaDeviceType.ACTION_CAMERA
            return MediaDeviceType.DRONE

        # Check for action cameras
        if "GOPRO" in make_upper:
            return MediaDeviceType.ACTION_CAMERA

    if model_upper:
        # Check for phones
        if any(keyword in model_upper for keyword in ("IPHONE", "IPAD", "PIXEL", "GALAXY")):
            return MediaDeviceType.PHONE

        # Check for action cameras
        if handheld_model or "GOPRO" in model_upper or "HERO" in model_upper:
            return MediaDeviceType.ACTION_CAMERA

    # Default to camera for professional/unknown devices
    return MediaDeviceType.CAMERA if make or model else MediaDeviceType.UNKNOWN
58
+
59
+
60
class GenericExtractor:
    """Fallback metadata extractor for unknown devices."""

    def detect(self, probe_data: dict[str, Any], file_path: str) -> bool:
        """Always match as fallback."""
        return True

    def extract(self, probe_data: dict[str, Any], file_path: str, base_metadata: Metadata) -> Metadata:
        """Extract basic device info from metadata tags.

        Args:
            probe_data: Probe output used for tag lookups.
            file_path: Path of the media file (not used here).
            base_metadata: Previously extracted metadata.

        Returns:
            ``base_metadata`` itself when neither make nor model can be
            found; otherwise a new Metadata carrying the detected device and
            every other value (including the GPS track) from
            ``base_metadata``.
        """
        tags = get_tags_lower(probe_data)

        # Try various tag locations for make/model
        make = (
            tags.get("make")
            or tags.get("manufacturer")
            or tags.get("com.apple.quicktime.make")
            or tags.get("com.apple.proapps.manufacturer")
        )
        model = (
            tags.get("model")
            or tags.get("model_name")
            or tags.get("com.apple.quicktime.model")
            or tags.get("com.apple.proapps.cameraname")
        )
        software = tags.get("software") or tags.get("com.apple.quicktime.software")

        # Check encoder tag for additional info
        encoder = tags.get("encoder", "")
        if not make and not model and encoder:
            # Some cameras put info in encoder tag
            if encoder.upper().startswith("DJI"):
                make = "DJI"
                # Strip the separator left behind by "DJI Pocket2"-style tags;
                # a bare "DJI" encoder is kept whole, as before.
                model = encoder[3:].strip() or encoder

        # If we still have no info, return base metadata unchanged
        if not make and not model:
            return base_metadata

        device = DeviceInfo(
            make=make,
            model=model,
            software=software,
            type=_determine_device_type(make, model),
            detection_method=DetectionMethod.METADATA,
            confidence=0.8,  # Lower confidence for generic detection
        )

        return Metadata(
            duration=base_metadata.duration,
            resolution=base_metadata.resolution,
            codec=base_metadata.codec,
            video_codec=base_metadata.video_codec,
            audio=base_metadata.audio,
            fps=base_metadata.fps,
            bitrate=base_metadata.bitrate,
            file_size=base_metadata.file_size,
            timecode=base_metadata.timecode,
            created_at=base_metadata.created_at,
            device=device,
            gps=base_metadata.gps,
            # Carry the track over as well so rebuilding the object does not
            # silently discard it.
            gps_track=base_metadata.gps_track,
            color_space=base_metadata.color_space,
            lens=base_metadata.lens,
        )
116
+
117
+
118
+ # Register this extractor LAST (it's the fallback)
119
+ # This is done in __init__.py to ensure proper order